Flowfile 0.3.9__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowfile/__init__.py +8 -1
- flowfile/api.py +1 -3
- flowfile/web/static/assets/{CloudConnectionManager-c97c25f8.js → CloudConnectionManager-0dfba9f2.js} +2 -2
- flowfile/web/static/assets/{CloudStorageReader-f1ff509e.js → CloudStorageReader-d5b1b6c9.js} +11 -78
- flowfile/web/static/assets/{CloudStorageWriter-034f8b78.js → CloudStorageWriter-00d87aad.js} +12 -79
- flowfile/web/static/assets/{CloudStorageWriter-49c9a4b2.css → CloudStorageWriter-b0ee067f.css} +24 -24
- flowfile/web/static/assets/ColumnSelector-4685e75d.js +83 -0
- flowfile/web/static/assets/ColumnSelector-47996a16.css +10 -0
- flowfile/web/static/assets/ContextMenu-23e909da.js +41 -0
- flowfile/web/static/assets/{SettingsSection-9c836ecc.css → ContextMenu-4c74eef1.css} +0 -21
- flowfile/web/static/assets/ContextMenu-63cfa99b.css +26 -0
- flowfile/web/static/assets/ContextMenu-70ae0c79.js +41 -0
- flowfile/web/static/assets/ContextMenu-c13f91d0.css +26 -0
- flowfile/web/static/assets/ContextMenu-f149cf7c.js +41 -0
- flowfile/web/static/assets/{CrossJoin-41efa4cb.css → CrossJoin-1119d18e.css} +18 -18
- flowfile/web/static/assets/{CrossJoin-9e156ebe.js → CrossJoin-702a3edd.js} +14 -84
- flowfile/web/static/assets/CustomNode-74a37f74.css +32 -0
- flowfile/web/static/assets/CustomNode-b1519993.js +211 -0
- flowfile/web/static/assets/{DatabaseConnectionSettings-d5c625b3.js → DatabaseConnectionSettings-6f3e4ea5.js} +3 -3
- flowfile/web/static/assets/{DatabaseManager-265adc5e.js → DatabaseManager-cf5ef661.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-f50c6558.css → DatabaseReader-ae61773c.css} +0 -27
- flowfile/web/static/assets/{DatabaseReader-0b10551e.js → DatabaseReader-d38c7295.js} +14 -114
- flowfile/web/static/assets/{DatabaseWriter-c17c6916.js → DatabaseWriter-b04ef46a.js} +13 -74
- flowfile/web/static/assets/{ExploreData-5bdae813.css → ExploreData-2d0cf4db.css} +8 -14
- flowfile/web/static/assets/ExploreData-5fa10ed8.js +192 -0
- flowfile/web/static/assets/{ExternalSource-3a66556c.js → ExternalSource-d39af878.js} +8 -79
- flowfile/web/static/assets/{Filter-91ad87e7.js → Filter-9b6d08db.js} +12 -85
- flowfile/web/static/assets/{Filter-a9d08ba1.css → Filter-f62091b3.css} +3 -3
- flowfile/web/static/assets/{Formula-3c395ab1.js → Formula-6b04fb1d.js} +20 -87
- flowfile/web/static/assets/{Formula-29f19d21.css → Formula-bb96803d.css} +4 -4
- flowfile/web/static/assets/{FuzzyMatch-6857de82.css → FuzzyMatch-1010f966.css} +42 -42
- flowfile/web/static/assets/{FuzzyMatch-2df0d230.js → FuzzyMatch-999521f4.js} +16 -87
- flowfile/web/static/assets/{GraphSolver-d285877f.js → GraphSolver-17dd2198.js} +13 -159
- flowfile/web/static/assets/GraphSolver-f0cb7bfb.css +22 -0
- flowfile/web/static/assets/{GroupBy-0bd1cc6b.js → GroupBy-6b039e18.js} +12 -75
- flowfile/web/static/assets/{Unique-b5615727.css → GroupBy-b9505323.css} +8 -8
- flowfile/web/static/assets/{Join-5a78a203.js → Join-24d0f113.js} +15 -85
- flowfile/web/static/assets/{Join-f45eff22.css → Join-fd79b451.css} +20 -20
- flowfile/web/static/assets/{ManualInput-a71b52c6.css → ManualInput-3246a08d.css} +20 -20
- flowfile/web/static/assets/{ManualInput-93aef9d6.js → ManualInput-34639209.js} +11 -82
- flowfile/web/static/assets/MultiSelect-0e8724a3.js +5 -0
- flowfile/web/static/assets/MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js +63 -0
- flowfile/web/static/assets/NumericInput-3d63a470.js +5 -0
- flowfile/web/static/assets/NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js +35 -0
- flowfile/web/static/assets/Output-283fe388.css +37 -0
- flowfile/web/static/assets/{Output-411ecaee.js → Output-edea9802.js} +62 -273
- flowfile/web/static/assets/{Pivot-89db4b04.js → Pivot-61d19301.js} +14 -138
- flowfile/web/static/assets/Pivot-cf333e3d.css +22 -0
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +13 -0
- flowfile/web/static/assets/PivotValidation-c46cd420.css +13 -0
- flowfile/web/static/assets/PivotValidation-de9f43fe.js +61 -0
- flowfile/web/static/assets/PivotValidation-f97fec5b.js +61 -0
- flowfile/web/static/assets/{PolarsCode-a9f974f8.js → PolarsCode-bc3c9984.js} +13 -80
- flowfile/web/static/assets/Read-64a3f259.js +218 -0
- flowfile/web/static/assets/Read-e808b239.css +62 -0
- flowfile/web/static/assets/RecordCount-3d5039be.js +53 -0
- flowfile/web/static/assets/{RecordId-55ae7d36.js → RecordId-597510e0.js} +8 -80
- flowfile/web/static/assets/SQLQueryComponent-36cef432.css +27 -0
- flowfile/web/static/assets/SQLQueryComponent-df51adbe.js +38 -0
- flowfile/web/static/assets/{Sample-b4a18476.js → Sample-4be0a507.js} +8 -77
- flowfile/web/static/assets/{SecretManager-b066d13a.js → SecretManager-4839be57.js} +2 -2
- flowfile/web/static/assets/{Select-727688dc.js → Select-9b72f201.js} +11 -85
- flowfile/web/static/assets/SettingsSection-2e4d03c4.css +21 -0
- flowfile/web/static/assets/SettingsSection-5c696bee.css +20 -0
- flowfile/web/static/assets/SettingsSection-71e6b7e3.css +21 -0
- flowfile/web/static/assets/SettingsSection-7ded385d.js +45 -0
- flowfile/web/static/assets/{SettingsSection-695ac487.js → SettingsSection-e1e9c953.js} +2 -40
- flowfile/web/static/assets/SettingsSection-f0f75a42.js +53 -0
- flowfile/web/static/assets/SingleSelect-6c777aac.js +5 -0
- flowfile/web/static/assets/SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js +62 -0
- flowfile/web/static/assets/SliderInput-7cb93e62.js +40 -0
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +4 -0
- flowfile/web/static/assets/{GroupBy-ab1ea74b.css → Sort-3643d625.css} +8 -8
- flowfile/web/static/assets/{Sort-be3339a8.js → Sort-6cbde21a.js} +12 -97
- flowfile/web/static/assets/TextInput-d9a40c11.js +5 -0
- flowfile/web/static/assets/TextInput.vue_vue_type_script_setup_true_lang-5896c375.js +32 -0
- flowfile/web/static/assets/{TextToRows-c92d1ec2.css → TextToRows-5d2c1190.css} +9 -9
- flowfile/web/static/assets/{TextToRows-7b8998da.js → TextToRows-c4fcbf4d.js} +14 -83
- flowfile/web/static/assets/ToggleSwitch-4ef91d19.js +5 -0
- flowfile/web/static/assets/ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js +31 -0
- flowfile/web/static/assets/{UnavailableFields-8b0cb48e.js → UnavailableFields-a03f512c.js} +2 -2
- flowfile/web/static/assets/{Union-8d9ac7f9.css → Union-af6c3d9b.css} +6 -6
- flowfile/web/static/assets/Union-bfe9b996.js +77 -0
- flowfile/web/static/assets/{Unique-af5a80b4.js → Unique-5d023a27.js} +23 -104
- flowfile/web/static/assets/{Sort-7ccfa0fe.css → Unique-f9fb0809.css} +8 -8
- flowfile/web/static/assets/Unpivot-1e422df3.css +30 -0
- flowfile/web/static/assets/{Unpivot-5195d411.js → Unpivot-91cc5354.js} +12 -166
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +13 -0
- flowfile/web/static/assets/UnpivotValidation-7ee2de44.js +51 -0
- flowfile/web/static/assets/{ExploreData-18a4fe52.js → VueGraphicWalker-e51b9924.js} +4 -264
- flowfile/web/static/assets/VueGraphicWalker-ed5ab88b.css +6 -0
- flowfile/web/static/assets/{api-cb00cce6.js → api-c1bad5ca.js} +1 -1
- flowfile/web/static/assets/{api-023d1733.js → api-cf1221f0.js} +1 -1
- flowfile/web/static/assets/{designer-2197d782.css → designer-8da3ba3a.css} +859 -201
- flowfile/web/static/assets/{designer-6c322d8e.js → designer-9633482a.js} +2297 -733
- flowfile/web/static/assets/{documentation-4d1fafe1.js → documentation-ca400224.js} +1 -1
- flowfile/web/static/assets/{dropDown-0b46dd77.js → dropDown-614b998d.js} +1 -1
- flowfile/web/static/assets/{fullEditor-ec4e4f95.js → fullEditor-f7971590.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-def5879b.js → genericNodeSettings-4fe5f36b.js} +3 -3
- flowfile/web/static/assets/{index-681a3ed0.css → index-50508d4d.css} +8 -0
- flowfile/web/static/assets/{index-683fc198.js → index-5429bbf8.js} +208 -31
- flowfile/web/static/assets/nodeInput-5d0d6b79.js +41 -0
- flowfile/web/static/assets/outputCsv-076b85ab.js +86 -0
- flowfile/web/static/assets/{Output-48f81019.css → outputCsv-9cc59e0b.css} +0 -143
- flowfile/web/static/assets/outputExcel-0fd17dbe.js +56 -0
- flowfile/web/static/assets/outputExcel-b41305c0.css +102 -0
- flowfile/web/static/assets/outputParquet-b61e0847.js +31 -0
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +4 -0
- flowfile/web/static/assets/readCsv-a8bb8b61.js +179 -0
- flowfile/web/static/assets/readCsv-c767cb37.css +52 -0
- flowfile/web/static/assets/readExcel-67b4aee0.js +201 -0
- flowfile/web/static/assets/readExcel-806d2826.css +64 -0
- flowfile/web/static/assets/readParquet-48c81530.css +19 -0
- flowfile/web/static/assets/readParquet-92ce1dbc.js +23 -0
- flowfile/web/static/assets/{secretApi-baceb6f9.js → secretApi-68435402.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-de91449a.js → selectDynamic-92e25ee3.js} +7 -7
- flowfile/web/static/assets/{selectDynamic-b062bc9b.css → selectDynamic-aa913ff4.css} +16 -16
- flowfile/web/static/assets/user-defined-icon-0ae16c90.png +0 -0
- flowfile/web/static/assets/{vue-codemirror.esm-dc5e3348.js → vue-codemirror.esm-41b0e0d7.js} +65 -36
- flowfile/web/static/assets/{vue-content-loader.es-ba94b82f.js → vue-content-loader.es-2c8e608f.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/METADATA +5 -3
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/RECORD +191 -121
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/WHEEL +1 -1
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/entry_points.txt +1 -0
- flowfile_core/__init__.py +3 -0
- flowfile_core/configs/flow_logger.py +5 -13
- flowfile_core/configs/node_store/__init__.py +30 -0
- flowfile_core/configs/node_store/nodes.py +383 -99
- flowfile_core/configs/node_store/user_defined_node_registry.py +193 -0
- flowfile_core/configs/settings.py +2 -1
- flowfile_core/database/connection.py +5 -21
- flowfile_core/fileExplorer/funcs.py +239 -121
- flowfile_core/flowfile/analytics/analytics_processor.py +1 -0
- flowfile_core/flowfile/code_generator/code_generator.py +62 -64
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +73 -56
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +77 -86
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +4 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +19 -34
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +36 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +23 -23
- flowfile_core/flowfile/flow_data_engine/join/utils.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +9 -4
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +212 -86
- flowfile_core/flowfile/flow_data_engine/utils.py +2 -0
- flowfile_core/flowfile/flow_graph.py +240 -54
- flowfile_core/flowfile/flow_node/flow_node.py +48 -13
- flowfile_core/flowfile/flow_node/models.py +2 -1
- flowfile_core/flowfile/handler.py +24 -5
- flowfile_core/flowfile/manage/compatibility_enhancements.py +404 -41
- flowfile_core/flowfile/manage/io_flowfile.py +394 -0
- flowfile_core/flowfile/node_designer/__init__.py +47 -0
- flowfile_core/flowfile/node_designer/_type_registry.py +197 -0
- flowfile_core/flowfile/node_designer/custom_node.py +371 -0
- flowfile_core/flowfile/node_designer/ui_components.py +277 -0
- flowfile_core/flowfile/schema_callbacks.py +17 -10
- flowfile_core/flowfile/setting_generator/settings.py +15 -10
- flowfile_core/main.py +5 -1
- flowfile_core/routes/routes.py +73 -30
- flowfile_core/routes/user_defined_components.py +55 -0
- flowfile_core/schemas/cloud_storage_schemas.py +0 -2
- flowfile_core/schemas/input_schema.py +228 -65
- flowfile_core/schemas/output_model.py +5 -2
- flowfile_core/schemas/schemas.py +153 -35
- flowfile_core/schemas/transform_schema.py +1083 -412
- flowfile_core/schemas/yaml_types.py +103 -0
- flowfile_core/types.py +156 -0
- flowfile_core/utils/validate_setup.py +3 -1
- flowfile_frame/__init__.py +3 -1
- flowfile_frame/flow_frame.py +31 -24
- flowfile_frame/flow_frame_methods.py +12 -9
- flowfile_worker/__init__.py +9 -35
- flowfile_worker/create/__init__.py +3 -21
- flowfile_worker/create/funcs.py +68 -56
- flowfile_worker/create/models.py +130 -62
- flowfile_worker/main.py +5 -2
- flowfile_worker/routes.py +52 -13
- shared/__init__.py +15 -0
- shared/storage_config.py +258 -0
- tools/migrate/README.md +56 -0
- tools/migrate/__init__.py +12 -0
- tools/migrate/__main__.py +131 -0
- tools/migrate/legacy_schemas.py +621 -0
- tools/migrate/migrate.py +598 -0
- tools/migrate/tests/__init__.py +0 -0
- tools/migrate/tests/conftest.py +23 -0
- tools/migrate/tests/test_migrate.py +627 -0
- tools/migrate/tests/test_migration_e2e.py +1010 -0
- tools/migrate/tests/test_node_migrations.py +813 -0
- flowfile/web/static/assets/GraphSolver-17fd26db.css +0 -68
- flowfile/web/static/assets/Pivot-f415e85f.css +0 -35
- flowfile/web/static/assets/Read-80dc1675.css +0 -197
- flowfile/web/static/assets/Read-c3b1929c.js +0 -701
- flowfile/web/static/assets/RecordCount-4e95f98e.js +0 -122
- flowfile/web/static/assets/Union-89fd73dc.js +0 -146
- flowfile/web/static/assets/Unpivot-246e9bbd.css +0 -77
- flowfile/web/static/assets/nodeTitle-a16db7c3.js +0 -227
- flowfile/web/static/assets/nodeTitle-f4b12bcb.css +0 -134
- flowfile_core/flowfile/manage/open_flowfile.py +0 -135
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info/licenses}/LICENSE +0 -0
- /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
from typing import TypedDict, List
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# === Transform Schema YAML Types ===
|
|
5
|
+
|
|
6
|
+
class SelectInputYaml(TypedDict, total=False):
|
|
7
|
+
old_name: str
|
|
8
|
+
new_name: str
|
|
9
|
+
keep: bool
|
|
10
|
+
data_type: str
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class JoinInputsYaml(TypedDict):
|
|
14
|
+
select: List[SelectInputYaml]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class JoinMapYaml(TypedDict):
|
|
18
|
+
left_col: str
|
|
19
|
+
right_col: str
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class JoinInputYaml(TypedDict):
|
|
23
|
+
join_mapping: List[JoinMapYaml]
|
|
24
|
+
left_select: JoinInputsYaml
|
|
25
|
+
right_select: JoinInputsYaml
|
|
26
|
+
how: str
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class CrossJoinInputYaml(TypedDict):
|
|
30
|
+
left_select: JoinInputsYaml
|
|
31
|
+
right_select: JoinInputsYaml
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class FuzzyMappingYaml(TypedDict, total=False):
|
|
35
|
+
left_col: str
|
|
36
|
+
right_col: str
|
|
37
|
+
threshold_score: float
|
|
38
|
+
fuzzy_type: str
|
|
39
|
+
perc_unique: float
|
|
40
|
+
output_column_name: str
|
|
41
|
+
valid: bool
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class FuzzyMatchInputYaml(TypedDict):
|
|
45
|
+
join_mapping: List[FuzzyMappingYaml]
|
|
46
|
+
left_select: JoinInputsYaml
|
|
47
|
+
right_select: JoinInputsYaml
|
|
48
|
+
how: str
|
|
49
|
+
aggregate_output: bool
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# === Input Schema YAML Types ===
|
|
53
|
+
|
|
54
|
+
class OutputSettingsYaml(TypedDict, total=False):
|
|
55
|
+
name: str
|
|
56
|
+
directory: str
|
|
57
|
+
file_type: str
|
|
58
|
+
write_mode: str
|
|
59
|
+
abs_file_path: str
|
|
60
|
+
fields: List[str]
|
|
61
|
+
table_settings: dict
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class NodeSelectYaml(TypedDict):
|
|
65
|
+
cache_results: bool
|
|
66
|
+
keep_missing: bool
|
|
67
|
+
select_input: List[SelectInputYaml]
|
|
68
|
+
sorted_by: str
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class NodeJoinYaml(TypedDict):
|
|
72
|
+
cache_results: bool
|
|
73
|
+
auto_generate_selection: bool
|
|
74
|
+
verify_integrity: bool
|
|
75
|
+
join_input: JoinInputYaml
|
|
76
|
+
auto_keep_all: bool
|
|
77
|
+
auto_keep_right: bool
|
|
78
|
+
auto_keep_left: bool
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class NodeCrossJoinYaml(TypedDict):
|
|
82
|
+
cache_results: bool
|
|
83
|
+
auto_generate_selection: bool
|
|
84
|
+
verify_integrity: bool
|
|
85
|
+
cross_join_input: CrossJoinInputYaml
|
|
86
|
+
auto_keep_all: bool
|
|
87
|
+
auto_keep_right: bool
|
|
88
|
+
auto_keep_left: bool
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class NodeFuzzyMatchYaml(TypedDict):
|
|
92
|
+
cache_results: bool
|
|
93
|
+
auto_generate_selection: bool
|
|
94
|
+
verify_integrity: bool
|
|
95
|
+
join_input: FuzzyMatchInputYaml
|
|
96
|
+
auto_keep_all: bool
|
|
97
|
+
auto_keep_right: bool
|
|
98
|
+
auto_keep_left: bool
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class NodeOutputYaml(TypedDict):
|
|
102
|
+
cache_results: bool
|
|
103
|
+
output_settings: OutputSettingsYaml
|
flowfile_core/types.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# types.py - Public API for type specifications
|
|
2
|
+
"""
|
|
3
|
+
Public type system for column selection and data type specification.
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
from flowfile_core.types import Types
|
|
7
|
+
|
|
8
|
+
# Use type groups
|
|
9
|
+
ColumnSelector(data_types=Types.Numeric)
|
|
10
|
+
ColumnSelector(data_types=Types.String)
|
|
11
|
+
|
|
12
|
+
# Use specific types
|
|
13
|
+
ColumnSelector(data_types=Types.Int64)
|
|
14
|
+
ColumnSelector(data_types=Types.Float)
|
|
15
|
+
|
|
16
|
+
# Mix and match
|
|
17
|
+
ColumnSelector(data_types=[Types.Numeric, Types.String])
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from enum import Enum
|
|
21
|
+
from typing import List, Literal, Union
|
|
22
|
+
import polars as pl
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
DataTypeStr = Literal[
|
|
26
|
+
"Int8", "Int16", "Int32", "Int64",
|
|
27
|
+
"UInt8", "UInt16", "UInt32", "UInt64",
|
|
28
|
+
"Float32", "Float64", "Decimal",
|
|
29
|
+
"String",
|
|
30
|
+
"Date", "Datetime", "Time", "Duration",
|
|
31
|
+
"Boolean", "Binary", "List", "Struct", "Array", "Integer", "Double", "Utf8"
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class TypeGroup(str, Enum):
|
|
36
|
+
"""High-level type groups for column selection."""
|
|
37
|
+
Numeric = "Numeric"
|
|
38
|
+
String = "String"
|
|
39
|
+
Date = "Date"
|
|
40
|
+
Boolean = "Boolean"
|
|
41
|
+
Binary = "Binary"
|
|
42
|
+
Complex = "Complex"
|
|
43
|
+
All = "ALL"
|
|
44
|
+
|
|
45
|
+
def __str__(self) -> str:
|
|
46
|
+
return self.value
|
|
47
|
+
|
|
48
|
+
def __repr__(self) -> str:
|
|
49
|
+
return f"Types.{self.name}"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class DataType(str, Enum):
|
|
53
|
+
"""Specific data types for fine-grained control."""
|
|
54
|
+
# Numeric types
|
|
55
|
+
Int8 = "Int8"
|
|
56
|
+
Int16 = "Int16"
|
|
57
|
+
Int32 = "Int32"
|
|
58
|
+
Int64 = "Int64"
|
|
59
|
+
UInt8 = "UInt8"
|
|
60
|
+
UInt16 = "UInt16"
|
|
61
|
+
UInt32 = "UInt32"
|
|
62
|
+
UInt64 = "UInt64"
|
|
63
|
+
Float32 = "Float32"
|
|
64
|
+
Float64 = "Float64"
|
|
65
|
+
Decimal = "Decimal"
|
|
66
|
+
|
|
67
|
+
# String types
|
|
68
|
+
String = "String"
|
|
69
|
+
Categorical = "Categorical"
|
|
70
|
+
|
|
71
|
+
# Date types
|
|
72
|
+
Date = "Date"
|
|
73
|
+
Datetime = "Datetime"
|
|
74
|
+
Time = "Time"
|
|
75
|
+
Duration = "Duration"
|
|
76
|
+
|
|
77
|
+
# Other types
|
|
78
|
+
Boolean = "Boolean"
|
|
79
|
+
Binary = "Binary"
|
|
80
|
+
List = "List"
|
|
81
|
+
Struct = "Struct"
|
|
82
|
+
Array = "Array"
|
|
83
|
+
|
|
84
|
+
def __str__(self) -> str:
|
|
85
|
+
return self.value
|
|
86
|
+
|
|
87
|
+
def __repr__(self) -> str:
|
|
88
|
+
return f"Types.{self.name}"
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class Types:
|
|
92
|
+
"""
|
|
93
|
+
Main entry point for type specifications.
|
|
94
|
+
|
|
95
|
+
Examples:
|
|
96
|
+
Types.Numeric # All numeric columns
|
|
97
|
+
Types.String # All string columns
|
|
98
|
+
Types.Int64 # 64-bit integers only
|
|
99
|
+
Types.Float # Alias for Float64
|
|
100
|
+
Types.All # All column types
|
|
101
|
+
"""
|
|
102
|
+
|
|
103
|
+
# Type groups (most common use case)
|
|
104
|
+
Numeric = TypeGroup.Numeric
|
|
105
|
+
String = TypeGroup.String
|
|
106
|
+
AnyDate = TypeGroup.Date
|
|
107
|
+
Boolean = TypeGroup.Boolean
|
|
108
|
+
Binary = TypeGroup.Binary
|
|
109
|
+
Complex = TypeGroup.Complex
|
|
110
|
+
All = TypeGroup.All
|
|
111
|
+
|
|
112
|
+
# Specific numeric types
|
|
113
|
+
Int = DataType.Int64 # Default integer
|
|
114
|
+
Int8 = DataType.Int8
|
|
115
|
+
Int16 = DataType.Int16
|
|
116
|
+
Int32 = DataType.Int32
|
|
117
|
+
Int64 = DataType.Int64
|
|
118
|
+
UInt8 = DataType.UInt8
|
|
119
|
+
UInt16 = DataType.UInt16
|
|
120
|
+
UInt32 = DataType.UInt32
|
|
121
|
+
UInt64 = DataType.UInt64
|
|
122
|
+
|
|
123
|
+
Float = DataType.Float64 # Default float
|
|
124
|
+
Float32 = DataType.Float32
|
|
125
|
+
Float64 = DataType.Float64
|
|
126
|
+
Decimal = DataType.Decimal
|
|
127
|
+
|
|
128
|
+
# String types
|
|
129
|
+
Str = DataType.String
|
|
130
|
+
Text = DataType.String # Alias
|
|
131
|
+
Categorical = DataType.Categorical
|
|
132
|
+
Cat = DataType.Categorical # Short alias
|
|
133
|
+
|
|
134
|
+
# Date/time types
|
|
135
|
+
Date = DataType.Date
|
|
136
|
+
Datetime = DataType.Datetime
|
|
137
|
+
Time = DataType.Time
|
|
138
|
+
Duration = DataType.Duration
|
|
139
|
+
|
|
140
|
+
# Other types
|
|
141
|
+
Bool = DataType.Boolean
|
|
142
|
+
Bytes = DataType.Binary
|
|
143
|
+
List = DataType.List
|
|
144
|
+
Struct = DataType.Struct
|
|
145
|
+
Array = DataType.Array
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# Type alias for better type hints
|
|
149
|
+
TypeSpec = Union[
|
|
150
|
+
TypeGroup,
|
|
151
|
+
DataType,
|
|
152
|
+
str,
|
|
153
|
+
List[Union[TypeGroup, DataType, str, type[pl.DataType], pl.DataType]],
|
|
154
|
+
type[pl.DataType],
|
|
155
|
+
pl.DataType
|
|
156
|
+
]
|
|
@@ -3,7 +3,7 @@ as have a component in flowfile_frontend"""
|
|
|
3
3
|
|
|
4
4
|
from flowfile_core.schemas import input_schema
|
|
5
5
|
from flowfile_core.flowfile.flow_graph import FlowGraph
|
|
6
|
-
from flowfile_core.configs.node_store
|
|
6
|
+
from flowfile_core.configs.node_store import nodes_list, NodeTemplate
|
|
7
7
|
import inspect
|
|
8
8
|
|
|
9
9
|
|
|
@@ -31,6 +31,8 @@ def validate_setup():
|
|
|
31
31
|
Raises ValueError if any node is missing either.
|
|
32
32
|
"""
|
|
33
33
|
for node in nodes_list:
|
|
34
|
+
if node.custom_node:
|
|
35
|
+
continue
|
|
34
36
|
check_if_node_has_add_function_in_flow_graph(node)
|
|
35
37
|
check_if_node_has_input_schema_definition(node)
|
|
36
38
|
|
flowfile_frame/__init__.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# flowframe/__init__.py
|
|
2
2
|
"""A Polars-like API for building ETL graphs."""
|
|
3
3
|
|
|
4
|
+
from importlib.metadata import version
|
|
5
|
+
|
|
4
6
|
# Core classes
|
|
5
7
|
from flowfile_frame.flow_frame import FlowFrame # noqa: F401
|
|
6
8
|
from pl_fuzzy_frame_match.models import FuzzyMapping # noqa: F401
|
|
@@ -64,4 +66,4 @@ from polars.datatypes import ( # noqa: F401
|
|
|
64
66
|
DataType, DataTypeClass, Field
|
|
65
67
|
)
|
|
66
68
|
|
|
67
|
-
__version__ = "
|
|
69
|
+
__version__ = version("Flowfile")
|
flowfile_frame/flow_frame.py
CHANGED
|
@@ -10,7 +10,7 @@ from flowfile_frame.lazy_methods import add_lazyframe_methods
|
|
|
10
10
|
from polars._typing import (CsvEncoding, FrameInitTypes, SchemaDefinition, SchemaDict, Orientation)
|
|
11
11
|
from collections.abc import Iterator
|
|
12
12
|
|
|
13
|
-
from pl_fuzzy_frame_match import FuzzyMapping
|
|
13
|
+
from pl_fuzzy_frame_match import FuzzyMapping
|
|
14
14
|
|
|
15
15
|
from flowfile_core.flowfile.flow_graph import FlowGraph, add_connection
|
|
16
16
|
from flowfile_core.flowfile.flow_graph_utils import combine_flow_graphs_with_mapping
|
|
@@ -27,6 +27,8 @@ from flowfile_frame.join import _normalize_columns_to_list, _create_join_mapping
|
|
|
27
27
|
from flowfile_frame.utils import _check_if_convertible_to_code
|
|
28
28
|
from flowfile_frame.config import logger
|
|
29
29
|
from flowfile_frame.cloud_storage.frame_helpers import add_write_ff_to_cloud_storage
|
|
30
|
+
from collections.abc import Mapping
|
|
31
|
+
|
|
30
32
|
|
|
31
33
|
|
|
32
34
|
def can_be_expr(param: inspect.Parameter) -> bool:
|
|
@@ -624,7 +626,6 @@ class FlowFrame:
|
|
|
624
626
|
left_columns, right_columns = self._parse_join_columns(
|
|
625
627
|
on, left_on, right_on, how
|
|
626
628
|
)
|
|
627
|
-
|
|
628
629
|
# Step 5: Validate column lists have same length (except for cross join)
|
|
629
630
|
if how != 'cross' and left_columns is not None and right_columns is not None:
|
|
630
631
|
if len(left_columns) != len(right_columns):
|
|
@@ -796,33 +797,36 @@ class FlowFrame:
|
|
|
796
797
|
) -> "FlowFrame":
|
|
797
798
|
"""Execute join using native FlowFile join nodes."""
|
|
798
799
|
# Create select inputs for both frames
|
|
800
|
+
|
|
799
801
|
left_select = transform_schema.SelectInputs.create_from_pl_df(self.data)
|
|
800
802
|
right_select = transform_schema.SelectInputs.create_from_pl_df(other.data)
|
|
801
803
|
# Create appropriate join input based on join type
|
|
802
804
|
if how == 'cross':
|
|
803
805
|
join_input = transform_schema.CrossJoinInput(
|
|
804
|
-
left_select=left_select.renames,
|
|
806
|
+
left_select=transform_schema.JoinInputs(renames=left_select.renames),
|
|
805
807
|
right_select=right_select.renames,
|
|
806
808
|
)
|
|
809
|
+
join_input_manager = transform_schema.CrossJoinInputManager(join_input)
|
|
810
|
+
|
|
807
811
|
else:
|
|
808
812
|
join_input = transform_schema.JoinInput(
|
|
809
813
|
join_mapping=join_mappings,
|
|
810
|
-
left_select=left_select.renames,
|
|
814
|
+
left_select=transform_schema.JoinInputs(renames=left_select.renames),
|
|
811
815
|
right_select=right_select.renames,
|
|
812
816
|
how=how,
|
|
813
817
|
)
|
|
818
|
+
join_input_manager = transform_schema.JoinInputManager(join_input)
|
|
814
819
|
|
|
815
820
|
# Configure join input
|
|
816
|
-
|
|
817
|
-
for right_column in right_select.renames:
|
|
821
|
+
for right_column in join_input_manager.right_select.renames:
|
|
818
822
|
if right_column.join_key:
|
|
819
823
|
right_column.keep = False
|
|
820
824
|
|
|
821
825
|
# Create and add appropriate node
|
|
822
826
|
if how == 'cross':
|
|
823
|
-
self._add_cross_join_node(new_node_id,
|
|
827
|
+
self._add_cross_join_node(new_node_id, join_input_manager.to_cross_join_input(), description, other)
|
|
824
828
|
else:
|
|
825
|
-
self._add_regular_join_node(new_node_id,
|
|
829
|
+
self._add_regular_join_node(new_node_id, join_input_manager.to_join_input(), description, other)
|
|
826
830
|
|
|
827
831
|
# Add connections
|
|
828
832
|
self._add_connection(self.node_id, new_node_id, "main")
|
|
@@ -890,13 +894,18 @@ class FlowFrame:
|
|
|
890
894
|
self.flow_graph.add_record_count(node_number_of_records)
|
|
891
895
|
return self._create_child_frame(new_node_id)
|
|
892
896
|
|
|
893
|
-
def
|
|
897
|
+
def rename(self, mapping: Mapping[str, str], *, strict: bool = True,
|
|
898
|
+
description: str = None) -> "FlowFrame":
|
|
899
|
+
"""Rename columns based on a mapping or function."""
|
|
900
|
+
return self.select([col(old_name).alias(new_name) for old_name, new_name in mapping.items()],
|
|
901
|
+
description=description, _keep_missing=True)
|
|
902
|
+
|
|
903
|
+
def select(self, *columns: Union[str, Expr, Selector], description: Optional[str] = None, _keep_missing: bool = False) -> "FlowFrame":
|
|
894
904
|
"""
|
|
895
905
|
Select columns from the frame.
|
|
896
906
|
"""
|
|
897
907
|
columns_iterable = list(_parse_inputs_as_iterable(columns))
|
|
898
908
|
new_node_id = generate_node_id()
|
|
899
|
-
|
|
900
909
|
if (len(columns_iterable) == 1 and isinstance(columns_iterable[0], Expr)
|
|
901
910
|
and str(columns_iterable[0]) == "pl.Expr(len()).alias('number_of_records')"):
|
|
902
911
|
return self._add_number_of_records(new_node_id, description)
|
|
@@ -914,7 +923,6 @@ class FlowFrame:
|
|
|
914
923
|
for expr_input in effective_columns_iterable:
|
|
915
924
|
current_expr_obj = expr_input
|
|
916
925
|
is_simple_col_for_native = False
|
|
917
|
-
|
|
918
926
|
if isinstance(expr_input, str):
|
|
919
927
|
current_expr_obj = col(expr_input)
|
|
920
928
|
selected_col_names_for_native.append(transform_schema.SelectInput(old_name=expr_input))
|
|
@@ -942,14 +950,18 @@ class FlowFrame:
|
|
|
942
950
|
if can_use_native_node:
|
|
943
951
|
existing_cols = self.columns
|
|
944
952
|
selected_col_names = {select_col.old_name for select_col in selected_col_names_for_native}
|
|
945
|
-
|
|
953
|
+
not_selected_columns = [transform_schema.SelectInput(c, keep=_keep_missing) for c in existing_cols if
|
|
946
954
|
c not in selected_col_names]
|
|
947
|
-
selected_col_names_for_native.extend(
|
|
955
|
+
selected_col_names_for_native.extend(not_selected_columns)
|
|
956
|
+
if _keep_missing:
|
|
957
|
+
lookup_selection = {_col.old_name: _col for _col in selected_col_names_for_native}
|
|
958
|
+
selected_col_names_for_native = [lookup_selection.get(_col) for
|
|
959
|
+
_col in existing_cols if _col in lookup_selection]
|
|
948
960
|
select_settings = input_schema.NodeSelect(
|
|
949
961
|
flow_id=self.flow_graph.flow_id,
|
|
950
962
|
node_id=new_node_id,
|
|
951
963
|
select_input=selected_col_names_for_native,
|
|
952
|
-
keep_missing=
|
|
964
|
+
keep_missing=_keep_missing,
|
|
953
965
|
pos_x=200,
|
|
954
966
|
pos_y=100,
|
|
955
967
|
is_setup=True,
|
|
@@ -1130,16 +1142,11 @@ class FlowFrame:
|
|
|
1130
1142
|
file_name = file_str.split(os.sep)[-1]
|
|
1131
1143
|
use_polars_code = bool(kwargs.items()) or not is_path_input
|
|
1132
1144
|
|
|
1133
|
-
output_parquet_table = input_schema.OutputParquetTable(
|
|
1134
|
-
file_type="parquet"
|
|
1135
|
-
)
|
|
1136
1145
|
output_settings = input_schema.OutputSettings(
|
|
1137
1146
|
file_type='parquet',
|
|
1138
1147
|
name=file_name,
|
|
1139
1148
|
directory=file_str if is_path_input else str(file_str),
|
|
1140
|
-
|
|
1141
|
-
output_csv_table=input_schema.OutputCsvTable(),
|
|
1142
|
-
output_excel_table=input_schema.OutputExcelTable()
|
|
1149
|
+
table_settings=input_schema.OutputParquetTable()
|
|
1143
1150
|
)
|
|
1144
1151
|
|
|
1145
1152
|
if is_path_input:
|
|
@@ -1210,10 +1217,10 @@ class FlowFrame:
|
|
|
1210
1217
|
file_type='csv',
|
|
1211
1218
|
name=file_name,
|
|
1212
1219
|
directory=file_str if is_path_input else str(file_str),
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1220
|
+
table_settings=input_schema.OutputCsvTable(
|
|
1221
|
+
delimiter=separator,
|
|
1222
|
+
encoding=encoding
|
|
1223
|
+
)
|
|
1217
1224
|
)
|
|
1218
1225
|
if is_path_input:
|
|
1219
1226
|
try:
|
|
@@ -186,15 +186,17 @@ def read_csv(
|
|
|
186
186
|
file_type='csv',
|
|
187
187
|
path=current_source_path_for_native,
|
|
188
188
|
name=Path(current_source_path_for_native).name,
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
189
|
+
table_settings=input_schema.InputCsvTable(
|
|
190
|
+
delimiter=separator,
|
|
191
|
+
has_headers=has_header,
|
|
192
|
+
encoding=encoding,
|
|
193
|
+
starting_from_line=skip_rows,
|
|
194
|
+
quote_char=quote_char if quote_char is not None else '"',
|
|
195
|
+
infer_schema_length=actual_infer_schema_length if actual_infer_schema_length is not None else 10000,
|
|
196
|
+
truncate_ragged_lines=truncate_ragged_lines,
|
|
197
|
+
ignore_errors=ignore_errors,
|
|
198
|
+
row_delimiter=eol_char
|
|
199
|
+
)
|
|
198
200
|
)
|
|
199
201
|
if convert_to_absolute_path:
|
|
200
202
|
try:
|
|
@@ -407,6 +409,7 @@ def read_parquet(source, *, flow_graph: FlowGraph = None, description: str = Non
|
|
|
407
409
|
file_type='parquet',
|
|
408
410
|
path=source,
|
|
409
411
|
name=Path(source).name,
|
|
412
|
+
table_settings=input_schema.InputParquetTable()
|
|
410
413
|
)
|
|
411
414
|
if convert_to_absolute_path:
|
|
412
415
|
received_table.path = received_table.abs_file_path
|
flowfile_worker/__init__.py
CHANGED
|
@@ -1,15 +1,18 @@
|
|
|
1
1
|
from typing import Dict
|
|
2
|
-
import tempfile
|
|
3
2
|
import threading
|
|
4
3
|
import multiprocessing
|
|
5
|
-
import
|
|
6
|
-
import
|
|
7
|
-
|
|
4
|
+
from shared.storage_config import storage
|
|
5
|
+
from importlib.metadata import version
|
|
6
|
+
|
|
7
|
+
__version__ = version("Flowfile")
|
|
8
8
|
|
|
9
|
+
multiprocessing.set_start_method('spawn', force=True)
|
|
9
10
|
|
|
10
11
|
from multiprocessing import get_context
|
|
11
12
|
from flowfile_worker.models import Status
|
|
13
|
+
|
|
12
14
|
mp_context = get_context("spawn")
|
|
15
|
+
|
|
13
16
|
status_dict: Dict[str, Status] = dict()
|
|
14
17
|
process_dict = dict()
|
|
15
18
|
|
|
@@ -17,39 +20,10 @@ status_dict_lock = threading.Lock()
|
|
|
17
20
|
process_dict_lock = threading.Lock()
|
|
18
21
|
|
|
19
22
|
|
|
20
|
-
class SharedTempDirectory:
|
|
21
|
-
"""A class that mimics tempfile.TemporaryDirectory but uses a fixed directory"""
|
|
22
|
-
def __init__(self, dir_path):
|
|
23
|
-
self._path = dir_path
|
|
24
|
-
os.makedirs(self._path, exist_ok=True)
|
|
25
|
-
|
|
26
|
-
@property
|
|
27
|
-
def name(self):
|
|
28
|
-
return self._path
|
|
29
|
-
|
|
30
|
-
def cleanup(self):
|
|
31
|
-
"""Remove all contents of the temp directory"""
|
|
32
|
-
try:
|
|
33
|
-
shutil.rmtree(self._path)
|
|
34
|
-
os.makedirs(self._path, exist_ok=True)
|
|
35
|
-
print(f"Cleaned up temporary directory: {self._path}")
|
|
36
|
-
except Exception as e:
|
|
37
|
-
print(f"Error during cleanup: {e}")
|
|
38
|
-
|
|
39
|
-
def __enter__(self):
|
|
40
|
-
return self.name
|
|
41
|
-
|
|
42
|
-
def __exit__(self, exc, value, tb):
|
|
43
|
-
self.cleanup()
|
|
44
|
-
|
|
45
|
-
|
|
46
23
|
CACHE_EXPIRATION_TIME = 24 * 60 * 60
|
|
47
24
|
|
|
48
25
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
CACHE_DIR = SharedTempDirectory(TEMP_DIR)
|
|
52
|
-
else:
|
|
53
|
-
CACHE_DIR = tempfile.TemporaryDirectory()
|
|
26
|
+
CACHE_DIR = storage.cache_directory
|
|
27
|
+
|
|
54
28
|
|
|
55
29
|
PROCESS_MEMORY_USAGE: Dict[str, float] = dict()
|
|
@@ -1,29 +1,11 @@
|
|
|
1
|
-
|
|
2
|
-
ReceivedJsonTable)
|
|
1
|
+
|
|
3
2
|
from flowfile_worker.create.funcs import (create_from_path_csv, create_from_path_parquet, create_from_path_excel,
|
|
4
3
|
create_from_path_json)
|
|
5
|
-
from typing import
|
|
4
|
+
from typing import Literal
|
|
6
5
|
|
|
7
|
-
ReceivedTableCollection = ReceivedCsvTable | ReceivedParquetTable | ReceivedJsonTable | ReceivedExcelTable
|
|
8
6
|
FileType = Literal['csv', 'parquet', 'json', 'excel']
|
|
9
7
|
|
|
10
|
-
|
|
11
|
-
def received_table_parser(received_table_raw: Dict, file_type: FileType) -> ReceivedTableCollection:
|
|
12
|
-
match file_type:
|
|
13
|
-
case 'csv':
|
|
14
|
-
received_table = ReceivedCsvTable.model_validate(received_table_raw)
|
|
15
|
-
case 'parquet':
|
|
16
|
-
received_table = ReceivedParquetTable.model_validate(received_table_raw)
|
|
17
|
-
case 'excel':
|
|
18
|
-
received_table = ReceivedExcelTable.model_validate(received_table_raw)
|
|
19
|
-
case 'json':
|
|
20
|
-
return ReceivedJsonTable.model_validate(received_table_raw)
|
|
21
|
-
case _:
|
|
22
|
-
raise ValueError(f'Unsupported file type: {file_type}')
|
|
23
|
-
return received_table
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def table_creator_factory_method(file_type: Literal['csv', 'parquet', 'json', 'excel']) -> callable:
|
|
8
|
+
def table_creator_factory_method(file_type: FileType) -> callable:
|
|
27
9
|
match file_type:
|
|
28
10
|
case 'csv':
|
|
29
11
|
return create_from_path_csv
|