Flowfile 0.3.9-py3-none-any.whl → 0.5.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowfile/__init__.py +8 -1
- flowfile/api.py +1 -3
- flowfile/web/static/assets/{CloudConnectionManager-c97c25f8.js → CloudConnectionManager-0dfba9f2.js} +2 -2
- flowfile/web/static/assets/{CloudStorageReader-f1ff509e.js → CloudStorageReader-d5b1b6c9.js} +11 -78
- flowfile/web/static/assets/{CloudStorageWriter-034f8b78.js → CloudStorageWriter-00d87aad.js} +12 -79
- flowfile/web/static/assets/{CloudStorageWriter-49c9a4b2.css → CloudStorageWriter-b0ee067f.css} +24 -24
- flowfile/web/static/assets/ColumnSelector-4685e75d.js +83 -0
- flowfile/web/static/assets/ColumnSelector-47996a16.css +10 -0
- flowfile/web/static/assets/ContextMenu-23e909da.js +41 -0
- flowfile/web/static/assets/{SettingsSection-9c836ecc.css → ContextMenu-4c74eef1.css} +0 -21
- flowfile/web/static/assets/ContextMenu-63cfa99b.css +26 -0
- flowfile/web/static/assets/ContextMenu-70ae0c79.js +41 -0
- flowfile/web/static/assets/ContextMenu-c13f91d0.css +26 -0
- flowfile/web/static/assets/ContextMenu-f149cf7c.js +41 -0
- flowfile/web/static/assets/{CrossJoin-41efa4cb.css → CrossJoin-1119d18e.css} +18 -18
- flowfile/web/static/assets/{CrossJoin-9e156ebe.js → CrossJoin-702a3edd.js} +14 -84
- flowfile/web/static/assets/CustomNode-74a37f74.css +32 -0
- flowfile/web/static/assets/CustomNode-b1519993.js +211 -0
- flowfile/web/static/assets/{DatabaseConnectionSettings-d5c625b3.js → DatabaseConnectionSettings-6f3e4ea5.js} +3 -3
- flowfile/web/static/assets/{DatabaseManager-265adc5e.js → DatabaseManager-cf5ef661.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-f50c6558.css → DatabaseReader-ae61773c.css} +0 -27
- flowfile/web/static/assets/{DatabaseReader-0b10551e.js → DatabaseReader-d38c7295.js} +14 -114
- flowfile/web/static/assets/{DatabaseWriter-c17c6916.js → DatabaseWriter-b04ef46a.js} +13 -74
- flowfile/web/static/assets/{ExploreData-5bdae813.css → ExploreData-2d0cf4db.css} +8 -14
- flowfile/web/static/assets/ExploreData-5fa10ed8.js +192 -0
- flowfile/web/static/assets/{ExternalSource-3a66556c.js → ExternalSource-d39af878.js} +8 -79
- flowfile/web/static/assets/{Filter-91ad87e7.js → Filter-9b6d08db.js} +12 -85
- flowfile/web/static/assets/{Filter-a9d08ba1.css → Filter-f62091b3.css} +3 -3
- flowfile/web/static/assets/{Formula-3c395ab1.js → Formula-6b04fb1d.js} +20 -87
- flowfile/web/static/assets/{Formula-29f19d21.css → Formula-bb96803d.css} +4 -4
- flowfile/web/static/assets/{FuzzyMatch-6857de82.css → FuzzyMatch-1010f966.css} +42 -42
- flowfile/web/static/assets/{FuzzyMatch-2df0d230.js → FuzzyMatch-999521f4.js} +16 -87
- flowfile/web/static/assets/{GraphSolver-d285877f.js → GraphSolver-17dd2198.js} +13 -159
- flowfile/web/static/assets/GraphSolver-f0cb7bfb.css +22 -0
- flowfile/web/static/assets/{GroupBy-0bd1cc6b.js → GroupBy-6b039e18.js} +12 -75
- flowfile/web/static/assets/{Unique-b5615727.css → GroupBy-b9505323.css} +8 -8
- flowfile/web/static/assets/{Join-5a78a203.js → Join-24d0f113.js} +15 -85
- flowfile/web/static/assets/{Join-f45eff22.css → Join-fd79b451.css} +20 -20
- flowfile/web/static/assets/{ManualInput-a71b52c6.css → ManualInput-3246a08d.css} +20 -20
- flowfile/web/static/assets/{ManualInput-93aef9d6.js → ManualInput-34639209.js} +11 -82
- flowfile/web/static/assets/MultiSelect-0e8724a3.js +5 -0
- flowfile/web/static/assets/MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js +63 -0
- flowfile/web/static/assets/NumericInput-3d63a470.js +5 -0
- flowfile/web/static/assets/NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js +35 -0
- flowfile/web/static/assets/Output-283fe388.css +37 -0
- flowfile/web/static/assets/{Output-411ecaee.js → Output-edea9802.js} +62 -273
- flowfile/web/static/assets/{Pivot-89db4b04.js → Pivot-61d19301.js} +14 -138
- flowfile/web/static/assets/Pivot-cf333e3d.css +22 -0
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +13 -0
- flowfile/web/static/assets/PivotValidation-c46cd420.css +13 -0
- flowfile/web/static/assets/PivotValidation-de9f43fe.js +61 -0
- flowfile/web/static/assets/PivotValidation-f97fec5b.js +61 -0
- flowfile/web/static/assets/{PolarsCode-a9f974f8.js → PolarsCode-bc3c9984.js} +13 -80
- flowfile/web/static/assets/Read-64a3f259.js +218 -0
- flowfile/web/static/assets/Read-e808b239.css +62 -0
- flowfile/web/static/assets/RecordCount-3d5039be.js +53 -0
- flowfile/web/static/assets/{RecordId-55ae7d36.js → RecordId-597510e0.js} +8 -80
- flowfile/web/static/assets/SQLQueryComponent-36cef432.css +27 -0
- flowfile/web/static/assets/SQLQueryComponent-df51adbe.js +38 -0
- flowfile/web/static/assets/{Sample-b4a18476.js → Sample-4be0a507.js} +8 -77
- flowfile/web/static/assets/{SecretManager-b066d13a.js → SecretManager-4839be57.js} +2 -2
- flowfile/web/static/assets/{Select-727688dc.js → Select-9b72f201.js} +11 -85
- flowfile/web/static/assets/SettingsSection-2e4d03c4.css +21 -0
- flowfile/web/static/assets/SettingsSection-5c696bee.css +20 -0
- flowfile/web/static/assets/SettingsSection-71e6b7e3.css +21 -0
- flowfile/web/static/assets/SettingsSection-7ded385d.js +45 -0
- flowfile/web/static/assets/{SettingsSection-695ac487.js → SettingsSection-e1e9c953.js} +2 -40
- flowfile/web/static/assets/SettingsSection-f0f75a42.js +53 -0
- flowfile/web/static/assets/SingleSelect-6c777aac.js +5 -0
- flowfile/web/static/assets/SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js +62 -0
- flowfile/web/static/assets/SliderInput-7cb93e62.js +40 -0
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +4 -0
- flowfile/web/static/assets/{GroupBy-ab1ea74b.css → Sort-3643d625.css} +8 -8
- flowfile/web/static/assets/{Sort-be3339a8.js → Sort-6cbde21a.js} +12 -97
- flowfile/web/static/assets/TextInput-d9a40c11.js +5 -0
- flowfile/web/static/assets/TextInput.vue_vue_type_script_setup_true_lang-5896c375.js +32 -0
- flowfile/web/static/assets/{TextToRows-c92d1ec2.css → TextToRows-5d2c1190.css} +9 -9
- flowfile/web/static/assets/{TextToRows-7b8998da.js → TextToRows-c4fcbf4d.js} +14 -83
- flowfile/web/static/assets/ToggleSwitch-4ef91d19.js +5 -0
- flowfile/web/static/assets/ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js +31 -0
- flowfile/web/static/assets/{UnavailableFields-8b0cb48e.js → UnavailableFields-a03f512c.js} +2 -2
- flowfile/web/static/assets/{Union-8d9ac7f9.css → Union-af6c3d9b.css} +6 -6
- flowfile/web/static/assets/Union-bfe9b996.js +77 -0
- flowfile/web/static/assets/{Unique-af5a80b4.js → Unique-5d023a27.js} +23 -104
- flowfile/web/static/assets/{Sort-7ccfa0fe.css → Unique-f9fb0809.css} +8 -8
- flowfile/web/static/assets/Unpivot-1e422df3.css +30 -0
- flowfile/web/static/assets/{Unpivot-5195d411.js → Unpivot-91cc5354.js} +12 -166
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +13 -0
- flowfile/web/static/assets/UnpivotValidation-7ee2de44.js +51 -0
- flowfile/web/static/assets/{ExploreData-18a4fe52.js → VueGraphicWalker-e51b9924.js} +4 -264
- flowfile/web/static/assets/VueGraphicWalker-ed5ab88b.css +6 -0
- flowfile/web/static/assets/{api-cb00cce6.js → api-c1bad5ca.js} +1 -1
- flowfile/web/static/assets/{api-023d1733.js → api-cf1221f0.js} +1 -1
- flowfile/web/static/assets/{designer-2197d782.css → designer-8da3ba3a.css} +859 -201
- flowfile/web/static/assets/{designer-6c322d8e.js → designer-9633482a.js} +2297 -733
- flowfile/web/static/assets/{documentation-4d1fafe1.js → documentation-ca400224.js} +1 -1
- flowfile/web/static/assets/{dropDown-0b46dd77.js → dropDown-614b998d.js} +1 -1
- flowfile/web/static/assets/{fullEditor-ec4e4f95.js → fullEditor-f7971590.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-def5879b.js → genericNodeSettings-4fe5f36b.js} +3 -3
- flowfile/web/static/assets/{index-681a3ed0.css → index-50508d4d.css} +8 -0
- flowfile/web/static/assets/{index-683fc198.js → index-5429bbf8.js} +208 -31
- flowfile/web/static/assets/nodeInput-5d0d6b79.js +41 -0
- flowfile/web/static/assets/outputCsv-076b85ab.js +86 -0
- flowfile/web/static/assets/{Output-48f81019.css → outputCsv-9cc59e0b.css} +0 -143
- flowfile/web/static/assets/outputExcel-0fd17dbe.js +56 -0
- flowfile/web/static/assets/outputExcel-b41305c0.css +102 -0
- flowfile/web/static/assets/outputParquet-b61e0847.js +31 -0
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +4 -0
- flowfile/web/static/assets/readCsv-a8bb8b61.js +179 -0
- flowfile/web/static/assets/readCsv-c767cb37.css +52 -0
- flowfile/web/static/assets/readExcel-67b4aee0.js +201 -0
- flowfile/web/static/assets/readExcel-806d2826.css +64 -0
- flowfile/web/static/assets/readParquet-48c81530.css +19 -0
- flowfile/web/static/assets/readParquet-92ce1dbc.js +23 -0
- flowfile/web/static/assets/{secretApi-baceb6f9.js → secretApi-68435402.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-de91449a.js → selectDynamic-92e25ee3.js} +7 -7
- flowfile/web/static/assets/{selectDynamic-b062bc9b.css → selectDynamic-aa913ff4.css} +16 -16
- flowfile/web/static/assets/user-defined-icon-0ae16c90.png +0 -0
- flowfile/web/static/assets/{vue-codemirror.esm-dc5e3348.js → vue-codemirror.esm-41b0e0d7.js} +65 -36
- flowfile/web/static/assets/{vue-content-loader.es-ba94b82f.js → vue-content-loader.es-2c8e608f.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/METADATA +5 -3
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/RECORD +191 -121
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/WHEEL +1 -1
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/entry_points.txt +1 -0
- flowfile_core/__init__.py +3 -0
- flowfile_core/configs/flow_logger.py +5 -13
- flowfile_core/configs/node_store/__init__.py +30 -0
- flowfile_core/configs/node_store/nodes.py +383 -99
- flowfile_core/configs/node_store/user_defined_node_registry.py +193 -0
- flowfile_core/configs/settings.py +2 -1
- flowfile_core/database/connection.py +5 -21
- flowfile_core/fileExplorer/funcs.py +239 -121
- flowfile_core/flowfile/analytics/analytics_processor.py +1 -0
- flowfile_core/flowfile/code_generator/code_generator.py +62 -64
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +73 -56
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +77 -86
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +4 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +19 -34
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +36 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +23 -23
- flowfile_core/flowfile/flow_data_engine/join/utils.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +9 -4
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +212 -86
- flowfile_core/flowfile/flow_data_engine/utils.py +2 -0
- flowfile_core/flowfile/flow_graph.py +240 -54
- flowfile_core/flowfile/flow_node/flow_node.py +48 -13
- flowfile_core/flowfile/flow_node/models.py +2 -1
- flowfile_core/flowfile/handler.py +24 -5
- flowfile_core/flowfile/manage/compatibility_enhancements.py +404 -41
- flowfile_core/flowfile/manage/io_flowfile.py +394 -0
- flowfile_core/flowfile/node_designer/__init__.py +47 -0
- flowfile_core/flowfile/node_designer/_type_registry.py +197 -0
- flowfile_core/flowfile/node_designer/custom_node.py +371 -0
- flowfile_core/flowfile/node_designer/ui_components.py +277 -0
- flowfile_core/flowfile/schema_callbacks.py +17 -10
- flowfile_core/flowfile/setting_generator/settings.py +15 -10
- flowfile_core/main.py +5 -1
- flowfile_core/routes/routes.py +73 -30
- flowfile_core/routes/user_defined_components.py +55 -0
- flowfile_core/schemas/cloud_storage_schemas.py +0 -2
- flowfile_core/schemas/input_schema.py +228 -65
- flowfile_core/schemas/output_model.py +5 -2
- flowfile_core/schemas/schemas.py +153 -35
- flowfile_core/schemas/transform_schema.py +1083 -412
- flowfile_core/schemas/yaml_types.py +103 -0
- flowfile_core/types.py +156 -0
- flowfile_core/utils/validate_setup.py +3 -1
- flowfile_frame/__init__.py +3 -1
- flowfile_frame/flow_frame.py +31 -24
- flowfile_frame/flow_frame_methods.py +12 -9
- flowfile_worker/__init__.py +9 -35
- flowfile_worker/create/__init__.py +3 -21
- flowfile_worker/create/funcs.py +68 -56
- flowfile_worker/create/models.py +130 -62
- flowfile_worker/main.py +5 -2
- flowfile_worker/routes.py +52 -13
- shared/__init__.py +15 -0
- shared/storage_config.py +258 -0
- tools/migrate/README.md +56 -0
- tools/migrate/__init__.py +12 -0
- tools/migrate/__main__.py +131 -0
- tools/migrate/legacy_schemas.py +621 -0
- tools/migrate/migrate.py +598 -0
- tools/migrate/tests/__init__.py +0 -0
- tools/migrate/tests/conftest.py +23 -0
- tools/migrate/tests/test_migrate.py +627 -0
- tools/migrate/tests/test_migration_e2e.py +1010 -0
- tools/migrate/tests/test_node_migrations.py +813 -0
- flowfile/web/static/assets/GraphSolver-17fd26db.css +0 -68
- flowfile/web/static/assets/Pivot-f415e85f.css +0 -35
- flowfile/web/static/assets/Read-80dc1675.css +0 -197
- flowfile/web/static/assets/Read-c3b1929c.js +0 -701
- flowfile/web/static/assets/RecordCount-4e95f98e.js +0 -122
- flowfile/web/static/assets/Union-89fd73dc.js +0 -146
- flowfile/web/static/assets/Unpivot-246e9bbd.css +0 -77
- flowfile/web/static/assets/nodeTitle-a16db7c3.js +0 -227
- flowfile/web/static/assets/nodeTitle-f4b12bcb.css +0 -134
- flowfile_core/flowfile/manage/open_flowfile.py +0 -135
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info/licenses}/LICENSE +0 -0
- /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
flowfile_worker/create/funcs.py
CHANGED

```diff
@@ -1,94 +1,100 @@
 import polars as pl
 import os
 
-from flowfile_worker.create.models import …
+from flowfile_worker.create.models import ReceivedTable, InputCsvTable, InputJsonTable, InputExcelTable, InputParquetTable
 from flowfile_worker.create.utils import create_fake_data
 from flowfile_worker.create.read_excel_tables import df_from_openpyxl, df_from_calamine_xlsx
 
 
-def create_from_path_json(received_table: …
+def create_from_path_json(received_table: ReceivedTable):
+    if not isinstance(received_table.table_settings, InputJsonTable):
+        raise ValueError("Received table settings are not of type InputJsonTable")
+    input_table_settings: InputJsonTable = received_table.table_settings
     f = received_table.abs_file_path
     gbs_to_load = os.path.getsize(f) / 1024 / 1000 / 1000
     low_mem = gbs_to_load > 10
-    if …
+    if input_table_settings.encoding.upper() == 'UTF8' or input_table_settings.encoding.upper() == 'UTF-8':
         try:
             df = pl.scan_csv(f,
                              low_memory=low_mem,
                              try_parse_dates=True,
-                             separator=…
-                             has_header=…
-                             skip_rows=…
+                             separator=input_table_settings.delimiter,
+                             has_header=input_table_settings.has_headers,
+                             skip_rows=input_table_settings.starting_from_line,
                              encoding='utf8',
-                             infer_schema_length=…
+                             infer_schema_length=input_table_settings.infer_schema_length)
             df.head(1).collect()
             return df
         except:
             try:
                 df = pl.scan_csv(f, low_memory=low_mem,
-                                 separator=…
-                                 has_header=…
-                                 skip_rows=…
+                                 separator=input_table_settings.delimiter,
+                                 has_header=input_table_settings.has_headers,
+                                 skip_rows=input_table_settings.starting_from_line,
                                  encoding='utf8-lossy',
                                  ignore_errors=True)
                 return df
             except:
                 df = pl.scan_csv(f, low_memory=low_mem,
-                                 separator=…
-                                 has_header=…
-                                 skip_rows=…
+                                 separator=input_table_settings.delimiter,
+                                 has_header=input_table_settings.has_headers,
+                                 skip_rows=input_table_settings.starting_from_line,
                                  encoding='utf8',
                                  ignore_errors=True)
                 return df
     else:
         df = pl.read_csv(f, low_memory=low_mem,
-                         separator=…
-                         has_header=…
-                         skip_rows=…
-                         encoding=…
+                         separator=input_table_settings.delimiter,
+                         has_header=input_table_settings.has_headers,
+                         skip_rows=input_table_settings.starting_from_line,
+                         encoding=input_table_settings.encoding,
                          ignore_errors=True)
         return df
 
 
-def create_from_path_csv(received_table: …
+def create_from_path_csv(received_table: ReceivedTable) -> pl.DataFrame:
     f = received_table.abs_file_path
+    if not isinstance(received_table.table_settings, InputCsvTable):
+        raise ValueError("Received table settings are not of type InputCsvTable")
+    input_table_settings: InputCsvTable = received_table.table_settings
     gbs_to_load = os.path.getsize(f) / 1024 / 1000 / 1000
     low_mem = gbs_to_load > 10
-    if …
+    if input_table_settings.encoding.upper() == 'UTF8' or input_table_settings.encoding.upper() == 'UTF-8':
         try:
             df = pl.scan_csv(f,
                              low_memory=low_mem,
                              try_parse_dates=True,
-                             separator=…
-                             has_header=…
-                             skip_rows=…
+                             separator=input_table_settings.delimiter,
+                             has_header=input_table_settings.has_headers,
+                             skip_rows=input_table_settings.starting_from_line,
                              encoding='utf8',
-                             infer_schema_length=…
+                             infer_schema_length=input_table_settings.infer_schema_length)
             df.head(1).collect()
             return df
         except:
             try:
                 df = pl.scan_csv(f, low_memory=low_mem,
-                                 separator=…
-                                 has_header=…
-                                 skip_rows=…
+                                 separator=input_table_settings.delimiter,
+                                 has_header=input_table_settings.has_headers,
+                                 skip_rows=input_table_settings.starting_from_line,
                                  encoding='utf8-lossy',
                                  ignore_errors=True)
                 return df
             except:
                 df = pl.scan_csv(f, low_memory=low_mem,
-                                 separator=…
-                                 has_header=…
-                                 skip_rows=…
+                                 separator=input_table_settings.delimiter,
+                                 has_header=input_table_settings.has_headers,
+                                 skip_rows=input_table_settings.starting_from_line,
                                  encoding='utf8',
                                  ignore_errors=True)
                 return df
     else:
         df = pl.read_csv(f,
                          low_memory=low_mem,
-                         separator=…
-                         has_header=…
-                         skip_rows=…
-                         encoding=…
+                         separator=input_table_settings.delimiter,
+                         has_header=input_table_settings.has_headers,
+                         skip_rows=input_table_settings.starting_from_line,
+                         encoding=input_table_settings.encoding,
                          ignore_errors=True)
         return df
 
@@ -97,50 +103,56 @@ def create_random(number_of_records: int = 1000) -> pl.LazyFrame:
     return create_fake_data(number_of_records).lazy()
 
 
-def create_from_path_parquet(received_table: …
+def create_from_path_parquet(received_table: ReceivedTable):
+    if not isinstance(received_table.table_settings, InputParquetTable):
+        raise ValueError("Received table settings are not of type InputParquetTable")
     low_mem = (os.path.getsize(received_table.abs_file_path) / 1024 / 1000 / 1000) > 2
     return pl.scan_parquet(source=received_table.abs_file_path, low_memory=low_mem)
 
 
-def create_from_path_excel(received_table: …
-    if received_table.…
+def create_from_path_excel(received_table: ReceivedTable):
+    if not isinstance(received_table.table_settings, InputExcelTable):
+        raise ValueError("Received table settings are not of type InputExcelTable")
+    input_table_settings: InputExcelTable = received_table.table_settings
+
+    if input_table_settings.type_inference:
         engine = 'openpyxl'
-    elif …
-        engine = 'calamine' if …
-    elif …
+    elif input_table_settings.start_row > 0 and input_table_settings.start_column == 0:
+        engine = 'calamine' if input_table_settings.has_headers else 'xlsx2csv'
+    elif input_table_settings.start_column > 0 or input_table_settings.start_row > 0:
         engine = 'openpyxl'
     else:
         engine = 'calamine'
 
-    sheet_name = …
+    sheet_name = input_table_settings.sheet_name
 
     if engine == 'calamine':
         df = df_from_calamine_xlsx(file_path=received_table.abs_file_path, sheet_name=sheet_name,
-                                   start_row=…
-        if …
-            end_col_index = …
-            cols_to_select = [df.columns[i] for i in range(…
+                                   start_row=input_table_settings.start_row, end_row=input_table_settings.end_row)
+        if input_table_settings.end_column > 0:
+            end_col_index = input_table_settings.end_column
+            cols_to_select = [df.columns[i] for i in range(input_table_settings.start_column, end_col_index)]
             df = df.select(cols_to_select)
 
     elif engine == 'xlsx2csv':
-        csv_options = {'has_header': …
+        csv_options = {'has_header': input_table_settings.has_headers, 'skip_rows': input_table_settings.start_row}
         df = pl.read_excel(source=received_table.abs_file_path,
                            read_options=csv_options,
                            engine='xlsx2csv',
-                           sheet_name=…
-        end_col_index = …
-        cols_to_select = [df.columns[i] for i in range(…
+                           sheet_name=input_table_settings.sheet_name)
+        end_col_index = input_table_settings.end_column if input_table_settings.end_column > 0 else len(df.columns)
+        cols_to_select = [df.columns[i] for i in range(input_table_settings.start_column, end_col_index)]
         df = df.select(cols_to_select)
-        if 0 < …
-            df = df.head(…
+        if 0 < input_table_settings.end_row < len(df):
+            df = df.head(input_table_settings.end_row)
 
     else:
-        max_col = …
-        max_row = …
+        max_col = input_table_settings.end_column if input_table_settings.end_column > 0 else None
+        max_row = input_table_settings.end_row + 1 if input_table_settings.end_row > 0 else None
        df = df_from_openpyxl(file_path=received_table.abs_file_path,
-                              sheet_name=…
-                              min_row=…
-                              min_col=…
+                              sheet_name=input_table_settings.sheet_name,
+                              min_row=input_table_settings.start_row + 1,
+                              min_col=input_table_settings.start_column + 1,
                               max_row=max_row,
-                              max_col=max_col, has_headers=…
+                              max_col=max_col, has_headers=input_table_settings.has_headers)
     return df
```
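Seen from the call side, this refactor replaces loosely typed per-format fields with a `table_settings` object that each reader checks before use, so a mismatched payload fails fast with a `ValueError` instead of a late `AttributeError`. A minimal sketch of driving the CSV reader; the file path is a placeholder, all other names appear in the diff above:

```python
from flowfile_worker.create.models import ReceivedTable, InputCsvTable
from flowfile_worker.create.funcs import create_from_path_csv

# Build a typed request for a semicolon-delimited CSV ("data/sales.csv" is a placeholder).
received = ReceivedTable(
    path="data/sales.csv",
    file_type="csv",
    table_settings=InputCsvTable(delimiter=";", has_headers=True),
)

# abs_file_path is already resolved by the model's after-validator, so the
# reader can go straight to os.path.getsize and pl.scan_csv.
df = create_from_path_csv(received)  # lazy scan when the encoding is plain UTF-8
```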
flowfile_worker/create/models.py
CHANGED

```diff
@@ -1,5 +1,5 @@
-from pydantic import BaseModel, Field, model_validator
-from typing import List, Optional
+from pydantic import BaseModel, Field, model_validator, field_validator
+from typing import List, Optional, Literal, Annotated
 import os
 from pathlib import Path
 
@@ -9,78 +9,146 @@ class MinimalFieldInfo(BaseModel):
     data_type: str
 
 
-class …
-…
-…
-    path: str
-    directory: Optional[str] = None
-    analysis_file_available: Optional[bool] = False
-    status: Optional[str] = None
-    file_type: Optional[str] = None
-    fields: List[MinimalFieldInfo] = Field(default_factory=list)
-    abs_file_path: Optional[str] = None
+class InputTableBase(BaseModel):
+    """Base settings for input file operations."""
+    file_type: str  # Will be overridden with Literal in subclasses
 
-    @classmethod
-    def create_from_path(cls, path: str):
-        filename = os.path.basename(path)
-        return cls(name=filename, path=path)
 
-…
-…
-…
-…
-…
-…
-…
-…
-        abs_file_path = getattr(values, "abs_file_path", None)
-        if abs_file_path is None:
-            path = getattr(values, "path", None)
-            if not path:
-                raise ValueError("Field 'path' is required to compute abs_file_path")
-            setattr(values, "abs_file_path", str(Path(path).absolute()))
-        return values
-
-
-class ReceivedCsvTable(ReceivedTableBase):
-    file_type: Optional[str] = 'csv'
-    reference: Optional[str] = ''
-    starting_from_line: Optional[int] = 0
-    delimiter: Optional[str] = ','
-    has_headers: Optional[bool] = True
-    encoding: Optional[str] = 'utf-8'
+class InputCsvTable(InputTableBase):
+    """Defines settings for reading a CSV file."""
+    file_type: Literal['csv'] = 'csv'
+    reference: str = ''
+    starting_from_line: int = 0
+    delimiter: str = ','
+    has_headers: bool = True
+    encoding: str = 'utf-8'
     parquet_ref: Optional[str] = None
-    row_delimiter: …
-    quote_char: …
-    infer_schema_length: …
-    truncate_ragged_lines: …
-    ignore_errors: …
+    row_delimiter: str = '\n'
+    quote_char: str = '"'
+    infer_schema_length: int = 10_000
+    truncate_ragged_lines: bool = False
+    ignore_errors: bool = False
 
 
-class …
-…
+class InputJsonTable(InputCsvTable):
+    """Defines settings for reading a JSON file."""
+    file_type: Literal['json'] = 'json'
 
 
-class …
-…
+class InputParquetTable(InputTableBase):
+    """Defines settings for reading a Parquet file."""
+    file_type: Literal['parquet'] = 'parquet'
 
 
-class …
+class InputExcelTable(InputTableBase):
+    """Defines settings for reading an Excel file."""
+    file_type: Literal['excel'] = 'excel'
     sheet_name: Optional[str] = None
-    start_row: …
-    start_column: …
-    end_row: …
-    end_column: …
-    has_headers: …
-    type_inference: …
-…
+    start_row: int = 0
+    start_column: int = 0
+    end_row: int = 0
+    end_column: int = 0
+    has_headers: bool = True
+    type_inference: bool = False
+
+    @model_validator(mode='after')
     def validate_range_values(self):
-…
+        """Validates that the Excel cell range is logical."""
         for attribute in [self.start_row, self.start_column, self.end_row, self.end_column]:
             if not isinstance(attribute, int) or attribute < 0:
                 raise ValueError("Row and column indices must be non-negative integers")
+        if (self.end_row > 0 and self.start_row > self.end_row) or \
+                (self.end_column > 0 and self.start_column > self.end_column):
+            raise ValueError("Start row/column must not be greater than end row/column")
+        return self
+
+
+# Create the discriminated union (similar to OutputTableSettings)
+InputTableSettings = Annotated[
+    InputCsvTable | InputJsonTable | InputParquetTable | InputExcelTable,
+    Field(discriminator='file_type')
+]
+
+
+# Now create the main ReceivedTable model
+class ReceivedTable(BaseModel):
+    """Model for defining a table received from an external source."""
+    # Metadata fields
+    id: Optional[int] = None
+    name: Optional[str] = None
+    path: str  # This can be an absolute or relative path
+    directory: Optional[str] = None
+    analysis_file_available: bool = False
+    status: Optional[str] = None
+    fields: List[MinimalFieldInfo] = Field(default_factory=list)
+    abs_file_path: Optional[str] = None
+
+    file_type: Literal['csv', 'json', 'parquet', 'excel']
+
+    table_settings: InputTableSettings
+
+    @classmethod
+    def create_from_path(cls, path: str, file_type: Literal['csv', 'json', 'parquet', 'excel'] = 'csv'):
+        """Creates an instance from a file path string."""
+        filename = Path(path).name
+
+        # Create appropriate table_settings based on file_type
+        settings_map = {
+            'csv': InputCsvTable(),
+            'json': InputJsonTable(),
+            'parquet': InputParquetTable(),
+            'excel': InputExcelTable(),
+        }
+
+        return cls(
+            name=filename,
+            path=path,
+            file_type=file_type,
+            table_settings=settings_map.get(file_type, InputCsvTable())
+        )
+
+    @property
+    def file_path(self) -> str:
+        """Constructs the full file path from the directory and name."""
+        if self.name and self.name not in self.path:
+            return os.path.join(self.path, self.name)
+        else:
+            return self.path
+
+    def set_absolute_filepath(self):
+        """Resolves the path to an absolute file path."""
+        base_path = Path(self.path).expanduser()
+        if not base_path.is_absolute():
+            base_path = Path.cwd() / base_path
+        if self.name and self.name not in base_path.name:
+            base_path = base_path / self.name
+        self.abs_file_path = str(base_path.resolve())
+
+    @field_validator('table_settings', mode='before')
+    @classmethod
+    def validate_table_settings(cls, v, info):
+        """Ensures table_settings matches the file_type."""
+        if v is None:
+            file_type = info.data.get('file_type', 'csv')
+            # Create default based on file_type
+            settings_map = {
+                'csv': InputCsvTable(),
+                'json': InputJsonTable(),
+                'parquet': InputParquetTable(),
+                'excel': InputExcelTable(),
+            }
+            return settings_map.get(file_type, InputCsvTable())
+
+        # If it's a dict, add file_type if missing
+        if isinstance(v, dict) and 'file_type' not in v:
+            v['file_type'] = info.data.get('file_type', 'csv')
+
+        return v
+
+    @model_validator(mode='after')
+    def populate_abs_file_path(self):
+        """Ensures the absolute file path is populated after validation."""
+        if not self.abs_file_path:
+            self.set_absolute_filepath()
+        return self
 
-        # Validate that start is before end if end is specified (non-zero)
-        if (0 < self.end_row < self.start_row) or \
-                (0 < self.end_column < self.start_column):
-            raise ValueError("Start row/column must not be greater than end row/column if specified")
```
CHANGED
|
@@ -4,8 +4,11 @@ import signal
|
|
|
4
4
|
|
|
5
5
|
from contextlib import asynccontextmanager
|
|
6
6
|
from fastapi import FastAPI
|
|
7
|
+
|
|
8
|
+
from shared.storage_config import storage
|
|
9
|
+
|
|
7
10
|
from flowfile_worker.routes import router
|
|
8
|
-
from flowfile_worker import mp_context
|
|
11
|
+
from flowfile_worker import mp_context
|
|
9
12
|
from flowfile_worker.configs import logger, FLOWFILE_CORE_URI, SERVICE_HOST, SERVICE_PORT
|
|
10
13
|
|
|
11
14
|
|
|
@@ -30,7 +33,7 @@ async def shutdown_handler(app: FastAPI):
|
|
|
30
33
|
logger.error(f"Error cleaning up process: {e}")
|
|
31
34
|
|
|
32
35
|
try:
|
|
33
|
-
|
|
36
|
+
storage.cleanup_directories()
|
|
34
37
|
except Exception as e:
|
|
35
38
|
print(f"Error cleaning up cache directory: {e}")
|
|
36
39
|
|
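The worker now delegates cache cleanup to the new shared package rather than an inlined routine. A minimal sketch of the shutdown wiring, assuming a conventional FastAPI lifespan; only `storage.cleanup_directories()` and the surrounding try/except come from the diff itself:

```python
from contextlib import asynccontextmanager

from fastapi import FastAPI

from shared.storage_config import storage


@asynccontextmanager
async def lifespan(app: FastAPI):
    yield  # application serves requests here
    try:
        # One call now cleans up whatever directories storage_config manages,
        # instead of the worker deleting its own cache directory by hand.
        storage.cleanup_directories()
    except Exception as e:
        print(f"Error cleaning up cache directory: {e}")


app = FastAPI(lifespan=lifespan)
```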
flowfile_worker/routes.py
CHANGED

```diff
@@ -8,22 +8,30 @@ from base64 import encodebytes
 from flowfile_worker import status_dict, CACHE_DIR, PROCESS_MEMORY_USAGE, status_dict_lock
 from flowfile_worker import models
 from flowfile_worker.spawner import start_process, start_fuzzy_process, start_generic_process, process_manager
-from flowfile_worker.create import table_creator_factory_method, …
+from flowfile_worker.create import table_creator_factory_method, FileType
 from flowfile_worker.configs import logger
 from flowfile_worker.external_sources.sql_source.models import DatabaseReadSettings
-from flowfile_worker.external_sources.sql_source.main import read_sql_source
-
+from flowfile_worker.external_sources.sql_source.main import read_sql_source
+from flowfile_worker.create.models import ReceivedTable
 
 router = APIRouter()
 
 
+def create_and_get_default_cache_dir(flowfile_flow_id: int) -> str:
+    default_cache_dir = CACHE_DIR / str(flowfile_flow_id)
+    default_cache_dir.mkdir(parents=True, exist_ok=True)
+    return str(default_cache_dir)
+
+
 @router.post("/submit_query/")
 def submit_query(polars_script: models.PolarsScript, background_tasks: BackgroundTasks) -> models.Status:
     logger.info(f"Processing query with operation: {polars_script.operation_type}")
 
     try:
         polars_script.task_id = str(uuid.uuid4()) if polars_script.task_id is None else polars_script.task_id
-        …
+        default_cache_dir = create_and_get_default_cache_dir(polars_script.flowfile_flow_id)
+
+        polars_script.cache_dir = polars_script.cache_dir if polars_script.cache_dir is not None else default_cache_dir
         polars_serializable_object = polars_script.polars_serializable_object()
         file_path = os.path.join(polars_script.cache_dir, f"{polars_script.task_id}.arrow")
         result_type = "polars" if polars_script.operation_type == "store" else "other"
@@ -49,8 +57,9 @@ def store_sample(polars_script: models.PolarsScriptSample, background_tasks: Bac
     logger.info(f"Processing sample storage with size: {polars_script.sample_size}")
 
     try:
+        default_cache_dir = create_and_get_default_cache_dir(polars_script.flowfile_flow_id)
         polars_script.task_id = str(uuid.uuid4()) if polars_script.task_id is None else polars_script.task_id
-        polars_script.cache_dir = polars_script.cache_dir if polars_script.cache_dir is not None else …
+        polars_script.cache_dir = polars_script.cache_dir if polars_script.cache_dir is not None else default_cache_dir
         polars_serializable_object = polars_script.polars_serializable_object()
 
         file_path = os.path.join(polars_script.cache_dir, f"{polars_script.task_id}.arrow")
@@ -210,7 +219,8 @@ def store_sql_db_result(database_read_settings: DatabaseReadSettings, background
 
     try:
         task_id = str(uuid.uuid4())
-        file_path = os.path.join(…
+        file_path = os.path.join(create_and_get_default_cache_dir(database_read_settings.flowfile_flow_id),
+                                 f"{task_id}.arrow")
         status = models.Status(background_task_id=task_id, status="Starting", file_ref=file_path,
                                result_type="polars")
         status_dict[task_id] = status
@@ -227,7 +237,7 @@ def store_sql_db_result(database_read_settings: DatabaseReadSettings, background
 
 
 @router.post('/create_table/{file_type}')
-def create_table(file_type: FileType, received_table: …
+def create_table(file_type: FileType, received_table: ReceivedTable, background_tasks: BackgroundTasks,
                  flowfile_flow_id: int = 1, flowfile_node_id: int | str = -1) -> models.Status:
     """
     Create a Polars table from received dictionary data based on specified file type.
@@ -243,18 +253,15 @@ def create_table(file_type: FileType, received_table: Dict, background_tasks: Ba
         models.Status: Status object tracking the table creation
     """
     logger.info(f"Creating table of type: {file_type}")
-
     try:
         task_id = str(uuid.uuid4())
-        file_ref = os.path.join(…
-…
+        file_ref = os.path.join(create_and_get_default_cache_dir(flowfile_flow_id), f"{task_id}.arrow")
         status = models.Status(background_task_id=task_id, status="Starting", file_ref=file_ref,
                                result_type="polars")
         status_dict[task_id] = status
         func_ref = table_creator_factory_method(file_type)
-        received_table_parsed = received_table_parser(received_table, file_type)
         background_tasks.add_task(start_generic_process, func_ref=func_ref, file_ref=file_ref,
-                                  task_id=task_id, kwargs={'received_table': …
+                                  task_id=task_id, kwargs={'received_table': received_table},
                                   flowfile_flow_id=flowfile_flow_id,
                                   flowfile_node_id=flowfile_node_id)
         logger.info(f"Started table creation task: {task_id}")
@@ -382,8 +389,9 @@ async def add_fuzzy_join(polars_script: models.FuzzyJoinInput, background_tasks:
     """
     logger.info("Starting fuzzy join operation")
     try:
+        default_cache_dir = create_and_get_default_cache_dir(polars_script.flowfile_flow_id)
         polars_script.task_id = str(uuid.uuid4()) if polars_script.task_id is None else polars_script.task_id
-        polars_script.cache_dir = polars_script.cache_dir if polars_script.cache_dir is not None else …
+        polars_script.cache_dir = polars_script.cache_dir if polars_script.cache_dir is not None else default_cache_dir
         left_serializable_object = polars_script.left_df_operation.polars_serializable_object()
         right_serializable_object = polars_script.right_df_operation.polars_serializable_object()
 
@@ -405,6 +413,37 @@ async def add_fuzzy_join(polars_script: models.FuzzyJoinInput, background_tasks:
         raise HTTPException(status_code=500, detail=str(e))
 
 
+@router.delete("/clear_task/{task_id}")
+def clear_task(task_id: str):
+    """
+    Clear task data and status by ID.
+
+    Args:
+        task_id: Unique identifier of the task to clear
+    Returns:
+        dict: Success message
+    Raises:
+        HTTPException: If task not found
+    """
+
+    logger.info(f"Clearing task: {task_id}")
+    status = status_dict.get(task_id)
+    if not status:
+        logger.warning(f"Task not found for clearing: {task_id}")
+        raise HTTPException(status_code=404, detail="Task not found")
+    try:
+        if os.path.exists(status.file_ref):
+            os.remove(status.file_ref)
+            logger.debug(f"Removed file: {status.file_ref}")
+    except Exception as e:
+        logger.error(f"Error removing file {status.file_ref}: {str(e)}", exc_info=True)
+    with status_dict_lock:
+        status_dict.pop(task_id, None)
+        PROCESS_MEMORY_USAGE.pop(task_id, None)
+    logger.info(f"Successfully cleared task: {task_id}")
+    return {"message": f"Task {task_id} has been cleared."}
+
+
 @router.post("/cancel_task/{task_id}")
 def cancel_task(task_id: str):
     """Cancel a running task by ID.
```
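The new `DELETE /clear_task/{task_id}` route complements `POST /cancel_task/{task_id}`: cancelling stops a running process, while clearing removes the cached `.arrow` result and the in-memory status entry. A minimal client sketch; the worker address and task id are placeholders, the route and its semantics come from the diff above:

```python
import httpx

WORKER_URL = "http://localhost:8000"  # placeholder; point at the running flowfile_worker
task_id = "a-task-uuid-from-an-earlier-submit"  # placeholder

resp = httpx.delete(f"{WORKER_URL}/clear_task/{task_id}")
if resp.status_code == 404:
    print("Task not found")  # the route raises 404 when task_id is absent
else:
    print(resp.json()["message"])  # "Task <task_id> has been cleared."
```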
shared/__init__.py
ADDED

```diff
@@ -0,0 +1,15 @@
+"""
+Shared utilities for Flowfile services.
+This package contains common functionality that can be used across
+flowfile_core, flowfile_worker, and other components without creating
+circular dependencies.
+"""
+
+from .storage_config import storage, get_cache_directory, get_temp_directory, get_flows_directory
+
+__all__ = [
+    'storage',
+    'get_cache_directory',
+    'get_temp_directory',
+    'get_flows_directory'
+]
```
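With this package in place, flowfile_core and flowfile_worker can import the same storage helpers without a circular dependency. A minimal consumer sketch; the accessor signatures are assumed, since this diff only shows the re-exports:

```python
from shared import storage, get_cache_directory

cache_dir = get_cache_directory()  # assumed zero-argument accessor
storage.cleanup_directories()      # the same call flowfile_worker/main.py makes on shutdown
```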