Flowfile 0.3.9__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowfile/__init__.py +8 -1
- flowfile/api.py +1 -3
- flowfile/web/static/assets/{CloudConnectionManager-c97c25f8.js → CloudConnectionManager-0dfba9f2.js} +2 -2
- flowfile/web/static/assets/{CloudStorageReader-f1ff509e.js → CloudStorageReader-d5b1b6c9.js} +11 -78
- flowfile/web/static/assets/{CloudStorageWriter-034f8b78.js → CloudStorageWriter-00d87aad.js} +12 -79
- flowfile/web/static/assets/{CloudStorageWriter-49c9a4b2.css → CloudStorageWriter-b0ee067f.css} +24 -24
- flowfile/web/static/assets/ColumnSelector-4685e75d.js +83 -0
- flowfile/web/static/assets/ColumnSelector-47996a16.css +10 -0
- flowfile/web/static/assets/ContextMenu-23e909da.js +41 -0
- flowfile/web/static/assets/{SettingsSection-9c836ecc.css → ContextMenu-4c74eef1.css} +0 -21
- flowfile/web/static/assets/ContextMenu-63cfa99b.css +26 -0
- flowfile/web/static/assets/ContextMenu-70ae0c79.js +41 -0
- flowfile/web/static/assets/ContextMenu-c13f91d0.css +26 -0
- flowfile/web/static/assets/ContextMenu-f149cf7c.js +41 -0
- flowfile/web/static/assets/{CrossJoin-41efa4cb.css → CrossJoin-1119d18e.css} +18 -18
- flowfile/web/static/assets/{CrossJoin-9e156ebe.js → CrossJoin-702a3edd.js} +14 -84
- flowfile/web/static/assets/CustomNode-74a37f74.css +32 -0
- flowfile/web/static/assets/CustomNode-b1519993.js +211 -0
- flowfile/web/static/assets/{DatabaseConnectionSettings-d5c625b3.js → DatabaseConnectionSettings-6f3e4ea5.js} +3 -3
- flowfile/web/static/assets/{DatabaseManager-265adc5e.js → DatabaseManager-cf5ef661.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-f50c6558.css → DatabaseReader-ae61773c.css} +0 -27
- flowfile/web/static/assets/{DatabaseReader-0b10551e.js → DatabaseReader-d38c7295.js} +14 -114
- flowfile/web/static/assets/{DatabaseWriter-c17c6916.js → DatabaseWriter-b04ef46a.js} +13 -74
- flowfile/web/static/assets/{ExploreData-5bdae813.css → ExploreData-2d0cf4db.css} +8 -14
- flowfile/web/static/assets/ExploreData-5fa10ed8.js +192 -0
- flowfile/web/static/assets/{ExternalSource-3a66556c.js → ExternalSource-d39af878.js} +8 -79
- flowfile/web/static/assets/{Filter-91ad87e7.js → Filter-9b6d08db.js} +12 -85
- flowfile/web/static/assets/{Filter-a9d08ba1.css → Filter-f62091b3.css} +3 -3
- flowfile/web/static/assets/{Formula-3c395ab1.js → Formula-6b04fb1d.js} +20 -87
- flowfile/web/static/assets/{Formula-29f19d21.css → Formula-bb96803d.css} +4 -4
- flowfile/web/static/assets/{FuzzyMatch-6857de82.css → FuzzyMatch-1010f966.css} +42 -42
- flowfile/web/static/assets/{FuzzyMatch-2df0d230.js → FuzzyMatch-999521f4.js} +16 -87
- flowfile/web/static/assets/{GraphSolver-d285877f.js → GraphSolver-17dd2198.js} +13 -159
- flowfile/web/static/assets/GraphSolver-f0cb7bfb.css +22 -0
- flowfile/web/static/assets/{GroupBy-0bd1cc6b.js → GroupBy-6b039e18.js} +12 -75
- flowfile/web/static/assets/{Unique-b5615727.css → GroupBy-b9505323.css} +8 -8
- flowfile/web/static/assets/{Join-5a78a203.js → Join-24d0f113.js} +15 -85
- flowfile/web/static/assets/{Join-f45eff22.css → Join-fd79b451.css} +20 -20
- flowfile/web/static/assets/{ManualInput-a71b52c6.css → ManualInput-3246a08d.css} +20 -20
- flowfile/web/static/assets/{ManualInput-93aef9d6.js → ManualInput-34639209.js} +11 -82
- flowfile/web/static/assets/MultiSelect-0e8724a3.js +5 -0
- flowfile/web/static/assets/MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js +63 -0
- flowfile/web/static/assets/NumericInput-3d63a470.js +5 -0
- flowfile/web/static/assets/NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js +35 -0
- flowfile/web/static/assets/Output-283fe388.css +37 -0
- flowfile/web/static/assets/{Output-411ecaee.js → Output-edea9802.js} +62 -273
- flowfile/web/static/assets/{Pivot-89db4b04.js → Pivot-61d19301.js} +14 -138
- flowfile/web/static/assets/Pivot-cf333e3d.css +22 -0
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +13 -0
- flowfile/web/static/assets/PivotValidation-c46cd420.css +13 -0
- flowfile/web/static/assets/PivotValidation-de9f43fe.js +61 -0
- flowfile/web/static/assets/PivotValidation-f97fec5b.js +61 -0
- flowfile/web/static/assets/{PolarsCode-a9f974f8.js → PolarsCode-bc3c9984.js} +13 -80
- flowfile/web/static/assets/Read-64a3f259.js +218 -0
- flowfile/web/static/assets/Read-e808b239.css +62 -0
- flowfile/web/static/assets/RecordCount-3d5039be.js +53 -0
- flowfile/web/static/assets/{RecordId-55ae7d36.js → RecordId-597510e0.js} +8 -80
- flowfile/web/static/assets/SQLQueryComponent-36cef432.css +27 -0
- flowfile/web/static/assets/SQLQueryComponent-df51adbe.js +38 -0
- flowfile/web/static/assets/{Sample-b4a18476.js → Sample-4be0a507.js} +8 -77
- flowfile/web/static/assets/{SecretManager-b066d13a.js → SecretManager-4839be57.js} +2 -2
- flowfile/web/static/assets/{Select-727688dc.js → Select-9b72f201.js} +11 -85
- flowfile/web/static/assets/SettingsSection-2e4d03c4.css +21 -0
- flowfile/web/static/assets/SettingsSection-5c696bee.css +20 -0
- flowfile/web/static/assets/SettingsSection-71e6b7e3.css +21 -0
- flowfile/web/static/assets/SettingsSection-7ded385d.js +45 -0
- flowfile/web/static/assets/{SettingsSection-695ac487.js → SettingsSection-e1e9c953.js} +2 -40
- flowfile/web/static/assets/SettingsSection-f0f75a42.js +53 -0
- flowfile/web/static/assets/SingleSelect-6c777aac.js +5 -0
- flowfile/web/static/assets/SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js +62 -0
- flowfile/web/static/assets/SliderInput-7cb93e62.js +40 -0
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +4 -0
- flowfile/web/static/assets/{GroupBy-ab1ea74b.css → Sort-3643d625.css} +8 -8
- flowfile/web/static/assets/{Sort-be3339a8.js → Sort-6cbde21a.js} +12 -97
- flowfile/web/static/assets/TextInput-d9a40c11.js +5 -0
- flowfile/web/static/assets/TextInput.vue_vue_type_script_setup_true_lang-5896c375.js +32 -0
- flowfile/web/static/assets/{TextToRows-c92d1ec2.css → TextToRows-5d2c1190.css} +9 -9
- flowfile/web/static/assets/{TextToRows-7b8998da.js → TextToRows-c4fcbf4d.js} +14 -83
- flowfile/web/static/assets/ToggleSwitch-4ef91d19.js +5 -0
- flowfile/web/static/assets/ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js +31 -0
- flowfile/web/static/assets/{UnavailableFields-8b0cb48e.js → UnavailableFields-a03f512c.js} +2 -2
- flowfile/web/static/assets/{Union-8d9ac7f9.css → Union-af6c3d9b.css} +6 -6
- flowfile/web/static/assets/Union-bfe9b996.js +77 -0
- flowfile/web/static/assets/{Unique-af5a80b4.js → Unique-5d023a27.js} +23 -104
- flowfile/web/static/assets/{Sort-7ccfa0fe.css → Unique-f9fb0809.css} +8 -8
- flowfile/web/static/assets/Unpivot-1e422df3.css +30 -0
- flowfile/web/static/assets/{Unpivot-5195d411.js → Unpivot-91cc5354.js} +12 -166
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +13 -0
- flowfile/web/static/assets/UnpivotValidation-7ee2de44.js +51 -0
- flowfile/web/static/assets/{ExploreData-18a4fe52.js → VueGraphicWalker-e51b9924.js} +4 -264
- flowfile/web/static/assets/VueGraphicWalker-ed5ab88b.css +6 -0
- flowfile/web/static/assets/{api-cb00cce6.js → api-c1bad5ca.js} +1 -1
- flowfile/web/static/assets/{api-023d1733.js → api-cf1221f0.js} +1 -1
- flowfile/web/static/assets/{designer-2197d782.css → designer-8da3ba3a.css} +859 -201
- flowfile/web/static/assets/{designer-6c322d8e.js → designer-9633482a.js} +2297 -733
- flowfile/web/static/assets/{documentation-4d1fafe1.js → documentation-ca400224.js} +1 -1
- flowfile/web/static/assets/{dropDown-0b46dd77.js → dropDown-614b998d.js} +1 -1
- flowfile/web/static/assets/{fullEditor-ec4e4f95.js → fullEditor-f7971590.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-def5879b.js → genericNodeSettings-4fe5f36b.js} +3 -3
- flowfile/web/static/assets/{index-681a3ed0.css → index-50508d4d.css} +8 -0
- flowfile/web/static/assets/{index-683fc198.js → index-5429bbf8.js} +208 -31
- flowfile/web/static/assets/nodeInput-5d0d6b79.js +41 -0
- flowfile/web/static/assets/outputCsv-076b85ab.js +86 -0
- flowfile/web/static/assets/{Output-48f81019.css → outputCsv-9cc59e0b.css} +0 -143
- flowfile/web/static/assets/outputExcel-0fd17dbe.js +56 -0
- flowfile/web/static/assets/outputExcel-b41305c0.css +102 -0
- flowfile/web/static/assets/outputParquet-b61e0847.js +31 -0
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +4 -0
- flowfile/web/static/assets/readCsv-a8bb8b61.js +179 -0
- flowfile/web/static/assets/readCsv-c767cb37.css +52 -0
- flowfile/web/static/assets/readExcel-67b4aee0.js +201 -0
- flowfile/web/static/assets/readExcel-806d2826.css +64 -0
- flowfile/web/static/assets/readParquet-48c81530.css +19 -0
- flowfile/web/static/assets/readParquet-92ce1dbc.js +23 -0
- flowfile/web/static/assets/{secretApi-baceb6f9.js → secretApi-68435402.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-de91449a.js → selectDynamic-92e25ee3.js} +7 -7
- flowfile/web/static/assets/{selectDynamic-b062bc9b.css → selectDynamic-aa913ff4.css} +16 -16
- flowfile/web/static/assets/user-defined-icon-0ae16c90.png +0 -0
- flowfile/web/static/assets/{vue-codemirror.esm-dc5e3348.js → vue-codemirror.esm-41b0e0d7.js} +65 -36
- flowfile/web/static/assets/{vue-content-loader.es-ba94b82f.js → vue-content-loader.es-2c8e608f.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/METADATA +5 -3
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/RECORD +191 -121
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/WHEEL +1 -1
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/entry_points.txt +1 -0
- flowfile_core/__init__.py +3 -0
- flowfile_core/configs/flow_logger.py +5 -13
- flowfile_core/configs/node_store/__init__.py +30 -0
- flowfile_core/configs/node_store/nodes.py +383 -99
- flowfile_core/configs/node_store/user_defined_node_registry.py +193 -0
- flowfile_core/configs/settings.py +2 -1
- flowfile_core/database/connection.py +5 -21
- flowfile_core/fileExplorer/funcs.py +239 -121
- flowfile_core/flowfile/analytics/analytics_processor.py +1 -0
- flowfile_core/flowfile/code_generator/code_generator.py +62 -64
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +73 -56
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +77 -86
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +4 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +19 -34
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +36 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +23 -23
- flowfile_core/flowfile/flow_data_engine/join/utils.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +9 -4
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +212 -86
- flowfile_core/flowfile/flow_data_engine/utils.py +2 -0
- flowfile_core/flowfile/flow_graph.py +240 -54
- flowfile_core/flowfile/flow_node/flow_node.py +48 -13
- flowfile_core/flowfile/flow_node/models.py +2 -1
- flowfile_core/flowfile/handler.py +24 -5
- flowfile_core/flowfile/manage/compatibility_enhancements.py +404 -41
- flowfile_core/flowfile/manage/io_flowfile.py +394 -0
- flowfile_core/flowfile/node_designer/__init__.py +47 -0
- flowfile_core/flowfile/node_designer/_type_registry.py +197 -0
- flowfile_core/flowfile/node_designer/custom_node.py +371 -0
- flowfile_core/flowfile/node_designer/ui_components.py +277 -0
- flowfile_core/flowfile/schema_callbacks.py +17 -10
- flowfile_core/flowfile/setting_generator/settings.py +15 -10
- flowfile_core/main.py +5 -1
- flowfile_core/routes/routes.py +73 -30
- flowfile_core/routes/user_defined_components.py +55 -0
- flowfile_core/schemas/cloud_storage_schemas.py +0 -2
- flowfile_core/schemas/input_schema.py +228 -65
- flowfile_core/schemas/output_model.py +5 -2
- flowfile_core/schemas/schemas.py +153 -35
- flowfile_core/schemas/transform_schema.py +1083 -412
- flowfile_core/schemas/yaml_types.py +103 -0
- flowfile_core/types.py +156 -0
- flowfile_core/utils/validate_setup.py +3 -1
- flowfile_frame/__init__.py +3 -1
- flowfile_frame/flow_frame.py +31 -24
- flowfile_frame/flow_frame_methods.py +12 -9
- flowfile_worker/__init__.py +9 -35
- flowfile_worker/create/__init__.py +3 -21
- flowfile_worker/create/funcs.py +68 -56
- flowfile_worker/create/models.py +130 -62
- flowfile_worker/main.py +5 -2
- flowfile_worker/routes.py +52 -13
- shared/__init__.py +15 -0
- shared/storage_config.py +258 -0
- tools/migrate/README.md +56 -0
- tools/migrate/__init__.py +12 -0
- tools/migrate/__main__.py +131 -0
- tools/migrate/legacy_schemas.py +621 -0
- tools/migrate/migrate.py +598 -0
- tools/migrate/tests/__init__.py +0 -0
- tools/migrate/tests/conftest.py +23 -0
- tools/migrate/tests/test_migrate.py +627 -0
- tools/migrate/tests/test_migration_e2e.py +1010 -0
- tools/migrate/tests/test_node_migrations.py +813 -0
- flowfile/web/static/assets/GraphSolver-17fd26db.css +0 -68
- flowfile/web/static/assets/Pivot-f415e85f.css +0 -35
- flowfile/web/static/assets/Read-80dc1675.css +0 -197
- flowfile/web/static/assets/Read-c3b1929c.js +0 -701
- flowfile/web/static/assets/RecordCount-4e95f98e.js +0 -122
- flowfile/web/static/assets/Union-89fd73dc.js +0 -146
- flowfile/web/static/assets/Unpivot-246e9bbd.css +0 -77
- flowfile/web/static/assets/nodeTitle-a16db7c3.js +0 -227
- flowfile/web/static/assets/nodeTitle-f4b12bcb.css +0 -134
- flowfile_core/flowfile/manage/open_flowfile.py +0 -135
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info/licenses}/LICENSE +0 -0
- /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
|
@@ -23,15 +23,10 @@ from flowfile_core.flowfile.sources.external_sources.sql_source.models import (D
|
|
|
23
23
|
DatabaseExternalWriteSettings)
|
|
24
24
|
from flowfile_core.schemas.cloud_storage_schemas import CloudStorageWriteSettingsWorkerInterface
|
|
25
25
|
from flowfile_core.schemas.input_schema import (
|
|
26
|
-
|
|
27
|
-
ReceivedExcelTable,
|
|
28
|
-
ReceivedJsonTable,
|
|
29
|
-
ReceivedParquetTable
|
|
26
|
+
ReceivedTable
|
|
30
27
|
)
|
|
31
28
|
from flowfile_core.utils.arrow_reader import read
|
|
32
29
|
|
|
33
|
-
ReceivedTableCollection = ReceivedCsvTable | ReceivedParquetTable | ReceivedJsonTable | ReceivedExcelTable
|
|
34
|
-
|
|
35
30
|
|
|
36
31
|
def trigger_df_operation(flow_id: int, node_id: int | str, lf: pl.LazyFrame, file_ref: str, operation_type: OperationType = 'store') -> Status:
|
|
37
32
|
encoded_operation = encodebytes(lf.serialize()).decode()
|
|
@@ -39,7 +34,7 @@ def trigger_df_operation(flow_id: int, node_id: int | str, lf: pl.LazyFrame, fil
|
|
|
39
34
|
'flowfile_flow_id': flow_id, 'flowfile_node_id': node_id}
|
|
40
35
|
v = requests.post(url=f'{WORKER_URL}/submit_query/', json=_json)
|
|
41
36
|
if not v.ok:
|
|
42
|
-
raise Exception(f'Could not cache the data, {v.text}')
|
|
37
|
+
raise Exception(f'trigger_df_operation: Could not cache the data, {v.text}')
|
|
43
38
|
return Status(**v.json())
|
|
44
39
|
|
|
45
40
|
|
|
@@ -49,7 +44,7 @@ def trigger_sample_operation(lf: pl.LazyFrame, file_ref: str, flow_id: int, node
|
|
|
49
44
|
'sample_size': sample_size, 'flowfile_flow_id': flow_id, 'flowfile_node_id': node_id}
|
|
50
45
|
v = requests.post(url=f'{WORKER_URL}/store_sample/', json=_json)
|
|
51
46
|
if not v.ok:
|
|
52
|
-
raise Exception(f'Could not cache the data, {v.text}')
|
|
47
|
+
raise Exception(f'trigger_sample_operation: Could not cache the data, {v.text}')
|
|
53
48
|
return Status(**v.json())
|
|
54
49
|
|
|
55
50
|
|
|
@@ -67,18 +62,19 @@ def trigger_fuzzy_match_operation(left_df: pl.LazyFrame, right_df: pl.LazyFrame,
|
|
|
67
62
|
flowfile_flow_id=flow_id,
|
|
68
63
|
flowfile_node_id=node_id
|
|
69
64
|
)
|
|
65
|
+
print("fuzzy join input", fuzzy_join_input)
|
|
70
66
|
v = requests.post(f'{WORKER_URL}/add_fuzzy_join', data=fuzzy_join_input.model_dump_json())
|
|
71
67
|
if not v.ok:
|
|
72
|
-
raise Exception(f'Could not cache the data, {v.text}')
|
|
68
|
+
raise Exception(f'trigger_fuzzy_match_operation: Could not cache the data, {v.text}')
|
|
73
69
|
return Status(**v.json())
|
|
74
70
|
|
|
75
71
|
|
|
76
|
-
def trigger_create_operation(flow_id: int, node_id: int | str, received_table: ReceivedTable,
                             file_type: Literal['csv', 'parquet', 'json', 'excel'] = 'csv') -> Status:
    """
    Ask the worker service to create a table from a received file description.

    Posts the serialized ``received_table`` to the worker's
    ``/create_table/{file_type}`` endpoint, passing the flow and node ids as
    query parameters.

    Args:
        flow_id: Identifier of the flow, sent as ``flowfile_flow_id``.
        node_id: Identifier of the node, sent as ``flowfile_node_id``.
        received_table: Table description; sent as its ``model_dump_json()`` payload.
        file_type: Path segment selecting the worker endpoint. Defaults to ``'csv'``.

    Returns:
        Status: Built from the JSON body of the worker response.

    Raises:
        Exception: If the worker responds with a non-OK status.
    """
    # NOTE: the previous signature read `file_type: str = Literal[...]`, which made the
    # typing object itself the default value and produced a nonsensical URL when the
    # argument was omitted. The Literal now lives in the annotation where it belongs.
    response = requests.post(
        url=f'{WORKER_URL}/create_table/{file_type}',
        data=received_table.model_dump_json(),
        params={'flowfile_flow_id': flow_id, 'flowfile_node_id': node_id},
    )
    if not response.ok:
        raise Exception(f'trigger_create_operation: Could not cache the data, {response.text}')
    return Status(**response.json())
|
|
83
79
|
|
|
84
80
|
|
|
@@ -86,7 +82,7 @@ def trigger_database_read_collector(database_external_read_settings: DatabaseExt
|
|
|
86
82
|
f = requests.post(url=f'{WORKER_URL}/store_database_read_result',
|
|
87
83
|
data=database_external_read_settings.model_dump_json())
|
|
88
84
|
if not f.ok:
|
|
89
|
-
raise Exception(f'Could not cache the data, {f.text}')
|
|
85
|
+
raise Exception(f'trigger_database_read_collector: Could not cache the data, {f.text}')
|
|
90
86
|
return Status(**f.json())
|
|
91
87
|
|
|
92
88
|
|
|
@@ -94,7 +90,7 @@ def trigger_database_write(database_external_write_settings: DatabaseExternalWri
|
|
|
94
90
|
f = requests.post(url=f'{WORKER_URL}/store_database_write_result',
|
|
95
91
|
data=database_external_write_settings.model_dump_json())
|
|
96
92
|
if not f.ok:
|
|
97
|
-
raise Exception(f'Could not cache the data, {f.text}')
|
|
93
|
+
raise Exception(f'trigger_database_write: Could not cache the data, {f.text}')
|
|
98
94
|
return Status(**f.json())
|
|
99
95
|
|
|
100
96
|
|
|
@@ -102,7 +98,7 @@ def trigger_cloud_storage_write(database_external_write_settings: CloudStorageWr
|
|
|
102
98
|
f = requests.post(url=f'{WORKER_URL}/write_data_to_cloud',
|
|
103
99
|
data=database_external_write_settings.model_dump_json())
|
|
104
100
|
if not f.ok:
|
|
105
|
-
raise Exception(f'Could not cache the data, {f.text}')
|
|
101
|
+
raise Exception(f'trigger_cloud_storage_write: Could not cache the data, {f.text}')
|
|
106
102
|
return Status(**f.json())
|
|
107
103
|
|
|
108
104
|
|
|
@@ -111,7 +107,7 @@ def get_results(file_ref: str) -> Status | None:
|
|
|
111
107
|
if f.status_code == 200:
|
|
112
108
|
return Status(**f.json())
|
|
113
109
|
else:
|
|
114
|
-
raise Exception(f'Could not fetch the data, {f.text}')
|
|
110
|
+
raise Exception(f'get_results: Could not fetch the data, {f.text}')
|
|
115
111
|
|
|
116
112
|
|
|
117
113
|
def results_exists(file_ref: str):
|
|
@@ -128,6 +124,25 @@ def results_exists(file_ref: str):
|
|
|
128
124
|
return False
|
|
129
125
|
|
|
130
126
|
|
|
127
|
+
def clear_task_from_worker(file_ref: str) -> bool:
    """
    Request removal of a task from the worker service.

    Issues a DELETE against the worker's ``/clear_task/{file_ref}`` endpoint.

    Args:
        file_ref (str): The unique identifier of the task to clear.

    Returns:
        bool: True when the worker answers with HTTP 200; False for any other
        status code or when the request itself fails.
    """
    try:
        response = requests.delete(f'{WORKER_URL}/clear_task/{file_ref}')
    except requests.RequestException as exc:
        # Best effort: a failed request is logged and reported as "not cleared".
        logger.error(f"Failed to remove results: {str(exc)}")
        return False
    return response.status_code == 200
|
|
144
|
+
|
|
145
|
+
|
|
131
146
|
def get_df_result(encoded_df: str) -> pl.LazyFrame:
    """
    Rebuild a LazyFrame from its base64-encoded serialized form.

    Args:
        encoded_df: Base64 text wrapping a serialized LazyFrame.

    Returns:
        pl.LazyFrame: The deserialized frame.
    """
    raw_bytes = decodebytes(encoded_df.encode())
    buffer = io.BytesIO(raw_bytes)
    return pl.LazyFrame.deserialize(buffer)
|
|
@@ -174,87 +189,161 @@ def cancel_task(file_ref: str) -> bool:
|
|
|
174
189
|
|
|
175
190
|
|
|
176
191
|
class BaseFetcher:
    """
    Thread-safe fetcher for polling worker status and retrieving results.

    A background daemon thread polls ``{WORKER_URL}/status/{file_ref}`` until the
    worker reports a terminal status, a request fails, or ``cancel()`` is called.
    All state is guarded by a single lock; ``get_result()`` blocks on a condition
    variable bound to that lock until the fetch is no longer running.
    """

    def __init__(self, file_ref: str = None):
        """
        Args:
            file_ref: Identifier of the worker task. A random UUID string is
                generated when none is given.
        """
        self.file_ref = file_ref if file_ref else str(uuid4())

        # Thread synchronization. The condition shares ``_lock``, so
        # ``with self._lock`` and ``with self._condition`` are mutually exclusive.
        self._lock = threading.Lock()
        self._condition = threading.Condition(self._lock)
        self._stop_event = threading.Event()
        self._thread = None

        # State variables - read through the properties below for thread-safe access
        self._result: Optional[Any] = None
        self._started: bool = False
        self._running: bool = False
        self._error_code: int = 0
        self._error_description: Optional[str] = None

    # Public properties for compatibility with subclasses
    @property
    def result(self) -> Optional[Any]:
        """The fetched result, or None if nothing has been stored yet."""
        with self._lock:
            return self._result

    @property
    def started(self) -> bool:
        """Whether the background thread has been started."""
        with self._lock:
            return self._started

    @property
    def running(self) -> bool:
        """Whether a fetch is currently considered in progress."""
        with self._lock:
            return self._running

    @running.setter
    def running(self, value: bool):
        """Allow subclasses to set running status and auto-start if needed."""
        with self._condition:
            self._running = value
            if value:
                # If subclass sets running=True, auto-start the thread
                if not self._started:
                    self._start_thread()
            else:
                # Wake anyone blocked in get_result(); without this a waiter that
                # entered while running was True would never re-check the flag.
                self._condition.notify_all()

    @property
    def error_code(self) -> int:
        """Error code recorded by the fetch thread (0 when none was recorded)."""
        with self._lock:
            return self._error_code

    @property
    def error_description(self) -> Optional[str]:
        """Error description recorded by the fetch thread, or None."""
        with self._lock:
            return self._error_description

    def _start_thread(self):
        """Internal method to start thread (must be called under lock)."""
        if not self._started:
            self._thread = threading.Thread(target=self._fetch_cached_df, daemon=True)
            self._thread.start()
            self._started = True

    def _fetch_cached_df(self):
        """Background thread that polls for results."""
        sleep_time = 0.5

        # Don't check _running here - subclasses already set it
        try:
            while not self._stop_event.is_set():
                try:
                    r = requests.get(f'{WORKER_URL}/status/{self.file_ref}', timeout=10)

                    if r.status_code == 200:
                        status = Status(**r.json())

                        if status.status == 'Completed':
                            self._handle_completion(status)
                            return
                        elif status.status == 'Error':
                            self._handle_error(1, status.error_message)
                            return
                        elif status.status == 'Unknown Error':
                            self._handle_error(
                                -1,
                                'There was an unknown error with the process, '
                                'and the process got killed by the server'
                            )
                            return
                        # Any other status: keep polling.
                    else:
                        self._handle_error(2, f"HTTP {r.status_code}: {r.text}")
                        return

                except requests.RequestException as e:
                    self._handle_error(2, f"Request failed: {e}")
                    return

                # Sleep without holding the lock; wait() returns True as soon as
                # the stop event is set, which ends the polling loop early.
                if self._stop_event.wait(timeout=sleep_time):
                    break

            # Only reached if stop_event was set
            self._handle_cancellation()

        except Exception as e:
            # Catch any unexpected errors so waiters are always released
            logger.exception("Unexpected error in fetch thread")
            self._handle_error(-1, f"Unexpected error: {e}")

    def _handle_completion(self, status):
        """Handle successful completion. Must be called from fetch thread."""
        with self._condition:
            try:
                if status.result_type == 'polars':
                    self._result = get_df_result(status.results)
                else:
                    self._result = status.results
            except Exception as e:
                logger.exception("Error processing result")
                self._error_code = -1
                self._error_description = f"Error processing result: {e}"
            finally:
                self._running = False
                self._condition.notify_all()

    def _handle_error(self, code: int, description: str):
        """
        Handle error state. Must be called from fetch thread.

        A missing (None) description is replaced by a generic message:
        ``get_result()`` and ``has_error`` detect failures through the
        description, so storing None would make the error look like a
        successful, empty result.
        """
        if description is None:
            description = f"Worker reported an error (code {code}) without a message"
        with self._condition:
            self._error_code = code
            self._error_description = description
            self._running = False
            self._condition.notify_all()

    def _handle_cancellation(self):
        """Handle cancellation. Must be called from fetch thread."""
        with self._condition:
            if self._error_description is None:
                self._error_description = "Task cancelled"
            logger.warning(f"Fetch operation cancelled: {self._error_description}")
            self._running = False
            self._condition.notify_all()

    def start(self):
        """Start the background fetch thread."""
        with self._lock:
            if self._started:
                logger.info('Fetcher already started')
                return
            if self._running:
                logger.info('Already running the fetching')
                return

            self._running = True
            self._start_thread()

    def cancel(self):
        """
        Cancel the task on the worker and stop the polling thread.

        Calls ``cancel_task`` for this ``file_ref`` (failures are logged, not
        raised), sets the stop event, and waits up to five seconds for the
        background thread to exit.
        """
        logger.warning('Cancelling the operation')

        # Cancel on the worker side
        try:
            cancel_task(self.file_ref)
        except Exception as e:
            logger.error(f'Failed to cancel task on worker: {str(e)}')

        # Signal the thread to stop
        self._stop_event.set()

        # Wait for thread to finish
        if self._thread and self._thread.is_alive():
            self._thread.join(timeout=5.0)
            if self._thread.is_alive():
                logger.warning("Fetch thread did not stop within timeout")

    def get_result(self) -> Optional[Any]:
        """
        Get the result, blocking until it's available.

        Starts the background thread first if it has not been started yet.

        Returns:
            The fetched result.

        Raises:
            Exception: If an error occurred during fetching.
        """
        with self._condition:
            # Start if not already started (for manual usage)
            if not self._started:
                if not self._running:
                    self._running = True
                self._start_thread()

            # Wait for completion; wait() releases the lock while blocked
            while self._running:
                self._condition.wait()

            # Check for errors
            if self._error_description is not None:
                raise Exception(self._error_description)
            return self._result

    @property
    def is_running(self) -> bool:
        """Check if the fetcher is currently running."""
        with self._lock:
            return self._running

    @property
    def has_error(self) -> bool:
        """Check if the fetcher encountered an error."""
        with self._lock:
            return self._error_description is not None

    @property
    def error_info(self) -> tuple[int, Optional[str]]:
        """Get error code and description."""
        with self._lock:
            return self._error_code, self._error_description
|
|
289
415
|
|
|
290
416
|
|
|
291
417
|
class ExternalDfFetcher(BaseFetcher):
|
|
@@ -334,7 +460,7 @@ class ExternalFuzzyMatchFetcher(BaseFetcher):
|
|
|
334
460
|
|
|
335
461
|
|
|
336
462
|
class ExternalCreateFetcher(BaseFetcher):
|
|
337
|
-
def __init__(self, received_table:
|
|
463
|
+
def __init__(self, received_table: ReceivedTable, node_id: int, flow_id: int,
|
|
338
464
|
file_type: str = 'csv', wait_on_completion: bool = True):
|
|
339
465
|
r = trigger_create_operation(received_table=received_table, file_type=file_type,
|
|
340
466
|
node_id=node_id, flow_id=flow_id)
|
|
@@ -23,8 +23,10 @@ def get_data_type(vals: Iterable[Any]):
|
|
|
23
23
|
def calculate_schema(lf: pl.LazyFrame) -> List[Dict]:
    """
    Compute schema statistics for a LazyFrame through the external fetcher.

    Runs a ``calculate_schema`` operation via ``ExternalDfFetcher`` and replaces
    each entry's ``'pl_datatype'`` value with the attribute of that name looked
    up on ``pl.datatypes``.

    Args:
        lf: The LazyFrame whose schema should be calculated.

    Returns:
        List[Dict]: The entries returned by the fetcher, with ``'pl_datatype'``
        resolved.
    """
    fetcher = ExternalDfFetcher(
        lf=lf,
        operation_type='calculate_schema',
        wait_on_completion=False,
        flow_id=-1,
        node_id=-1,
    )
    column_stats: List[Dict] = fetcher.get_result()

    for entry in column_stats:
        dtype_name = entry['pl_datatype']
        entry['pl_datatype'] = getattr(pl.datatypes, dtype_name)

    return column_stats
|
|
29
31
|
|
|
30
32
|
|