Flowfile 0.4.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backends/main.py +25 -22
- build_backends/main_prd.py +10 -19
- flowfile/__init__.py +179 -73
- flowfile/__main__.py +10 -7
- flowfile/api.py +52 -59
- flowfile/web/__init__.py +14 -9
- flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
- flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
- flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
- flowfile/web/static/assets/{CloudConnectionManager-d3248f8d.js → CloudConnectionView-f13f202b.js} +11 -11
- flowfile/web/static/assets/{CloudStorageReader-d65bf041.js → CloudStorageReader-0023d4a5.js} +10 -8
- flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
- flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
- flowfile/web/static/assets/{CloudStorageWriter-e83be3ed.js → CloudStorageWriter-8e781e11.js} +10 -8
- flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
- flowfile/web/static/assets/{ColumnSelector-cce661cf.js → ColumnSelector-8ad68ea9.js} +3 -5
- flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-11a4652a.js → ContextMenu-31ee57f0.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-160afb08.js → ContextMenu-69a74055.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-cf18d2cc.js → ContextMenu-8e2051c6.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
- flowfile/web/static/assets/{CrossJoin-d395d38c.js → CrossJoin-03df6938.js} +12 -10
- flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
- flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
- flowfile/web/static/assets/{CustomNode-b812dc0b.js → CustomNode-8479239b.js} +36 -24
- flowfile/web/static/assets/{DatabaseConnectionSettings-7000bf2c.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
- flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
- flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
- flowfile/web/static/assets/{DatabaseReader-4f035d0c.js → DatabaseReader-c58b9552.js} +25 -15
- flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
- flowfile/web/static/assets/{DatabaseManager-9662ec5b.js → DatabaseView-d26a9140.js} +11 -11
- flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
- flowfile/web/static/assets/{DatabaseWriter-f65dcd54.js → DatabaseWriter-4d05ddc7.js} +17 -10
- flowfile/web/static/assets/{designer-e3c150ec.css → DesignerView-a6d0ee84.css} +629 -538
- flowfile/web/static/assets/{designer-f3656d8c.js → DesignerView-e6f5c0e8.js} +1214 -3209
- flowfile/web/static/assets/{documentation-52b241e7.js → DocumentationView-2e78ef1b.js} +5 -5
- flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
- flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
- flowfile/web/static/assets/{ExploreData-94c43dfc.js → ExploreData-7b54caca.js} +18 -9
- flowfile/web/static/assets/{ExternalSource-ac04b3cc.js → ExternalSource-3fa399b2.js} +9 -7
- flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
- flowfile/web/static/assets/Filter-7494ea97.css +48 -0
- flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
- flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
- flowfile/web/static/assets/{Formula-71472193.js → Formula-aac42b1e.js} +13 -11
- flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
- flowfile/web/static/assets/{FuzzyMatch-b317f631.js → FuzzyMatch-cd9bbfca.js} +12 -10
- flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
- flowfile/web/static/assets/{GraphSolver-754a234f.js → GraphSolver-c7e6780e.js} +13 -11
- flowfile/web/static/assets/{GroupBy-6c6f9802.js → GroupBy-93c5d22b.js} +9 -7
- flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
- flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
- flowfile/web/static/assets/{Join-a1b800be.js → Join-a19b2de2.js} +13 -11
- flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
- flowfile/web/static/assets/LoginView-d325d632.css +172 -0
- flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
- flowfile/web/static/assets/{ManualInput-a9640276.js → ManualInput-8d3374b2.js} +170 -116
- flowfile/web/static/assets/{MultiSelect-97213888.js → MultiSelect-ad1b6243.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-6ffe088a.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
- flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
- flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
- flowfile/web/static/assets/{NumericInput-e638088a.js → NumericInput-7100234c.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-90eb2cba.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
- flowfile/web/static/assets/{Output-ddc9079f.css → Output-35e97000.css} +6 -6
- flowfile/web/static/assets/{Output-76750610.js → Output-f5efd2aa.js} +60 -38
- flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
- flowfile/web/static/assets/{Pivot-7814803f.js → Pivot-d981d23c.js} +11 -9
- flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
- flowfile/web/static/assets/{PivotValidation-f92137d2.js → PivotValidation-39386e95.js} +3 -3
- flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
- flowfile/web/static/assets/{PivotValidation-76dd431a.js → PivotValidation-63de1f73.js} +3 -3
- flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
- flowfile/web/static/assets/{PolarsCode-889c3008.js → PolarsCode-f9d69217.js} +18 -9
- flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
- flowfile/web/static/assets/PopOver-d96599db.css +33 -0
- flowfile/web/static/assets/{Read-6b17491f.css → Read-36e7bd51.css} +12 -12
- flowfile/web/static/assets/{Read-637b72a7.js → Read-aec2e377.js} +83 -105
- flowfile/web/static/assets/{RecordCount-2b050c41.js → RecordCount-78ed6845.js} +6 -4
- flowfile/web/static/assets/{RecordId-81df7784.js → RecordId-2156e890.js} +8 -6
- flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
- flowfile/web/static/assets/{SQLQueryComponent-88dcfe53.js → SQLQueryComponent-48c72f5b.js} +3 -3
- flowfile/web/static/assets/{Sample-258ad2a9.js → Sample-1352ca74.js} +6 -4
- flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
- flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
- flowfile/web/static/assets/{SecretManager-2a2cb7e2.js → SecretsView-17df66ee.js} +35 -36
- flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
- flowfile/web/static/assets/{Select-850215fd.js → Select-0aee4c54.js} +9 -7
- flowfile/web/static/assets/{SettingsSection-55bae608.js → SettingsSection-0784e157.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-0e8d9123.js → SettingsSection-cd341bb6.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-29b4fa6b.js → SettingsSection-f2002a6d.js} +3 -3
- flowfile/web/static/assets/{SingleSelect-bebd408b.js → SingleSelect-460cc0ea.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-6093741c.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
- flowfile/web/static/assets/{SliderInput-6a05ab61.js → SliderInput-5d926864.js} +7 -4
- flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
- flowfile/web/static/assets/{Sort-10ab48ed.js → Sort-3cdc971b.js} +9 -7
- flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
- flowfile/web/static/assets/{TextInput-df9d6259.js → TextInput-a2d0bfbd.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-000e1178.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
- flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
- flowfile/web/static/assets/{TextToRows-6c2d93d8.js → TextToRows-918945f7.js} +11 -10
- flowfile/web/static/assets/{ToggleSwitch-0ff7ac52.js → ToggleSwitch-f0ef5196.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-c6dc3029.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
- flowfile/web/static/assets/{UnavailableFields-1bab97cb.js → UnavailableFields-bdad6144.js} +4 -4
- flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
- flowfile/web/static/assets/{Union-b563478a.js → Union-e8ab8c86.js} +8 -6
- flowfile/web/static/assets/{Unique-f90db5db.js → Unique-8cd4f976.js} +13 -22
- flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
- flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
- flowfile/web/static/assets/{Unpivot-bcb0025f.js → Unpivot-8da14095.js} +10 -8
- flowfile/web/static/assets/{UnpivotValidation-c4e73b04.js → UnpivotValidation-6f7d89ff.js} +3 -3
- flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
- flowfile/web/static/assets/{VueGraphicWalker-bb8535e2.js → VueGraphicWalker-3fb312e1.js} +4 -4
- flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
- flowfile/web/static/assets/{api-4c8e3822.js → api-24483f0d.js} +1 -1
- flowfile/web/static/assets/{api-2d6adc4f.js → api-8b81fa73.js} +1 -1
- flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
- flowfile/web/static/assets/{dropDown-1bca8a74.js → dropDown-ac0fda9d.js} +3 -3
- flowfile/web/static/assets/{fullEditor-2985687e.js → fullEditor-5497a84a.js} +11 -10
- flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
- flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
- flowfile/web/static/assets/{genericNodeSettings-0476ba4e.js → genericNodeSettings-99014e1d.js} +5 -5
- flowfile/web/static/assets/index-07dda503.js +38 -0
- flowfile/web/static/assets/index-3ba44389.js +2696 -0
- flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
- flowfile/web/static/assets/{index-246f201c.js → index-fb6493ae.js} +41626 -40869
- flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
- flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
- flowfile/web/static/assets/{outputCsv-d686eeaf.js → outputCsv-8f8ba42d.js} +3 -3
- flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
- flowfile/web/static/assets/{outputExcel-8809ea2f.js → outputExcel-393f4fef.js} +3 -3
- flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
- flowfile/web/static/assets/{outputParquet-53ba645a.js → outputParquet-07c81f65.js} +4 -4
- flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
- flowfile/web/static/assets/{readCsv-053bf97b.js → readCsv-07f6d9ad.js} +21 -20
- flowfile/web/static/assets/{readCsv-bca3ed53.css → readCsv-3bfac4c3.css} +15 -15
- flowfile/web/static/assets/{readExcel-e1b381ea.css → readExcel-3db6b763.css} +13 -13
- flowfile/web/static/assets/{readExcel-ad531eab.js → readExcel-ed69bc8f.js} +10 -12
- flowfile/web/static/assets/{readParquet-cee068e2.css → readParquet-c5244ad5.css} +4 -4
- flowfile/web/static/assets/{readParquet-58e899a1.js → readParquet-e3ed4528.js} +4 -7
- flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
- flowfile/web/static/assets/{selectDynamic-b38de2ba.js → selectDynamic-80b92899.js} +5 -5
- flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
- flowfile/web/static/assets/{vue-codemirror.esm-db9b8936.js → vue-codemirror.esm-0965f39f.js} +31 -637
- flowfile/web/static/assets/{vue-content-loader.es-b5f3ac30.js → vue-content-loader.es-c506ad97.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +4 -4
- flowfile-0.5.3.dist-info/RECORD +402 -0
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +1 -1
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +1 -0
- flowfile_core/__init__.py +13 -3
- flowfile_core/auth/jwt.py +51 -16
- flowfile_core/auth/models.py +32 -7
- flowfile_core/auth/password.py +89 -0
- flowfile_core/auth/secrets.py +8 -6
- flowfile_core/configs/__init__.py +9 -7
- flowfile_core/configs/flow_logger.py +15 -14
- flowfile_core/configs/node_store/__init__.py +72 -4
- flowfile_core/configs/node_store/nodes.py +155 -172
- flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
- flowfile_core/configs/settings.py +28 -15
- flowfile_core/database/connection.py +7 -6
- flowfile_core/database/init_db.py +96 -2
- flowfile_core/database/models.py +3 -1
- flowfile_core/fileExplorer/__init__.py +17 -0
- flowfile_core/fileExplorer/funcs.py +123 -57
- flowfile_core/fileExplorer/utils.py +10 -11
- flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
- flowfile_core/flowfile/analytics/analytics_processor.py +27 -24
- flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
- flowfile_core/flowfile/analytics/utils.py +1 -1
- flowfile_core/flowfile/code_generator/code_generator.py +391 -279
- flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
- flowfile_core/flowfile/database_connection_manager/models.py +1 -1
- flowfile_core/flowfile/extensions.py +17 -12
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +152 -103
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +526 -477
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +43 -32
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +15 -11
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
- flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +360 -191
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
- flowfile_core/flowfile/flow_data_engine/utils.py +101 -67
- flowfile_core/flowfile/flow_graph.py +1011 -561
- flowfile_core/flowfile/flow_graph_utils.py +31 -49
- flowfile_core/flowfile/flow_node/flow_node.py +332 -232
- flowfile_core/flowfile/flow_node/models.py +54 -41
- flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
- flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
- flowfile_core/flowfile/handler.py +82 -32
- flowfile_core/flowfile/manage/compatibility_enhancements.py +493 -47
- flowfile_core/flowfile/manage/io_flowfile.py +391 -0
- flowfile_core/flowfile/node_designer/__init__.py +15 -13
- flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
- flowfile_core/flowfile/node_designer/custom_node.py +162 -36
- flowfile_core/flowfile/node_designer/ui_components.py +136 -35
- flowfile_core/flowfile/schema_callbacks.py +77 -54
- flowfile_core/flowfile/setting_generator/__init__.py +0 -1
- flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
- flowfile_core/flowfile/setting_generator/settings.py +72 -55
- flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
- flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
- flowfile_core/flowfile/util/calculate_layout.py +9 -13
- flowfile_core/flowfile/util/execution_orderer.py +25 -17
- flowfile_core/flowfile/util/node_skipper.py +4 -4
- flowfile_core/flowfile/utils.py +19 -21
- flowfile_core/main.py +26 -19
- flowfile_core/routes/auth.py +284 -11
- flowfile_core/routes/cloud_connections.py +25 -25
- flowfile_core/routes/logs.py +21 -29
- flowfile_core/routes/public.py +3 -3
- flowfile_core/routes/routes.py +77 -43
- flowfile_core/routes/secrets.py +25 -27
- flowfile_core/routes/user_defined_components.py +483 -4
- flowfile_core/run_lock.py +0 -1
- flowfile_core/schemas/__init__.py +4 -6
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
- flowfile_core/schemas/cloud_storage_schemas.py +59 -55
- flowfile_core/schemas/input_schema.py +398 -154
- flowfile_core/schemas/output_model.py +50 -35
- flowfile_core/schemas/schemas.py +207 -67
- flowfile_core/schemas/transform_schema.py +1360 -435
- flowfile_core/schemas/yaml_types.py +117 -0
- flowfile_core/secret_manager/secret_manager.py +17 -13
- flowfile_core/{flowfile/node_designer/data_types.py → types.py} +33 -3
- flowfile_core/utils/arrow_reader.py +7 -6
- flowfile_core/utils/excel_file_manager.py +3 -3
- flowfile_core/utils/fileManager.py +7 -7
- flowfile_core/utils/fl_executor.py +8 -10
- flowfile_core/utils/utils.py +4 -4
- flowfile_core/utils/validate_setup.py +5 -4
- flowfile_frame/__init__.py +107 -50
- flowfile_frame/adapters.py +2 -9
- flowfile_frame/adding_expr.py +73 -32
- flowfile_frame/cloud_storage/frame_helpers.py +27 -23
- flowfile_frame/cloud_storage/secret_manager.py +12 -26
- flowfile_frame/config.py +2 -5
- flowfile_frame/expr.py +311 -218
- flowfile_frame/expr.pyi +160 -159
- flowfile_frame/expr_name.py +23 -23
- flowfile_frame/flow_frame.py +581 -489
- flowfile_frame/flow_frame.pyi +123 -104
- flowfile_frame/flow_frame_methods.py +236 -252
- flowfile_frame/group_frame.py +50 -20
- flowfile_frame/join.py +2 -2
- flowfile_frame/lazy.py +129 -87
- flowfile_frame/lazy_methods.py +83 -30
- flowfile_frame/list_name_space.py +55 -50
- flowfile_frame/selectors.py +148 -68
- flowfile_frame/series.py +9 -7
- flowfile_frame/utils.py +19 -21
- flowfile_worker/__init__.py +12 -4
- flowfile_worker/configs.py +11 -19
- flowfile_worker/create/__init__.py +14 -27
- flowfile_worker/create/funcs.py +143 -94
- flowfile_worker/create/models.py +139 -68
- flowfile_worker/create/pl_types.py +14 -15
- flowfile_worker/create/read_excel_tables.py +34 -41
- flowfile_worker/create/utils.py +22 -19
- flowfile_worker/external_sources/s3_source/main.py +18 -51
- flowfile_worker/external_sources/s3_source/models.py +34 -27
- flowfile_worker/external_sources/sql_source/main.py +8 -5
- flowfile_worker/external_sources/sql_source/models.py +13 -9
- flowfile_worker/flow_logger.py +10 -8
- flowfile_worker/funcs.py +214 -155
- flowfile_worker/main.py +11 -17
- flowfile_worker/models.py +35 -28
- flowfile_worker/process_manager.py +2 -3
- flowfile_worker/routes.py +121 -93
- flowfile_worker/secrets.py +9 -6
- flowfile_worker/spawner.py +80 -49
- flowfile_worker/utils.py +3 -2
- shared/__init__.py +2 -7
- shared/storage_config.py +25 -13
- test_utils/postgres/commands.py +3 -2
- test_utils/postgres/fixtures.py +9 -9
- test_utils/s3/commands.py +1 -1
- test_utils/s3/data_generator.py +3 -4
- test_utils/s3/demo_data_generator.py +4 -7
- test_utils/s3/fixtures.py +7 -5
- tools/migrate/README.md +56 -0
- tools/migrate/__init__.py +12 -0
- tools/migrate/__main__.py +118 -0
- tools/migrate/legacy_schemas.py +682 -0
- tools/migrate/migrate.py +610 -0
- tools/migrate/tests/__init__.py +0 -0
- tools/migrate/tests/conftest.py +21 -0
- tools/migrate/tests/test_migrate.py +622 -0
- tools/migrate/tests/test_migration_e2e.py +1009 -0
- tools/migrate/tests/test_node_migrations.py +843 -0
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
- flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
- flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
- flowfile/web/static/assets/Filter-812dcbca.js +0 -164
- flowfile/web/static/assets/Filter-f62091b3.css +0 -20
- flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
- flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
- flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
- flowfile/web/static/assets/secretApi-538058f3.js +0 -46
- flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
- flowfile-0.4.1.dist-info/RECORD +0 -376
- flowfile_core/flowfile/manage/open_flowfile.py +0 -143
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
- /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
# Standard library imports
|
|
2
|
-
from base64 import decodebytes, encodebytes
|
|
3
2
|
import io
|
|
4
3
|
import threading
|
|
4
|
+
from base64 import decodebytes, encodebytes
|
|
5
5
|
from time import sleep
|
|
6
|
-
from typing import Any,
|
|
6
|
+
from typing import Any, Literal
|
|
7
7
|
from uuid import uuid4
|
|
8
8
|
|
|
9
9
|
import polars as pl
|
|
10
10
|
import requests
|
|
11
|
-
|
|
12
11
|
from pl_fuzzy_frame_match.models import FuzzyMapping
|
|
13
12
|
|
|
14
13
|
from flowfile_core.configs import logger
|
|
@@ -17,109 +16,131 @@ from flowfile_core.flowfile.flow_data_engine.subprocess_operations.models import
|
|
|
17
16
|
FuzzyJoinInput,
|
|
18
17
|
OperationType,
|
|
19
18
|
PolarsOperation,
|
|
20
|
-
Status
|
|
19
|
+
Status,
|
|
21
20
|
)
|
|
22
|
-
from flowfile_core.flowfile.sources.external_sources.sql_source.models import (
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
from flowfile_core.schemas.input_schema import (
|
|
26
|
-
ReceivedCsvTable,
|
|
27
|
-
ReceivedExcelTable,
|
|
28
|
-
ReceivedJsonTable,
|
|
29
|
-
ReceivedParquetTable
|
|
21
|
+
from flowfile_core.flowfile.sources.external_sources.sql_source.models import (
|
|
22
|
+
DatabaseExternalReadSettings,
|
|
23
|
+
DatabaseExternalWriteSettings,
|
|
30
24
|
)
|
|
25
|
+
from flowfile_core.schemas.cloud_storage_schemas import CloudStorageWriteSettingsWorkerInterface
|
|
26
|
+
from flowfile_core.schemas.input_schema import ReceivedTable
|
|
31
27
|
from flowfile_core.utils.arrow_reader import read
|
|
32
28
|
|
|
33
|
-
ReceivedTableCollection = ReceivedCsvTable | ReceivedParquetTable | ReceivedJsonTable | ReceivedExcelTable
|
|
34
29
|
|
|
35
|
-
|
|
36
|
-
|
|
30
|
+
def trigger_df_operation(
|
|
31
|
+
flow_id: int, node_id: int | str, lf: pl.LazyFrame, file_ref: str, operation_type: OperationType = "store"
|
|
32
|
+
) -> Status:
|
|
37
33
|
encoded_operation = encodebytes(lf.serialize()).decode()
|
|
38
|
-
_json = {
|
|
39
|
-
|
|
40
|
-
|
|
34
|
+
_json = {
|
|
35
|
+
"task_id": file_ref,
|
|
36
|
+
"operation": encoded_operation,
|
|
37
|
+
"operation_type": operation_type,
|
|
38
|
+
"flowfile_flow_id": flow_id,
|
|
39
|
+
"flowfile_node_id": node_id,
|
|
40
|
+
}
|
|
41
|
+
v = requests.post(url=f"{WORKER_URL}/submit_query/", json=_json)
|
|
41
42
|
if not v.ok:
|
|
42
|
-
raise Exception(f
|
|
43
|
+
raise Exception(f"trigger_df_operation: Could not cache the data, {v.text}")
|
|
43
44
|
return Status(**v.json())
|
|
44
45
|
|
|
45
46
|
|
|
46
|
-
def trigger_sample_operation(
|
|
47
|
+
def trigger_sample_operation(
|
|
48
|
+
lf: pl.LazyFrame, file_ref: str, flow_id: int, node_id: str | int, sample_size: int = 100
|
|
49
|
+
) -> Status:
|
|
47
50
|
encoded_operation = encodebytes(lf.serialize()).decode()
|
|
48
|
-
_json = {
|
|
49
|
-
|
|
50
|
-
|
|
51
|
+
_json = {
|
|
52
|
+
"task_id": file_ref,
|
|
53
|
+
"operation": encoded_operation,
|
|
54
|
+
"operation_type": "store_sample",
|
|
55
|
+
"sample_size": sample_size,
|
|
56
|
+
"flowfile_flow_id": flow_id,
|
|
57
|
+
"flowfile_node_id": node_id,
|
|
58
|
+
}
|
|
59
|
+
v = requests.post(url=f"{WORKER_URL}/store_sample/", json=_json)
|
|
51
60
|
if not v.ok:
|
|
52
|
-
raise Exception(f
|
|
61
|
+
raise Exception(f"trigger_sample_operation: Could not cache the data, {v.text}")
|
|
53
62
|
return Status(**v.json())
|
|
54
63
|
|
|
55
64
|
|
|
56
|
-
def trigger_fuzzy_match_operation(
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
65
|
+
def trigger_fuzzy_match_operation(
|
|
66
|
+
left_df: pl.LazyFrame,
|
|
67
|
+
right_df: pl.LazyFrame,
|
|
68
|
+
fuzzy_maps: list[FuzzyMapping],
|
|
69
|
+
file_ref: str,
|
|
70
|
+
flow_id: int,
|
|
71
|
+
node_id: int | str,
|
|
72
|
+
) -> Status:
|
|
61
73
|
left_serializable_object = PolarsOperation(operation=encodebytes(left_df.serialize()))
|
|
62
74
|
right_serializable_object = PolarsOperation(operation=encodebytes(right_df.serialize()))
|
|
63
|
-
fuzzy_join_input = FuzzyJoinInput(
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
75
|
+
fuzzy_join_input = FuzzyJoinInput(
|
|
76
|
+
left_df_operation=left_serializable_object,
|
|
77
|
+
right_df_operation=right_serializable_object,
|
|
78
|
+
fuzzy_maps=fuzzy_maps,
|
|
79
|
+
task_id=file_ref,
|
|
80
|
+
flowfile_flow_id=flow_id,
|
|
81
|
+
flowfile_node_id=node_id,
|
|
82
|
+
)
|
|
70
83
|
print("fuzzy join input", fuzzy_join_input)
|
|
71
|
-
v = requests.post(f
|
|
84
|
+
v = requests.post(f"{WORKER_URL}/add_fuzzy_join", data=fuzzy_join_input.model_dump_json())
|
|
72
85
|
if not v.ok:
|
|
73
|
-
raise Exception(f
|
|
86
|
+
raise Exception(f"trigger_fuzzy_match_operation: Could not cache the data, {v.text}")
|
|
74
87
|
return Status(**v.json())
|
|
75
88
|
|
|
76
89
|
|
|
77
|
-
def trigger_create_operation(
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
90
|
+
def trigger_create_operation(
|
|
91
|
+
flow_id: int,
|
|
92
|
+
node_id: int | str,
|
|
93
|
+
received_table: ReceivedTable,
|
|
94
|
+
file_type: str = Literal["csv", "parquet", "json", "excel"],
|
|
95
|
+
):
|
|
96
|
+
f = requests.post(
|
|
97
|
+
url=f"{WORKER_URL}/create_table/{file_type}",
|
|
98
|
+
data=received_table.model_dump_json(),
|
|
99
|
+
params={"flowfile_flow_id": flow_id, "flowfile_node_id": node_id},
|
|
100
|
+
)
|
|
81
101
|
if not f.ok:
|
|
82
|
-
raise Exception(f
|
|
102
|
+
raise Exception(f"trigger_create_operation: Could not cache the data, {f.text}")
|
|
83
103
|
return Status(**f.json())
|
|
84
104
|
|
|
85
105
|
|
|
86
106
|
def trigger_database_read_collector(database_external_read_settings: DatabaseExternalReadSettings):
|
|
87
|
-
f = requests.post(
|
|
88
|
-
|
|
107
|
+
f = requests.post(
|
|
108
|
+
url=f"{WORKER_URL}/store_database_read_result", data=database_external_read_settings.model_dump_json()
|
|
109
|
+
)
|
|
89
110
|
if not f.ok:
|
|
90
|
-
raise Exception(f
|
|
111
|
+
raise Exception(f"trigger_database_read_collector: Could not cache the data, {f.text}")
|
|
91
112
|
return Status(**f.json())
|
|
92
113
|
|
|
93
114
|
|
|
94
115
|
def trigger_database_write(database_external_write_settings: DatabaseExternalWriteSettings):
|
|
95
|
-
f = requests.post(
|
|
96
|
-
|
|
116
|
+
f = requests.post(
|
|
117
|
+
url=f"{WORKER_URL}/store_database_write_result", data=database_external_write_settings.model_dump_json()
|
|
118
|
+
)
|
|
97
119
|
if not f.ok:
|
|
98
|
-
raise Exception(f
|
|
120
|
+
raise Exception(f"trigger_database_write: Could not cache the data, {f.text}")
|
|
99
121
|
return Status(**f.json())
|
|
100
122
|
|
|
101
123
|
|
|
102
124
|
def trigger_cloud_storage_write(database_external_write_settings: CloudStorageWriteSettingsWorkerInterface):
|
|
103
|
-
f = requests.post(url=f
|
|
104
|
-
data=database_external_write_settings.model_dump_json())
|
|
125
|
+
f = requests.post(url=f"{WORKER_URL}/write_data_to_cloud", data=database_external_write_settings.model_dump_json())
|
|
105
126
|
if not f.ok:
|
|
106
|
-
raise Exception(f
|
|
127
|
+
raise Exception(f"trigger_cloud_storage_write: Could not cache the data, {f.text}")
|
|
107
128
|
return Status(**f.json())
|
|
108
129
|
|
|
109
130
|
|
|
110
131
|
def get_results(file_ref: str) -> Status | None:
|
|
111
|
-
f = requests.get(f
|
|
132
|
+
f = requests.get(f"{WORKER_URL}/status/{file_ref}")
|
|
112
133
|
if f.status_code == 200:
|
|
113
134
|
return Status(**f.json())
|
|
114
135
|
else:
|
|
115
|
-
raise Exception(f
|
|
136
|
+
raise Exception(f"get_results: Could not fetch the data, {f.text}")
|
|
116
137
|
|
|
117
138
|
|
|
118
139
|
def results_exists(file_ref: str):
|
|
119
140
|
try:
|
|
120
|
-
f = requests.get(f
|
|
141
|
+
f = requests.get(f"{WORKER_URL}/status/{file_ref}")
|
|
121
142
|
if f.status_code == 200:
|
|
122
|
-
if f.json()[
|
|
143
|
+
if f.json()["status"] == "Completed":
|
|
123
144
|
return True
|
|
124
145
|
return False
|
|
125
146
|
except requests.RequestException as e:
|
|
@@ -139,7 +160,7 @@ def clear_task_from_worker(file_ref: str) -> bool:
|
|
|
139
160
|
bool: True if the task was successfully cleared, False otherwise.
|
|
140
161
|
"""
|
|
141
162
|
try:
|
|
142
|
-
f = requests.delete(f
|
|
163
|
+
f = requests.delete(f"{WORKER_URL}/clear_task/{file_ref}")
|
|
143
164
|
if f.status_code == 200:
|
|
144
165
|
return True
|
|
145
166
|
return False
|
|
@@ -155,16 +176,16 @@ def get_df_result(encoded_df: str) -> pl.LazyFrame:
|
|
|
155
176
|
|
|
156
177
|
def get_external_df_result(file_ref: str) -> pl.LazyFrame | None:
|
|
157
178
|
status = get_results(file_ref)
|
|
158
|
-
if status.status !=
|
|
179
|
+
if status.status != "Completed":
|
|
159
180
|
raise Exception(f"Status is not completed, {status.status}")
|
|
160
|
-
if status.result_type ==
|
|
181
|
+
if status.result_type == "polars":
|
|
161
182
|
return get_df_result(status.results)
|
|
162
183
|
else:
|
|
163
184
|
raise Exception(f"Result type is not polars, {status.result_type}")
|
|
164
185
|
|
|
165
186
|
|
|
166
187
|
def get_status(file_ref: str) -> Status:
|
|
167
|
-
status_response = requests.get(f
|
|
188
|
+
status_response = requests.get(f"{WORKER_URL}/status/{file_ref}")
|
|
168
189
|
if status_response.status_code == 200:
|
|
169
190
|
return Status(**status_response.json())
|
|
170
191
|
else:
|
|
@@ -185,222 +206,369 @@ def cancel_task(file_ref: str) -> bool:
|
|
|
185
206
|
Exception: If there's an error communicating with the worker service
|
|
186
207
|
"""
|
|
187
208
|
try:
|
|
188
|
-
response = requests.post(f
|
|
209
|
+
response = requests.post(f"{WORKER_URL}/cancel_task/{file_ref}")
|
|
189
210
|
if response.ok:
|
|
190
211
|
return True
|
|
191
212
|
return False
|
|
192
213
|
except requests.RequestException as e:
|
|
193
|
-
raise Exception(f
|
|
214
|
+
raise Exception(f"Failed to cancel task: {str(e)}")
|
|
194
215
|
|
|
195
216
|
|
|
196
217
|
class BaseFetcher:
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
error_code: int = 0
|
|
201
|
-
error_description: Optional[str] = None
|
|
202
|
-
file_ref: Optional[str] = None
|
|
218
|
+
"""
|
|
219
|
+
Thread-safe fetcher for polling worker status and retrieving results.
|
|
220
|
+
"""
|
|
203
221
|
|
|
204
222
|
def __init__(self, file_ref: str = None):
|
|
205
223
|
self.file_ref = file_ref if file_ref else str(uuid4())
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
self.
|
|
209
|
-
self.
|
|
210
|
-
self.
|
|
211
|
-
self.
|
|
212
|
-
|
|
213
|
-
|
|
224
|
+
|
|
225
|
+
# Thread synchronization
|
|
226
|
+
self._lock = threading.Lock()
|
|
227
|
+
self._condition = threading.Condition(self._lock)
|
|
228
|
+
self._stop_event = threading.Event()
|
|
229
|
+
self._thread = None
|
|
230
|
+
|
|
231
|
+
# State variables - use properties for thread-safe access
|
|
232
|
+
self._result: Any | None = None
|
|
233
|
+
self._started: bool = False
|
|
234
|
+
self._running: bool = False
|
|
235
|
+
self._error_code: int = 0
|
|
236
|
+
self._error_description: str | None = None
|
|
237
|
+
|
|
238
|
+
# Public properties for compatibility with subclasses
|
|
239
|
+
@property
|
|
240
|
+
def result(self) -> Any | None:
|
|
241
|
+
with self._lock:
|
|
242
|
+
return self._result
|
|
243
|
+
|
|
244
|
+
@property
|
|
245
|
+
def started(self) -> bool:
|
|
246
|
+
with self._lock:
|
|
247
|
+
return self._started
|
|
248
|
+
|
|
249
|
+
@property
|
|
250
|
+
def running(self) -> bool:
|
|
251
|
+
with self._lock:
|
|
252
|
+
return self._running
|
|
253
|
+
|
|
254
|
+
@running.setter
|
|
255
|
+
def running(self, value: bool):
|
|
256
|
+
"""Allow subclasses to set running status and auto-start if needed."""
|
|
257
|
+
with self._lock:
|
|
258
|
+
self._running = value
|
|
259
|
+
# If subclass sets running=True, auto-start the thread
|
|
260
|
+
if value and not self._started:
|
|
261
|
+
self._start_thread()
|
|
262
|
+
|
|
263
|
+
@property
|
|
264
|
+
def error_code(self) -> int:
|
|
265
|
+
with self._lock:
|
|
266
|
+
return self._error_code
|
|
267
|
+
|
|
268
|
+
@property
|
|
269
|
+
def error_description(self) -> str | None:
|
|
270
|
+
with self._lock:
|
|
271
|
+
return self._error_description
|
|
272
|
+
|
|
273
|
+
def _start_thread(self):
|
|
274
|
+
"""Internal method to start thread (must be called under lock)."""
|
|
275
|
+
if not self._started:
|
|
276
|
+
self._thread = threading.Thread(target=self._fetch_cached_df, daemon=True)
|
|
277
|
+
self._thread.start()
|
|
278
|
+
self._started = True
|
|
214
279
|
|
|
215
280
|
def _fetch_cached_df(self):
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
logger.info('Already running the fetching')
|
|
219
|
-
return
|
|
281
|
+
"""Background thread that polls for results."""
|
|
282
|
+
sleep_time = 0.5
|
|
220
283
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
while not self.
|
|
284
|
+
# Don't check _running here - subclasses already set it
|
|
285
|
+
try:
|
|
286
|
+
while not self._stop_event.is_set():
|
|
224
287
|
try:
|
|
225
|
-
r = requests.get(f
|
|
288
|
+
r = requests.get(f"{WORKER_URL}/status/{self.file_ref}", timeout=10)
|
|
289
|
+
|
|
226
290
|
if r.status_code == 200:
|
|
227
291
|
status = Status(**r.json())
|
|
228
|
-
|
|
292
|
+
|
|
293
|
+
if status.status == "Completed":
|
|
229
294
|
self._handle_completion(status)
|
|
230
295
|
return
|
|
231
|
-
elif status.status ==
|
|
296
|
+
elif status.status == "Error":
|
|
232
297
|
self._handle_error(1, status.error_message)
|
|
233
|
-
|
|
234
|
-
elif status.status ==
|
|
235
|
-
self._handle_error(
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
298
|
+
return
|
|
299
|
+
elif status.status == "Unknown Error":
|
|
300
|
+
self._handle_error(
|
|
301
|
+
-1,
|
|
302
|
+
"There was an unknown error with the process, "
|
|
303
|
+
"and the process got killed by the server",
|
|
304
|
+
)
|
|
305
|
+
return
|
|
239
306
|
else:
|
|
240
|
-
self._handle_error(2, r.text)
|
|
241
|
-
|
|
307
|
+
self._handle_error(2, f"HTTP {r.status_code}: {r.text}")
|
|
308
|
+
return
|
|
309
|
+
|
|
242
310
|
except requests.RequestException as e:
|
|
243
311
|
self._handle_error(2, f"Request failed: {e}")
|
|
244
|
-
|
|
312
|
+
return
|
|
245
313
|
|
|
246
|
-
|
|
314
|
+
# Sleep without holding the lock
|
|
315
|
+
if not self._stop_event.wait(timeout=sleep_time):
|
|
316
|
+
continue
|
|
317
|
+
else:
|
|
318
|
+
break
|
|
247
319
|
|
|
320
|
+
# Only reached if stop_event was set
|
|
248
321
|
self._handle_cancellation()
|
|
249
322
|
|
|
323
|
+
except Exception as e:
|
|
324
|
+
# Catch any unexpected errors
|
|
325
|
+
logger.exception("Unexpected error in fetch thread")
|
|
326
|
+
self._handle_error(-1, f"Unexpected error: {e}")
|
|
327
|
+
|
|
250
328
|
def _handle_completion(self, status):
|
|
251
|
-
|
|
252
|
-
self.
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
329
|
+
"""Handle successful completion. Must be called from fetch thread."""
|
|
330
|
+
with self._condition:
|
|
331
|
+
try:
|
|
332
|
+
if status.result_type == "polars":
|
|
333
|
+
self._result = get_df_result(status.results)
|
|
334
|
+
else:
|
|
335
|
+
self._result = status.results
|
|
336
|
+
except Exception as e:
|
|
337
|
+
logger.exception("Error processing result")
|
|
338
|
+
self._error_code = -1
|
|
339
|
+
self._error_description = f"Error processing result: {e}"
|
|
340
|
+
finally:
|
|
341
|
+
self._running = False
|
|
342
|
+
self._condition.notify_all()
|
|
343
|
+
|
|
344
|
+
def _handle_error(self, code: int, description: str):
|
|
345
|
+
"""Handle error state. Must be called from fetch thread."""
|
|
346
|
+
with self._condition:
|
|
347
|
+
self._error_code = code
|
|
348
|
+
self._error_description = description
|
|
349
|
+
self._running = False
|
|
350
|
+
self._condition.notify_all()
|
|
263
351
|
|
|
264
352
|
def _handle_cancellation(self):
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
353
|
+
"""Handle cancellation. Must be called from fetch thread."""
|
|
354
|
+
with self._condition:
|
|
355
|
+
if self._error_description is None:
|
|
356
|
+
self._error_description = "Task cancelled"
|
|
357
|
+
logger.warning(f"Fetch operation cancelled: {self._error_description}")
|
|
358
|
+
self._running = False
|
|
359
|
+
self._condition.notify_all()
|
|
270
360
|
|
|
271
361
|
def start(self):
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
self.
|
|
362
|
+
"""Start the background fetch thread."""
|
|
363
|
+
with self._lock:
|
|
364
|
+
if self._started:
|
|
365
|
+
logger.info("Fetcher already started")
|
|
366
|
+
return
|
|
367
|
+
if self._running:
|
|
368
|
+
logger.info("Already running the fetching")
|
|
369
|
+
return
|
|
370
|
+
|
|
371
|
+
self._running = True
|
|
372
|
+
self._start_thread()
|
|
278
373
|
|
|
279
374
|
def cancel(self):
|
|
280
375
|
"""
|
|
281
376
|
Cancels the current task both locally and on the worker service.
|
|
282
377
|
Also cleans up any resources being used.
|
|
283
378
|
"""
|
|
284
|
-
logger.warning(
|
|
379
|
+
logger.warning("Cancelling the operation")
|
|
380
|
+
|
|
381
|
+
# Cancel on the worker side
|
|
285
382
|
try:
|
|
286
383
|
cancel_task(self.file_ref)
|
|
287
384
|
except Exception as e:
|
|
288
|
-
logger.error(f
|
|
385
|
+
logger.error(f"Failed to cancel task on worker: {str(e)}")
|
|
289
386
|
|
|
290
|
-
#
|
|
291
|
-
self.
|
|
292
|
-
self.thread.join()
|
|
387
|
+
# Signal the thread to stop
|
|
388
|
+
self._stop_event.set()
|
|
293
389
|
|
|
294
|
-
#
|
|
295
|
-
|
|
296
|
-
self.
|
|
297
|
-
self.
|
|
298
|
-
|
|
390
|
+
# Wait for thread to finish
|
|
391
|
+
if self._thread and self._thread.is_alive():
|
|
392
|
+
self._thread.join(timeout=5.0)
|
|
393
|
+
if self._thread.is_alive():
|
|
394
|
+
logger.warning("Fetch thread did not stop within timeout")
|
|
299
395
|
|
|
300
|
-
def get_result(self) ->
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
with self.condition:
|
|
304
|
-
while self.running and self.result is None:
|
|
305
|
-
self.condition.wait() # Wait until notified
|
|
306
|
-
if self.error_description is not None:
|
|
307
|
-
raise Exception(self.error_description)
|
|
308
|
-
return self.result
|
|
396
|
+
def get_result(self) -> Any | None:
|
|
397
|
+
"""
|
|
398
|
+
Get the result, blocking until it's available.
|
|
309
399
|
|
|
400
|
+
Returns:
|
|
401
|
+
The fetched result.
|
|
402
|
+
|
|
403
|
+
Raises:
|
|
404
|
+
Exception: If an error occurred during fetching.
|
|
405
|
+
"""
|
|
406
|
+
# Start if not already started (for manual usage)
|
|
407
|
+
with self._lock:
|
|
408
|
+
if not self._started:
|
|
409
|
+
if not self._running:
|
|
410
|
+
self._running = True
|
|
411
|
+
self._start_thread()
|
|
412
|
+
|
|
413
|
+
# Wait for completion
|
|
414
|
+
with self._condition:
|
|
415
|
+
while self._running:
|
|
416
|
+
self._condition.wait()
|
|
417
|
+
|
|
418
|
+
# Check for errors
|
|
419
|
+
with self._lock:
|
|
420
|
+
if self._error_description is not None:
|
|
421
|
+
raise Exception(self._error_description)
|
|
422
|
+
return self._result
|
|
423
|
+
|
|
424
|
+
@property
|
|
425
|
+
def is_running(self) -> bool:
|
|
426
|
+
"""Check if the fetcher is currently running."""
|
|
427
|
+
with self._lock:
|
|
428
|
+
return self._running
|
|
429
|
+
|
|
430
|
+
@property
|
|
431
|
+
def has_error(self) -> bool:
|
|
432
|
+
"""Check if the fetcher encountered an error."""
|
|
433
|
+
with self._lock:
|
|
434
|
+
return self._error_description is not None
|
|
435
|
+
|
|
436
|
+
@property
|
|
437
|
+
def error_info(self) -> tuple[int, str | None]:
|
|
438
|
+
"""Get error code and description."""
|
|
439
|
+
with self._lock:
|
|
440
|
+
return self._error_code, self._error_description
|
|
310
441
|
|
|
311
|
-
class ExternalDfFetcher(BaseFetcher):
|
|
312
|
-
status: Optional[Status] = None
|
|
313
442
|
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
443
|
+
class ExternalDfFetcher(BaseFetcher):
|
|
444
|
+
status: Status | None = None
|
|
445
|
+
|
|
446
|
+
def __init__(
|
|
447
|
+
self,
|
|
448
|
+
flow_id: int,
|
|
449
|
+
node_id: int | str,
|
|
450
|
+
lf: pl.LazyFrame | pl.DataFrame,
|
|
451
|
+
file_ref: str = None,
|
|
452
|
+
wait_on_completion: bool = True,
|
|
453
|
+
operation_type: OperationType = "store",
|
|
454
|
+
offload_to_worker: bool = True,
|
|
455
|
+
):
|
|
317
456
|
super().__init__(file_ref=file_ref)
|
|
318
457
|
lf = lf.lazy() if isinstance(lf, pl.DataFrame) else lf
|
|
319
|
-
r = trigger_df_operation(
|
|
320
|
-
|
|
321
|
-
|
|
458
|
+
r = trigger_df_operation(
|
|
459
|
+
lf=lf, file_ref=self.file_ref, operation_type=operation_type, node_id=node_id, flow_id=flow_id
|
|
460
|
+
)
|
|
461
|
+
self.running = r.status == "Processing"
|
|
322
462
|
if wait_on_completion:
|
|
323
463
|
_ = self.get_result()
|
|
324
464
|
self.status = get_status(self.file_ref)
|
|
325
465
|
|
|
326
466
|
|
|
327
467
|
class ExternalSampler(BaseFetcher):
|
|
328
|
-
status:
|
|
329
|
-
|
|
330
|
-
def __init__(
|
|
331
|
-
|
|
468
|
+
status: Status | None = None
|
|
469
|
+
|
|
470
|
+
def __init__(
|
|
471
|
+
self,
|
|
472
|
+
lf: pl.LazyFrame | pl.DataFrame,
|
|
473
|
+
node_id: str | int,
|
|
474
|
+
flow_id: int,
|
|
475
|
+
file_ref: str = None,
|
|
476
|
+
wait_on_completion: bool = True,
|
|
477
|
+
sample_size: int = 100,
|
|
478
|
+
):
|
|
332
479
|
super().__init__(file_ref=file_ref)
|
|
333
480
|
lf = lf.lazy() if isinstance(lf, pl.DataFrame) else lf
|
|
334
|
-
r = trigger_sample_operation(
|
|
335
|
-
|
|
481
|
+
r = trigger_sample_operation(
|
|
482
|
+
lf=lf, file_ref=file_ref, sample_size=sample_size, node_id=node_id, flow_id=flow_id
|
|
483
|
+
)
|
|
484
|
+
self.running = r.status == "Processing"
|
|
336
485
|
if wait_on_completion:
|
|
337
486
|
_ = self.get_result()
|
|
338
487
|
self.status = get_status(self.file_ref)
|
|
339
488
|
|
|
340
489
|
|
|
341
490
|
class ExternalFuzzyMatchFetcher(BaseFetcher):
|
|
342
|
-
def __init__(
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
491
|
+
def __init__(
|
|
492
|
+
self,
|
|
493
|
+
left_df: pl.LazyFrame,
|
|
494
|
+
right_df: pl.LazyFrame,
|
|
495
|
+
fuzzy_maps: list[Any],
|
|
496
|
+
flow_id: int,
|
|
497
|
+
node_id: int | str,
|
|
498
|
+
file_ref: str = None,
|
|
499
|
+
wait_on_completion: bool = True,
|
|
500
|
+
):
|
|
346
501
|
super().__init__(file_ref=file_ref)
|
|
347
502
|
|
|
348
|
-
r = trigger_fuzzy_match_operation(
|
|
349
|
-
|
|
503
|
+
r = trigger_fuzzy_match_operation(
|
|
504
|
+
left_df=left_df,
|
|
505
|
+
right_df=right_df,
|
|
506
|
+
fuzzy_maps=fuzzy_maps,
|
|
507
|
+
file_ref=file_ref,
|
|
508
|
+
flow_id=flow_id,
|
|
509
|
+
node_id=node_id,
|
|
510
|
+
)
|
|
350
511
|
self.file_ref = r.background_task_id
|
|
351
|
-
self.running = r.status ==
|
|
512
|
+
self.running = r.status == "Processing"
|
|
352
513
|
if wait_on_completion:
|
|
353
514
|
_ = self.get_result()
|
|
354
515
|
|
|
355
516
|
|
|
356
517
|
class ExternalCreateFetcher(BaseFetcher):
|
|
357
|
-
def __init__(
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
518
|
+
def __init__(
|
|
519
|
+
self,
|
|
520
|
+
received_table: ReceivedTable,
|
|
521
|
+
node_id: int,
|
|
522
|
+
flow_id: int,
|
|
523
|
+
file_type: str = "csv",
|
|
524
|
+
wait_on_completion: bool = True,
|
|
525
|
+
):
|
|
526
|
+
r = trigger_create_operation(
|
|
527
|
+
received_table=received_table, file_type=file_type, node_id=node_id, flow_id=flow_id
|
|
528
|
+
)
|
|
361
529
|
super().__init__(file_ref=r.background_task_id)
|
|
362
|
-
self.running = r.status ==
|
|
530
|
+
self.running = r.status == "Processing"
|
|
363
531
|
if wait_on_completion:
|
|
364
532
|
_ = self.get_result()
|
|
365
533
|
|
|
366
534
|
|
|
367
535
|
class ExternalDatabaseFetcher(BaseFetcher):
|
|
368
|
-
def __init__(self, database_external_read_settings: DatabaseExternalReadSettings,
|
|
369
|
-
wait_on_completion: bool = True):
|
|
536
|
+
def __init__(self, database_external_read_settings: DatabaseExternalReadSettings, wait_on_completion: bool = True):
|
|
370
537
|
r = trigger_database_read_collector(database_external_read_settings=database_external_read_settings)
|
|
371
538
|
super().__init__(file_ref=r.background_task_id)
|
|
372
|
-
self.running = r.status ==
|
|
539
|
+
self.running = r.status == "Processing"
|
|
373
540
|
if wait_on_completion:
|
|
374
541
|
_ = self.get_result()
|
|
375
542
|
|
|
376
543
|
|
|
377
544
|
class ExternalDatabaseWriter(BaseFetcher):
|
|
378
|
-
def __init__(
|
|
379
|
-
|
|
545
|
+
def __init__(
|
|
546
|
+
self, database_external_write_settings: DatabaseExternalWriteSettings, wait_on_completion: bool = True
|
|
547
|
+
):
|
|
380
548
|
r = trigger_database_write(database_external_write_settings=database_external_write_settings)
|
|
381
549
|
super().__init__(file_ref=r.background_task_id)
|
|
382
|
-
self.running = r.status ==
|
|
550
|
+
self.running = r.status == "Processing"
|
|
383
551
|
if wait_on_completion:
|
|
384
552
|
_ = self.get_result()
|
|
385
553
|
|
|
386
554
|
|
|
387
555
|
class ExternalCloudWriter(BaseFetcher):
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
556
|
+
def __init__(
|
|
557
|
+
self, cloud_storage_write_settings: CloudStorageWriteSettingsWorkerInterface, wait_on_completion: bool = True
|
|
558
|
+
):
|
|
391
559
|
r = trigger_cloud_storage_write(database_external_write_settings=cloud_storage_write_settings)
|
|
392
560
|
super().__init__(file_ref=r.background_task_id)
|
|
393
|
-
self.running = r.status ==
|
|
561
|
+
self.running = r.status == "Processing"
|
|
394
562
|
if wait_on_completion:
|
|
395
563
|
_ = self.get_result()
|
|
396
564
|
|
|
397
565
|
|
|
398
566
|
class ExternalExecutorTracker:
|
|
399
|
-
result:
|
|
567
|
+
result: pl.LazyFrame | None
|
|
400
568
|
started: bool = False
|
|
401
569
|
running: bool = False
|
|
402
570
|
error_code: int = 0
|
|
403
|
-
error_description:
|
|
571
|
+
error_description: str | None = None
|
|
404
572
|
file_ref: str = None
|
|
405
573
|
|
|
406
574
|
def __init__(self, initial_response: Status, wait_on_completion: bool = True):
|
|
@@ -409,7 +577,7 @@ class ExternalExecutorTracker:
|
|
|
409
577
|
self.thread = threading.Thread(target=self._fetch_cached_df)
|
|
410
578
|
self.result = None
|
|
411
579
|
self.error_description = None
|
|
412
|
-
self.running = initial_response.status ==
|
|
580
|
+
self.running = initial_response.status == "Processing"
|
|
413
581
|
self.condition = threading.Condition()
|
|
414
582
|
if wait_on_completion:
|
|
415
583
|
_ = self.get_result()
|
|
@@ -417,30 +585,32 @@ class ExternalExecutorTracker:
|
|
|
417
585
|
def _fetch_cached_df(self):
|
|
418
586
|
with self.condition:
|
|
419
587
|
if self.running:
|
|
420
|
-
logger.info(
|
|
588
|
+
logger.info("Already running the fetching")
|
|
421
589
|
return
|
|
422
590
|
sleep_time = 1
|
|
423
591
|
self.running = True
|
|
424
592
|
while not self.stop_event.is_set():
|
|
425
593
|
try:
|
|
426
|
-
r = requests.get(f
|
|
594
|
+
r = requests.get(f"{WORKER_URL}/status/{self.file_ref}")
|
|
427
595
|
if r.status_code == 200:
|
|
428
596
|
status = Status(**r.json())
|
|
429
|
-
if status.status ==
|
|
597
|
+
if status.status == "Completed":
|
|
430
598
|
self.running = False
|
|
431
599
|
self.condition.notify_all() # Notify all waiting threads
|
|
432
|
-
if status.result_type ==
|
|
600
|
+
if status.result_type == "polars":
|
|
433
601
|
self.result = get_df_result(status.results)
|
|
434
602
|
else:
|
|
435
603
|
self.result = status.results
|
|
436
604
|
return
|
|
437
|
-
elif status.status ==
|
|
605
|
+
elif status.status == "Error":
|
|
438
606
|
self.error_code = 1
|
|
439
607
|
self.error_description = status.error_message
|
|
440
608
|
break
|
|
441
|
-
elif status.status ==
|
|
609
|
+
elif status.status == "Unknown Error":
|
|
442
610
|
self.error_code = -1
|
|
443
|
-
self.error_description =
|
|
611
|
+
self.error_description = (
|
|
612
|
+
"There was an unknown error with the process, and the process got killed by the server"
|
|
613
|
+
)
|
|
444
614
|
break
|
|
445
615
|
else:
|
|
446
616
|
self.error_description = r.text
|
|
@@ -464,12 +634,12 @@ class ExternalExecutorTracker:
|
|
|
464
634
|
def start(self):
|
|
465
635
|
self.started = True
|
|
466
636
|
if self.running:
|
|
467
|
-
logger.info(
|
|
637
|
+
logger.info("Already running the fetching")
|
|
468
638
|
return
|
|
469
639
|
self.thread.start()
|
|
470
640
|
|
|
471
641
|
def cancel(self):
|
|
472
|
-
logger.warning(
|
|
642
|
+
logger.warning("Cancelling the operation")
|
|
473
643
|
self.thread.join()
|
|
474
644
|
|
|
475
645
|
self.running = False
|
|
@@ -485,7 +655,7 @@ class ExternalExecutorTracker:
|
|
|
485
655
|
return self.result
|
|
486
656
|
|
|
487
657
|
|
|
488
|
-
def fetch_unique_values(lf: pl.LazyFrame) ->
|
|
658
|
+
def fetch_unique_values(lf: pl.LazyFrame) -> list[str]:
|
|
489
659
|
"""
|
|
490
660
|
Fetches unique values from a specified column in a LazyFrame, attempting first via an external fetcher
|
|
491
661
|
and falling back to direct LazyFrame computation if that fails.
|
|
@@ -510,8 +680,7 @@ def fetch_unique_values(lf: pl.LazyFrame) -> List[str]:
|
|
|
510
680
|
# Try external source first if lf is provided
|
|
511
681
|
try:
|
|
512
682
|
external_df_fetcher = ExternalDfFetcher(lf=lf, flow_id=1, node_id=-1)
|
|
513
|
-
if external_df_fetcher.status.status ==
|
|
514
|
-
|
|
683
|
+
if external_df_fetcher.status.status == "Completed":
|
|
515
684
|
unique_values = read(external_df_fetcher.status.file_ref).column(0).to_pylist()
|
|
516
685
|
if logger:
|
|
517
686
|
logger.info(f"Got {len(unique_values)} unique values from external source")
|
|
@@ -520,10 +689,10 @@ def fetch_unique_values(lf: pl.LazyFrame) -> List[str]:
|
|
|
520
689
|
if logger:
|
|
521
690
|
logger.debug(f"Failed reading external file: {str(e)}")
|
|
522
691
|
|
|
523
|
-
unique_values =
|
|
692
|
+
unique_values = lf.unique().collect(engine="streaming")[:, 0].to_list()
|
|
524
693
|
|
|
525
694
|
if not unique_values:
|
|
526
|
-
raise ValueError(
|
|
695
|
+
raise ValueError("No unique values found in lazyframe")
|
|
527
696
|
|
|
528
697
|
return unique_values
|
|
529
698
|
|