Flowfile 0.5.1__py3-none-any.whl → 0.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backends/main.py +25 -22
- build_backends/main_prd.py +10 -19
- flowfile/__init__.py +194 -74
- flowfile/__main__.py +10 -7
- flowfile/api.py +51 -57
- flowfile/web/__init__.py +14 -9
- flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
- flowfile/web/static/assets/AdminView-f9847d67.js +713 -0
- flowfile/web/static/assets/CloudConnectionView-cf85f943.css +72 -0
- flowfile/web/static/assets/{CloudConnectionManager-0dfba9f2.js → CloudConnectionView-faace55b.js} +11 -11
- flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
- flowfile/web/static/assets/{CloudStorageReader-d5b1b6c9.js → CloudStorageReader-d86ecaa7.js} +10 -8
- flowfile/web/static/assets/{CloudStorageWriter-00d87aad.js → CloudStorageWriter-0f4d9a44.js} +10 -8
- flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
- flowfile/web/static/assets/ColumnActionInput-c44b7aee.css +159 -0
- flowfile/web/static/assets/ColumnActionInput-f4189ae0.js +330 -0
- flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
- flowfile/web/static/assets/{ColumnSelector-4685e75d.js → ColumnSelector-e66b33da.js} +3 -5
- flowfile/web/static/assets/ContextMenu-49463352.js +9 -0
- flowfile/web/static/assets/ContextMenu-dd5f3f25.js +9 -0
- flowfile/web/static/assets/ContextMenu-f709b884.js +9 -0
- flowfile/web/static/assets/ContextMenu.vue_vue_type_script_setup_true_lang-a1bd6314.js +59 -0
- flowfile/web/static/assets/{CrossJoin-702a3edd.js → CrossJoin-24694b8f.js} +12 -10
- flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
- flowfile/web/static/assets/{CustomNode-b1519993.js → CustomNode-569d45ff.js} +43 -24
- flowfile/web/static/assets/CustomNode-edb9b939.css +42 -0
- flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-c20a1e16.css} +23 -21
- flowfile/web/static/assets/{DatabaseConnectionSettings-6f3e4ea5.js → DatabaseConnectionSettings-cfc08938.js} +5 -4
- flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-5bf8c75b.css} +41 -46
- flowfile/web/static/assets/{DatabaseReader-d38c7295.js → DatabaseReader-701feabb.js} +25 -15
- flowfile/web/static/assets/{DatabaseManager-cf5ef661.js → DatabaseView-0482e5b5.js} +11 -11
- flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
- flowfile/web/static/assets/{DatabaseWriter-b04ef46a.js → DatabaseWriter-16721989.js} +17 -10
- flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-bdcf2c8b.css} +29 -27
- flowfile/web/static/assets/{designer-8da3ba3a.css → DesignerView-49abb835.css} +783 -663
- flowfile/web/static/assets/{designer-9633482a.js → DesignerView-f64749fb.js} +1292 -3253
- flowfile/web/static/assets/{documentation-ca400224.js → DocumentationView-61bd2990.js} +5 -5
- flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-9ea6e871.css} +9 -9
- flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
- flowfile/web/static/assets/{ExploreData-5fa10ed8.js → ExploreData-e2735b13.js} +18 -9
- flowfile/web/static/assets/{ExternalSource-d39af878.js → ExternalSource-2535c3b2.js} +9 -7
- flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-7ac7373f.css} +20 -20
- flowfile/web/static/assets/Filter-2cdbc93c.js +287 -0
- flowfile/web/static/assets/Filter-7494ea97.css +48 -0
- flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
- flowfile/web/static/assets/{Formula-6b04fb1d.js → Formula-fcda3c2c.js} +13 -11
- flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
- flowfile/web/static/assets/{FuzzyMatch-999521f4.js → FuzzyMatch-f8d3b7d3.js} +12 -10
- flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-4b4d7db9.css} +5 -5
- flowfile/web/static/assets/{GraphSolver-17dd2198.js → GraphSolver-72eaa695.js} +14 -12
- flowfile/web/static/assets/GroupBy-5792782d.css +9 -0
- flowfile/web/static/assets/{GroupBy-6b039e18.js → GroupBy-8aa0598b.js} +9 -7
- flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
- flowfile/web/static/assets/{Join-24d0f113.js → Join-e40f0ffa.js} +13 -11
- flowfile/web/static/assets/LoginView-5111c9ae.js +134 -0
- flowfile/web/static/assets/LoginView-d325d632.css +172 -0
- flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
- flowfile/web/static/assets/{ManualInput-34639209.js → ManualInput-9b6f3224.js} +170 -116
- flowfile/web/static/assets/{MultiSelect-0e8724a3.js → MultiSelect-ef28e19e.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js → MultiSelect.vue_vue_type_script_setup_true_lang-83b3bbfd.js} +1 -1
- flowfile/web/static/assets/NodeDesigner-94cd4dd3.css +1429 -0
- flowfile/web/static/assets/NodeDesigner-d2b7ee2b.js +2712 -0
- flowfile/web/static/assets/{NumericInput-3d63a470.js → NumericInput-1d789794.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js → NumericInput.vue_vue_type_script_setup_true_lang-7775f83e.js} +5 -2
- flowfile/web/static/assets/Output-692dd25d.css +37 -0
- flowfile/web/static/assets/{Output-edea9802.js → Output-cefef801.js} +14 -10
- flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
- flowfile/web/static/assets/{Pivot-61d19301.js → Pivot-bab1b75b.js} +12 -10
- flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
- flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
- flowfile/web/static/assets/{PivotValidation-f97fec5b.js → PivotValidation-e7941f91.js} +3 -3
- flowfile/web/static/assets/{PivotValidation-de9f43fe.js → PivotValidation-fba09336.js} +3 -3
- flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
- flowfile/web/static/assets/{PolarsCode-bc3c9984.js → PolarsCode-740e40fa.js} +18 -9
- flowfile/web/static/assets/PopOver-862d7e28.js +939 -0
- flowfile/web/static/assets/PopOver-d96599db.css +33 -0
- flowfile/web/static/assets/{Read-64a3f259.js → Read-225cc63f.js} +16 -12
- flowfile/web/static/assets/{Read-e808b239.css → Read-90f366bc.css} +15 -15
- flowfile/web/static/assets/{RecordCount-3d5039be.js → RecordCount-ffc71eca.js} +6 -4
- flowfile/web/static/assets/{RecordId-597510e0.js → RecordId-a70bb8df.js} +9 -7
- flowfile/web/static/assets/{SQLQueryComponent-df51adbe.js → SQLQueryComponent-15a421f5.js} +3 -3
- flowfile/web/static/assets/SQLQueryComponent-edb90b98.css +29 -0
- flowfile/web/static/assets/{Sample-4be0a507.js → Sample-6c26afc7.js} +6 -4
- flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
- flowfile/web/static/assets/SecretSelector-ceed9496.js +113 -0
- flowfile/web/static/assets/{SecretManager-4839be57.js → SecretsView-214d255a.js} +35 -36
- flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
- flowfile/web/static/assets/{Select-9b72f201.js → Select-8fc29999.js} +9 -7
- flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-7ded385d.js → SettingsSection-3f70e4c3.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-f0f75a42.js → SettingsSection-83090218.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-e1e9c953.js → SettingsSection-9f0d1725.js} +3 -3
- flowfile/web/static/assets/SetupView-3fa0aa03.js +160 -0
- flowfile/web/static/assets/SetupView-e2da3442.css +230 -0
- flowfile/web/static/assets/{SingleSelect-6c777aac.js → SingleSelect-a4a568cb.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js → SingleSelect.vue_vue_type_script_setup_true_lang-c8ebdd33.js} +1 -1
- flowfile/web/static/assets/{SliderInput-7cb93e62.js → SliderInput-be533e71.js} +7 -4
- flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
- flowfile/web/static/assets/{Sort-6cbde21a.js → Sort-154dad81.js} +9 -7
- flowfile/web/static/assets/Sort-4abb7fae.css +9 -0
- flowfile/web/static/assets/{TextInput-d9a40c11.js → TextInput-454e2bda.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-5896c375.js → TextInput.vue_vue_type_script_setup_true_lang-e86510d0.js} +5 -2
- flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
- flowfile/web/static/assets/{TextToRows-c4fcbf4d.js → TextToRows-ea73433d.js} +11 -10
- flowfile/web/static/assets/{ToggleSwitch-4ef91d19.js → ToggleSwitch-9d7b30f1.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-00f2580e.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-394a1f78.css} +14 -14
- flowfile/web/static/assets/{UnavailableFields-a03f512c.js → UnavailableFields-b72a2c72.js} +4 -4
- flowfile/web/static/assets/{Union-bfe9b996.js → Union-1e44f263.js} +8 -6
- flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
- flowfile/web/static/assets/Unique-2b705521.css +3 -0
- flowfile/web/static/assets/{Unique-5d023a27.js → Unique-a3bc6d0a.js} +13 -10
- flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-b6ad6427.css} +7 -7
- flowfile/web/static/assets/{Unpivot-91cc5354.js → Unpivot-e27935fc.js} +11 -9
- flowfile/web/static/assets/{UnpivotValidation-7ee2de44.js → UnpivotValidation-72497680.js} +3 -3
- flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
- flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
- flowfile/web/static/assets/{VueGraphicWalker-e51b9924.js → VueGraphicWalker-d9ab70a3.js} +4 -4
- flowfile/web/static/assets/{api-cf1221f0.js → api-a2102880.js} +1 -1
- flowfile/web/static/assets/{api-c1bad5ca.js → api-f75042b0.js} +1 -1
- flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-1d6acbd9.css} +41 -41
- flowfile/web/static/assets/{dropDown-614b998d.js → dropDown-2798a109.js} +3 -3
- flowfile/web/static/assets/{fullEditor-f7971590.js → fullEditor-cf7d7d93.js} +11 -10
- flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-fe9f7e18.css} +77 -65
- flowfile/web/static/assets/{genericNodeSettings-4fe5f36b.js → genericNodeSettings-14eac1c3.js} +5 -5
- flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
- flowfile/web/static/assets/{index-5429bbf8.js → index-387a6f18.js} +41806 -40958
- flowfile/web/static/assets/index-6b367bb5.js +38 -0
- flowfile/web/static/assets/{index-50508d4d.css → index-e96ab018.css} +2184 -569
- flowfile/web/static/assets/index-f0a6e5a5.js +2696 -0
- flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
- flowfile/web/static/assets/nodeInput-ed2ae8d7.js +2 -0
- flowfile/web/static/assets/{outputCsv-076b85ab.js → outputCsv-3c1757e8.js} +3 -3
- flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
- flowfile/web/static/assets/{outputExcel-0fd17dbe.js → outputExcel-686e1f48.js} +3 -3
- flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
- flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
- flowfile/web/static/assets/{outputParquet-b61e0847.js → outputParquet-df28faa7.js} +4 -4
- flowfile/web/static/assets/{readCsv-c767cb37.css → readCsv-3bfac4c3.css} +15 -15
- flowfile/web/static/assets/{readCsv-a8bb8b61.js → readCsv-e37eee21.js} +3 -3
- flowfile/web/static/assets/{readExcel-806d2826.css → readExcel-3db6b763.css} +13 -13
- flowfile/web/static/assets/{readExcel-67b4aee0.js → readExcel-a13f14bb.js} +5 -5
- flowfile/web/static/assets/{readParquet-92ce1dbc.js → readParquet-344cf746.js} +3 -3
- flowfile/web/static/assets/{readParquet-48c81530.css → readParquet-c5244ad5.css} +4 -4
- flowfile/web/static/assets/secrets.api-ae198c5c.js +65 -0
- flowfile/web/static/assets/{selectDynamic-92e25ee3.js → selectDynamic-6b4b0767.js} +5 -5
- flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
- flowfile/web/static/assets/{vue-codemirror.esm-41b0e0d7.js → vue-codemirror.esm-31ba0e0b.js} +31 -640
- flowfile/web/static/assets/{vue-content-loader.es-2c8e608f.js → vue-content-loader.es-4469c8ff.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/METADATA +3 -4
- flowfile-0.5.4.dist-info/RECORD +407 -0
- flowfile_core/__init__.py +13 -6
- flowfile_core/auth/jwt.py +51 -16
- flowfile_core/auth/models.py +32 -7
- flowfile_core/auth/password.py +89 -0
- flowfile_core/auth/secrets.py +64 -19
- flowfile_core/configs/__init__.py +9 -7
- flowfile_core/configs/flow_logger.py +15 -14
- flowfile_core/configs/node_store/__init__.py +72 -4
- flowfile_core/configs/node_store/nodes.py +155 -172
- flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
- flowfile_core/configs/settings.py +28 -15
- flowfile_core/database/connection.py +7 -6
- flowfile_core/database/init_db.py +96 -2
- flowfile_core/database/models.py +3 -1
- flowfile_core/fileExplorer/__init__.py +17 -0
- flowfile_core/fileExplorer/funcs.py +145 -57
- flowfile_core/fileExplorer/utils.py +10 -11
- flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
- flowfile_core/flowfile/analytics/analytics_processor.py +26 -24
- flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
- flowfile_core/flowfile/analytics/utils.py +1 -1
- flowfile_core/flowfile/code_generator/__init__.py +11 -0
- flowfile_core/flowfile/code_generator/code_generator.py +706 -247
- flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
- flowfile_core/flowfile/database_connection_manager/models.py +1 -1
- flowfile_core/flowfile/extensions.py +17 -12
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +115 -83
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +493 -423
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +31 -20
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +14 -15
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
- flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +190 -127
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
- flowfile_core/flowfile/flow_data_engine/utils.py +99 -67
- flowfile_core/flowfile/flow_graph.py +920 -571
- flowfile_core/flowfile/flow_graph_utils.py +31 -49
- flowfile_core/flowfile/flow_node/flow_node.py +379 -258
- flowfile_core/flowfile/flow_node/models.py +53 -41
- flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
- flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
- flowfile_core/flowfile/handler.py +80 -30
- flowfile_core/flowfile/manage/compatibility_enhancements.py +209 -126
- flowfile_core/flowfile/manage/io_flowfile.py +54 -57
- flowfile_core/flowfile/node_designer/__init__.py +19 -13
- flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
- flowfile_core/flowfile/node_designer/custom_node.py +162 -36
- flowfile_core/flowfile/node_designer/ui_components.py +278 -34
- flowfile_core/flowfile/schema_callbacks.py +71 -51
- flowfile_core/flowfile/setting_generator/__init__.py +0 -1
- flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
- flowfile_core/flowfile/setting_generator/settings.py +64 -53
- flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
- flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
- flowfile_core/flowfile/util/calculate_layout.py +9 -13
- flowfile_core/flowfile/util/execution_orderer.py +25 -17
- flowfile_core/flowfile/util/node_skipper.py +4 -4
- flowfile_core/flowfile/utils.py +19 -21
- flowfile_core/main.py +26 -19
- flowfile_core/routes/auth.py +284 -11
- flowfile_core/routes/cloud_connections.py +25 -25
- flowfile_core/routes/logs.py +21 -29
- flowfile_core/routes/public.py +46 -4
- flowfile_core/routes/routes.py +70 -34
- flowfile_core/routes/secrets.py +25 -27
- flowfile_core/routes/user_defined_components.py +483 -4
- flowfile_core/run_lock.py +0 -1
- flowfile_core/schemas/__init__.py +4 -6
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
- flowfile_core/schemas/cloud_storage_schemas.py +96 -66
- flowfile_core/schemas/input_schema.py +231 -144
- flowfile_core/schemas/output_model.py +49 -34
- flowfile_core/schemas/schemas.py +116 -89
- flowfile_core/schemas/transform_schema.py +518 -263
- flowfile_core/schemas/yaml_types.py +21 -7
- flowfile_core/secret_manager/secret_manager.py +123 -18
- flowfile_core/types.py +29 -9
- flowfile_core/utils/arrow_reader.py +7 -6
- flowfile_core/utils/excel_file_manager.py +3 -3
- flowfile_core/utils/fileManager.py +7 -7
- flowfile_core/utils/fl_executor.py +8 -10
- flowfile_core/utils/utils.py +4 -4
- flowfile_core/utils/validate_setup.py +5 -4
- flowfile_frame/__init__.py +117 -51
- flowfile_frame/adapters.py +2 -9
- flowfile_frame/adding_expr.py +73 -32
- flowfile_frame/cloud_storage/frame_helpers.py +27 -23
- flowfile_frame/cloud_storage/secret_manager.py +12 -26
- flowfile_frame/config.py +2 -5
- flowfile_frame/database/__init__.py +36 -0
- flowfile_frame/database/connection_manager.py +205 -0
- flowfile_frame/database/frame_helpers.py +249 -0
- flowfile_frame/expr.py +311 -218
- flowfile_frame/expr.pyi +160 -159
- flowfile_frame/expr_name.py +23 -23
- flowfile_frame/flow_frame.py +571 -476
- flowfile_frame/flow_frame.pyi +123 -104
- flowfile_frame/flow_frame_methods.py +227 -246
- flowfile_frame/group_frame.py +50 -20
- flowfile_frame/join.py +2 -2
- flowfile_frame/lazy.py +129 -87
- flowfile_frame/lazy_methods.py +83 -30
- flowfile_frame/list_name_space.py +55 -50
- flowfile_frame/selectors.py +148 -68
- flowfile_frame/series.py +9 -7
- flowfile_frame/utils.py +19 -21
- flowfile_worker/__init__.py +12 -7
- flowfile_worker/configs.py +41 -33
- flowfile_worker/create/__init__.py +14 -9
- flowfile_worker/create/funcs.py +114 -77
- flowfile_worker/create/models.py +46 -43
- flowfile_worker/create/pl_types.py +14 -15
- flowfile_worker/create/read_excel_tables.py +34 -41
- flowfile_worker/create/utils.py +22 -19
- flowfile_worker/external_sources/s3_source/main.py +18 -51
- flowfile_worker/external_sources/s3_source/models.py +34 -27
- flowfile_worker/external_sources/sql_source/main.py +8 -5
- flowfile_worker/external_sources/sql_source/models.py +13 -9
- flowfile_worker/flow_logger.py +10 -8
- flowfile_worker/funcs.py +214 -155
- flowfile_worker/main.py +11 -17
- flowfile_worker/models.py +35 -28
- flowfile_worker/process_manager.py +2 -3
- flowfile_worker/routes.py +121 -90
- flowfile_worker/secrets.py +114 -21
- flowfile_worker/spawner.py +89 -54
- flowfile_worker/utils.py +3 -2
- shared/__init__.py +2 -7
- shared/storage_config.py +25 -13
- test_utils/postgres/commands.py +3 -2
- test_utils/postgres/fixtures.py +9 -9
- test_utils/s3/commands.py +1 -1
- test_utils/s3/data_generator.py +3 -4
- test_utils/s3/demo_data_generator.py +4 -7
- test_utils/s3/fixtures.py +7 -5
- tools/migrate/__init__.py +1 -1
- tools/migrate/__main__.py +16 -29
- tools/migrate/legacy_schemas.py +251 -190
- tools/migrate/migrate.py +193 -181
- tools/migrate/tests/conftest.py +1 -3
- tools/migrate/tests/test_migrate.py +36 -41
- tools/migrate/tests/test_migration_e2e.py +28 -29
- tools/migrate/tests/test_node_migrations.py +50 -20
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
- flowfile/web/static/assets/ContextMenu-23e909da.js +0 -41
- flowfile/web/static/assets/ContextMenu-4c74eef1.css +0 -26
- flowfile/web/static/assets/ContextMenu-63cfa99b.css +0 -26
- flowfile/web/static/assets/ContextMenu-70ae0c79.js +0 -41
- flowfile/web/static/assets/ContextMenu-c13f91d0.css +0 -26
- flowfile/web/static/assets/ContextMenu-f149cf7c.js +0 -41
- flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
- flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
- flowfile/web/static/assets/Filter-9b6d08db.js +0 -164
- flowfile/web/static/assets/Filter-f62091b3.css +0 -20
- flowfile/web/static/assets/GroupBy-b9505323.css +0 -51
- flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
- flowfile/web/static/assets/Output-283fe388.css +0 -37
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
- flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
- flowfile/web/static/assets/SQLQueryComponent-36cef432.css +0 -27
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
- flowfile/web/static/assets/Sort-3643d625.css +0 -51
- flowfile/web/static/assets/Unique-f9fb0809.css +0 -51
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
- flowfile/web/static/assets/nodeInput-5d0d6b79.js +0 -41
- flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
- flowfile/web/static/assets/secretApi-68435402.js +0 -46
- flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
- flowfile-0.5.1.dist-info/RECORD +0 -388
- {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/WHEEL +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/entry_points.txt +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
# Standard library imports
|
|
2
|
-
from base64 import decodebytes, encodebytes
|
|
3
2
|
import io
|
|
4
3
|
import threading
|
|
4
|
+
from base64 import decodebytes, encodebytes
|
|
5
5
|
from time import sleep
|
|
6
|
-
from typing import Any,
|
|
6
|
+
from typing import Any, Literal
|
|
7
7
|
from uuid import uuid4
|
|
8
8
|
|
|
9
9
|
import polars as pl
|
|
10
10
|
import requests
|
|
11
|
-
|
|
12
11
|
from pl_fuzzy_frame_match.models import FuzzyMapping
|
|
13
12
|
|
|
14
13
|
from flowfile_core.configs import logger
|
|
@@ -17,104 +16,131 @@ from flowfile_core.flowfile.flow_data_engine.subprocess_operations.models import
|
|
|
17
16
|
FuzzyJoinInput,
|
|
18
17
|
OperationType,
|
|
19
18
|
PolarsOperation,
|
|
20
|
-
Status
|
|
19
|
+
Status,
|
|
21
20
|
)
|
|
22
|
-
from flowfile_core.flowfile.sources.external_sources.sql_source.models import (
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
from flowfile_core.schemas.input_schema import (
|
|
26
|
-
ReceivedTable
|
|
21
|
+
from flowfile_core.flowfile.sources.external_sources.sql_source.models import (
|
|
22
|
+
DatabaseExternalReadSettings,
|
|
23
|
+
DatabaseExternalWriteSettings,
|
|
27
24
|
)
|
|
25
|
+
from flowfile_core.schemas.cloud_storage_schemas import CloudStorageWriteSettingsWorkerInterface
|
|
26
|
+
from flowfile_core.schemas.input_schema import ReceivedTable
|
|
28
27
|
from flowfile_core.utils.arrow_reader import read
|
|
29
28
|
|
|
30
29
|
|
|
31
|
-
def trigger_df_operation(
|
|
30
|
+
def trigger_df_operation(
|
|
31
|
+
flow_id: int, node_id: int | str, lf: pl.LazyFrame, file_ref: str, operation_type: OperationType = "store"
|
|
32
|
+
) -> Status:
|
|
32
33
|
encoded_operation = encodebytes(lf.serialize()).decode()
|
|
33
|
-
_json = {
|
|
34
|
-
|
|
35
|
-
|
|
34
|
+
_json = {
|
|
35
|
+
"task_id": file_ref,
|
|
36
|
+
"operation": encoded_operation,
|
|
37
|
+
"operation_type": operation_type,
|
|
38
|
+
"flowfile_flow_id": flow_id,
|
|
39
|
+
"flowfile_node_id": node_id,
|
|
40
|
+
}
|
|
41
|
+
v = requests.post(url=f"{WORKER_URL}/submit_query/", json=_json)
|
|
36
42
|
if not v.ok:
|
|
37
|
-
raise Exception(f
|
|
43
|
+
raise Exception(f"trigger_df_operation: Could not cache the data, {v.text}")
|
|
38
44
|
return Status(**v.json())
|
|
39
45
|
|
|
40
46
|
|
|
41
|
-
def trigger_sample_operation(
|
|
47
|
+
def trigger_sample_operation(
|
|
48
|
+
lf: pl.LazyFrame, file_ref: str, flow_id: int, node_id: str | int, sample_size: int = 100
|
|
49
|
+
) -> Status:
|
|
42
50
|
encoded_operation = encodebytes(lf.serialize()).decode()
|
|
43
|
-
_json = {
|
|
44
|
-
|
|
45
|
-
|
|
51
|
+
_json = {
|
|
52
|
+
"task_id": file_ref,
|
|
53
|
+
"operation": encoded_operation,
|
|
54
|
+
"operation_type": "store_sample",
|
|
55
|
+
"sample_size": sample_size,
|
|
56
|
+
"flowfile_flow_id": flow_id,
|
|
57
|
+
"flowfile_node_id": node_id,
|
|
58
|
+
}
|
|
59
|
+
v = requests.post(url=f"{WORKER_URL}/store_sample/", json=_json)
|
|
46
60
|
if not v.ok:
|
|
47
|
-
raise Exception(f
|
|
61
|
+
raise Exception(f"trigger_sample_operation: Could not cache the data, {v.text}")
|
|
48
62
|
return Status(**v.json())
|
|
49
63
|
|
|
50
64
|
|
|
51
|
-
def trigger_fuzzy_match_operation(
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
65
|
+
def trigger_fuzzy_match_operation(
|
|
66
|
+
left_df: pl.LazyFrame,
|
|
67
|
+
right_df: pl.LazyFrame,
|
|
68
|
+
fuzzy_maps: list[FuzzyMapping],
|
|
69
|
+
file_ref: str,
|
|
70
|
+
flow_id: int,
|
|
71
|
+
node_id: int | str,
|
|
72
|
+
) -> Status:
|
|
56
73
|
left_serializable_object = PolarsOperation(operation=encodebytes(left_df.serialize()))
|
|
57
74
|
right_serializable_object = PolarsOperation(operation=encodebytes(right_df.serialize()))
|
|
58
|
-
fuzzy_join_input = FuzzyJoinInput(
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
75
|
+
fuzzy_join_input = FuzzyJoinInput(
|
|
76
|
+
left_df_operation=left_serializable_object,
|
|
77
|
+
right_df_operation=right_serializable_object,
|
|
78
|
+
fuzzy_maps=fuzzy_maps,
|
|
79
|
+
task_id=file_ref,
|
|
80
|
+
flowfile_flow_id=flow_id,
|
|
81
|
+
flowfile_node_id=node_id,
|
|
82
|
+
)
|
|
65
83
|
print("fuzzy join input", fuzzy_join_input)
|
|
66
|
-
v = requests.post(f
|
|
84
|
+
v = requests.post(f"{WORKER_URL}/add_fuzzy_join", data=fuzzy_join_input.model_dump_json())
|
|
67
85
|
if not v.ok:
|
|
68
|
-
raise Exception(f
|
|
86
|
+
raise Exception(f"trigger_fuzzy_match_operation: Could not cache the data, {v.text}")
|
|
69
87
|
return Status(**v.json())
|
|
70
88
|
|
|
71
89
|
|
|
72
|
-
def trigger_create_operation(
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
90
|
+
def trigger_create_operation(
|
|
91
|
+
flow_id: int,
|
|
92
|
+
node_id: int | str,
|
|
93
|
+
received_table: ReceivedTable,
|
|
94
|
+
file_type: str = Literal["csv", "parquet", "json", "excel"],
|
|
95
|
+
):
|
|
96
|
+
f = requests.post(
|
|
97
|
+
url=f"{WORKER_URL}/create_table/{file_type}",
|
|
98
|
+
data=received_table.model_dump_json(),
|
|
99
|
+
params={"flowfile_flow_id": flow_id, "flowfile_node_id": node_id},
|
|
100
|
+
)
|
|
76
101
|
if not f.ok:
|
|
77
|
-
raise Exception(f
|
|
102
|
+
raise Exception(f"trigger_create_operation: Could not cache the data, {f.text}")
|
|
78
103
|
return Status(**f.json())
|
|
79
104
|
|
|
80
105
|
|
|
81
106
|
def trigger_database_read_collector(database_external_read_settings: DatabaseExternalReadSettings):
|
|
82
|
-
f = requests.post(
|
|
83
|
-
|
|
107
|
+
f = requests.post(
|
|
108
|
+
url=f"{WORKER_URL}/store_database_read_result", data=database_external_read_settings.model_dump_json()
|
|
109
|
+
)
|
|
84
110
|
if not f.ok:
|
|
85
|
-
raise Exception(f
|
|
111
|
+
raise Exception(f"trigger_database_read_collector: Could not cache the data, {f.text}")
|
|
86
112
|
return Status(**f.json())
|
|
87
113
|
|
|
88
114
|
|
|
89
115
|
def trigger_database_write(database_external_write_settings: DatabaseExternalWriteSettings):
|
|
90
|
-
f = requests.post(
|
|
91
|
-
|
|
116
|
+
f = requests.post(
|
|
117
|
+
url=f"{WORKER_URL}/store_database_write_result", data=database_external_write_settings.model_dump_json()
|
|
118
|
+
)
|
|
92
119
|
if not f.ok:
|
|
93
|
-
raise Exception(f
|
|
120
|
+
raise Exception(f"trigger_database_write: Could not cache the data, {f.text}")
|
|
94
121
|
return Status(**f.json())
|
|
95
122
|
|
|
96
123
|
|
|
97
124
|
def trigger_cloud_storage_write(database_external_write_settings: CloudStorageWriteSettingsWorkerInterface):
|
|
98
|
-
f = requests.post(url=f
|
|
99
|
-
data=database_external_write_settings.model_dump_json())
|
|
125
|
+
f = requests.post(url=f"{WORKER_URL}/write_data_to_cloud", data=database_external_write_settings.model_dump_json())
|
|
100
126
|
if not f.ok:
|
|
101
|
-
raise Exception(f
|
|
127
|
+
raise Exception(f"trigger_cloud_storage_write: Could not cache the data, {f.text}")
|
|
102
128
|
return Status(**f.json())
|
|
103
129
|
|
|
104
130
|
|
|
105
131
|
def get_results(file_ref: str) -> Status | None:
|
|
106
|
-
f = requests.get(f
|
|
132
|
+
f = requests.get(f"{WORKER_URL}/status/{file_ref}")
|
|
107
133
|
if f.status_code == 200:
|
|
108
134
|
return Status(**f.json())
|
|
109
135
|
else:
|
|
110
|
-
raise Exception(f
|
|
136
|
+
raise Exception(f"get_results: Could not fetch the data, {f.text}")
|
|
111
137
|
|
|
112
138
|
|
|
113
139
|
def results_exists(file_ref: str):
|
|
114
140
|
try:
|
|
115
|
-
f = requests.get(f
|
|
141
|
+
f = requests.get(f"{WORKER_URL}/status/{file_ref}")
|
|
116
142
|
if f.status_code == 200:
|
|
117
|
-
if f.json()[
|
|
143
|
+
if f.json()["status"] == "Completed":
|
|
118
144
|
return True
|
|
119
145
|
return False
|
|
120
146
|
except requests.RequestException as e:
|
|
@@ -134,7 +160,7 @@ def clear_task_from_worker(file_ref: str) -> bool:
|
|
|
134
160
|
bool: True if the task was successfully cleared, False otherwise.
|
|
135
161
|
"""
|
|
136
162
|
try:
|
|
137
|
-
f = requests.delete(f
|
|
163
|
+
f = requests.delete(f"{WORKER_URL}/clear_task/{file_ref}")
|
|
138
164
|
if f.status_code == 200:
|
|
139
165
|
return True
|
|
140
166
|
return False
|
|
@@ -150,16 +176,16 @@ def get_df_result(encoded_df: str) -> pl.LazyFrame:
|
|
|
150
176
|
|
|
151
177
|
def get_external_df_result(file_ref: str) -> pl.LazyFrame | None:
|
|
152
178
|
status = get_results(file_ref)
|
|
153
|
-
if status.status !=
|
|
179
|
+
if status.status != "Completed":
|
|
154
180
|
raise Exception(f"Status is not completed, {status.status}")
|
|
155
|
-
if status.result_type ==
|
|
181
|
+
if status.result_type == "polars":
|
|
156
182
|
return get_df_result(status.results)
|
|
157
183
|
else:
|
|
158
184
|
raise Exception(f"Result type is not polars, {status.result_type}")
|
|
159
185
|
|
|
160
186
|
|
|
161
187
|
def get_status(file_ref: str) -> Status:
|
|
162
|
-
status_response = requests.get(f
|
|
188
|
+
status_response = requests.get(f"{WORKER_URL}/status/{file_ref}")
|
|
163
189
|
if status_response.status_code == 200:
|
|
164
190
|
return Status(**status_response.json())
|
|
165
191
|
else:
|
|
@@ -180,12 +206,12 @@ def cancel_task(file_ref: str) -> bool:
|
|
|
180
206
|
Exception: If there's an error communicating with the worker service
|
|
181
207
|
"""
|
|
182
208
|
try:
|
|
183
|
-
response = requests.post(f
|
|
209
|
+
response = requests.post(f"{WORKER_URL}/cancel_task/{file_ref}")
|
|
184
210
|
if response.ok:
|
|
185
211
|
return True
|
|
186
212
|
return False
|
|
187
213
|
except requests.RequestException as e:
|
|
188
|
-
raise Exception(f
|
|
214
|
+
raise Exception(f"Failed to cancel task: {str(e)}")
|
|
189
215
|
|
|
190
216
|
|
|
191
217
|
class BaseFetcher:
|
|
@@ -203,15 +229,15 @@ class BaseFetcher:
|
|
|
203
229
|
self._thread = None
|
|
204
230
|
|
|
205
231
|
# State variables - use properties for thread-safe access
|
|
206
|
-
self._result:
|
|
232
|
+
self._result: Any | None = None
|
|
207
233
|
self._started: bool = False
|
|
208
234
|
self._running: bool = False
|
|
209
235
|
self._error_code: int = 0
|
|
210
|
-
self._error_description:
|
|
236
|
+
self._error_description: str | None = None
|
|
211
237
|
|
|
212
238
|
# Public properties for compatibility with subclasses
|
|
213
239
|
@property
|
|
214
|
-
def result(self) ->
|
|
240
|
+
def result(self) -> Any | None:
|
|
215
241
|
with self._lock:
|
|
216
242
|
return self._result
|
|
217
243
|
|
|
@@ -240,7 +266,7 @@ class BaseFetcher:
|
|
|
240
266
|
return self._error_code
|
|
241
267
|
|
|
242
268
|
@property
|
|
243
|
-
def error_description(self) ->
|
|
269
|
+
def error_description(self) -> str | None:
|
|
244
270
|
with self._lock:
|
|
245
271
|
return self._error_description
|
|
246
272
|
|
|
@@ -259,22 +285,22 @@ class BaseFetcher:
|
|
|
259
285
|
try:
|
|
260
286
|
while not self._stop_event.is_set():
|
|
261
287
|
try:
|
|
262
|
-
r = requests.get(f
|
|
288
|
+
r = requests.get(f"{WORKER_URL}/status/{self.file_ref}", timeout=10)
|
|
263
289
|
|
|
264
290
|
if r.status_code == 200:
|
|
265
291
|
status = Status(**r.json())
|
|
266
292
|
|
|
267
|
-
if status.status ==
|
|
293
|
+
if status.status == "Completed":
|
|
268
294
|
self._handle_completion(status)
|
|
269
295
|
return
|
|
270
|
-
elif status.status ==
|
|
296
|
+
elif status.status == "Error":
|
|
271
297
|
self._handle_error(1, status.error_message)
|
|
272
298
|
return
|
|
273
|
-
elif status.status ==
|
|
299
|
+
elif status.status == "Unknown Error":
|
|
274
300
|
self._handle_error(
|
|
275
301
|
-1,
|
|
276
|
-
|
|
277
|
-
|
|
302
|
+
"There was an unknown error with the process, "
|
|
303
|
+
"and the process got killed by the server",
|
|
278
304
|
)
|
|
279
305
|
return
|
|
280
306
|
else:
|
|
@@ -303,7 +329,7 @@ class BaseFetcher:
|
|
|
303
329
|
"""Handle successful completion. Must be called from fetch thread."""
|
|
304
330
|
with self._condition:
|
|
305
331
|
try:
|
|
306
|
-
if status.result_type ==
|
|
332
|
+
if status.result_type == "polars":
|
|
307
333
|
self._result = get_df_result(status.results)
|
|
308
334
|
else:
|
|
309
335
|
self._result = status.results
|
|
@@ -336,10 +362,10 @@ class BaseFetcher:
|
|
|
336
362
|
"""Start the background fetch thread."""
|
|
337
363
|
with self._lock:
|
|
338
364
|
if self._started:
|
|
339
|
-
logger.info(
|
|
365
|
+
logger.info("Fetcher already started")
|
|
340
366
|
return
|
|
341
367
|
if self._running:
|
|
342
|
-
logger.info(
|
|
368
|
+
logger.info("Already running the fetching")
|
|
343
369
|
return
|
|
344
370
|
|
|
345
371
|
self._running = True
|
|
@@ -350,13 +376,13 @@ class BaseFetcher:
|
|
|
350
376
|
Cancels the current task both locally and on the worker service.
|
|
351
377
|
Also cleans up any resources being used.
|
|
352
378
|
"""
|
|
353
|
-
logger.warning(
|
|
379
|
+
logger.warning("Cancelling the operation")
|
|
354
380
|
|
|
355
381
|
# Cancel on the worker side
|
|
356
382
|
try:
|
|
357
383
|
cancel_task(self.file_ref)
|
|
358
384
|
except Exception as e:
|
|
359
|
-
logger.error(f
|
|
385
|
+
logger.error(f"Failed to cancel task on worker: {str(e)}")
|
|
360
386
|
|
|
361
387
|
# Signal the thread to stop
|
|
362
388
|
self._stop_event.set()
|
|
@@ -367,7 +393,7 @@ class BaseFetcher:
|
|
|
367
393
|
if self._thread.is_alive():
|
|
368
394
|
logger.warning("Fetch thread did not stop within timeout")
|
|
369
395
|
|
|
370
|
-
def get_result(self) ->
|
|
396
|
+
def get_result(self) -> Any | None:
|
|
371
397
|
"""
|
|
372
398
|
Get the result, blocking until it's available.
|
|
373
399
|
|
|
@@ -408,105 +434,141 @@ class BaseFetcher:
|
|
|
408
434
|
return self._error_description is not None
|
|
409
435
|
|
|
410
436
|
@property
|
|
411
|
-
def error_info(self) -> tuple[int,
|
|
437
|
+
def error_info(self) -> tuple[int, str | None]:
|
|
412
438
|
"""Get error code and description."""
|
|
413
439
|
with self._lock:
|
|
414
440
|
return self._error_code, self._error_description
|
|
415
441
|
|
|
416
442
|
|
|
417
443
|
class ExternalDfFetcher(BaseFetcher):
|
|
418
|
-
status:
|
|
419
|
-
|
|
420
|
-
def __init__(
|
|
421
|
-
|
|
422
|
-
|
|
444
|
+
status: Status | None = None
|
|
445
|
+
|
|
446
|
+
def __init__(
|
|
447
|
+
self,
|
|
448
|
+
flow_id: int,
|
|
449
|
+
node_id: int | str,
|
|
450
|
+
lf: pl.LazyFrame | pl.DataFrame,
|
|
451
|
+
file_ref: str = None,
|
|
452
|
+
wait_on_completion: bool = True,
|
|
453
|
+
operation_type: OperationType = "store",
|
|
454
|
+
offload_to_worker: bool = True,
|
|
455
|
+
):
|
|
423
456
|
super().__init__(file_ref=file_ref)
|
|
424
457
|
lf = lf.lazy() if isinstance(lf, pl.DataFrame) else lf
|
|
425
|
-
r = trigger_df_operation(
|
|
426
|
-
|
|
427
|
-
|
|
458
|
+
r = trigger_df_operation(
|
|
459
|
+
lf=lf, file_ref=self.file_ref, operation_type=operation_type, node_id=node_id, flow_id=flow_id
|
|
460
|
+
)
|
|
461
|
+
self.running = r.status == "Processing"
|
|
428
462
|
if wait_on_completion:
|
|
429
463
|
_ = self.get_result()
|
|
430
464
|
self.status = get_status(self.file_ref)
|
|
431
465
|
|
|
432
466
|
|
|
433
467
|
class ExternalSampler(BaseFetcher):
|
|
434
|
-
status:
|
|
435
|
-
|
|
436
|
-
def __init__(
|
|
437
|
-
|
|
468
|
+
status: Status | None = None
|
|
469
|
+
|
|
470
|
+
def __init__(
|
|
471
|
+
self,
|
|
472
|
+
lf: pl.LazyFrame | pl.DataFrame,
|
|
473
|
+
node_id: str | int,
|
|
474
|
+
flow_id: int,
|
|
475
|
+
file_ref: str = None,
|
|
476
|
+
wait_on_completion: bool = True,
|
|
477
|
+
sample_size: int = 100,
|
|
478
|
+
):
|
|
438
479
|
super().__init__(file_ref=file_ref)
|
|
439
480
|
lf = lf.lazy() if isinstance(lf, pl.DataFrame) else lf
|
|
440
|
-
r = trigger_sample_operation(
|
|
441
|
-
|
|
481
|
+
r = trigger_sample_operation(
|
|
482
|
+
lf=lf, file_ref=file_ref, sample_size=sample_size, node_id=node_id, flow_id=flow_id
|
|
483
|
+
)
|
|
484
|
+
self.running = r.status == "Processing"
|
|
442
485
|
if wait_on_completion:
|
|
443
486
|
_ = self.get_result()
|
|
444
487
|
self.status = get_status(self.file_ref)
|
|
445
488
|
|
|
446
489
|
|
|
447
490
|
class ExternalFuzzyMatchFetcher(BaseFetcher):
|
|
448
|
-
def __init__(
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
491
|
+
def __init__(
|
|
492
|
+
self,
|
|
493
|
+
left_df: pl.LazyFrame,
|
|
494
|
+
right_df: pl.LazyFrame,
|
|
495
|
+
fuzzy_maps: list[Any],
|
|
496
|
+
flow_id: int,
|
|
497
|
+
node_id: int | str,
|
|
498
|
+
file_ref: str = None,
|
|
499
|
+
wait_on_completion: bool = True,
|
|
500
|
+
):
|
|
452
501
|
super().__init__(file_ref=file_ref)
|
|
453
502
|
|
|
454
|
-
r = trigger_fuzzy_match_operation(
|
|
455
|
-
|
|
503
|
+
r = trigger_fuzzy_match_operation(
|
|
504
|
+
left_df=left_df,
|
|
505
|
+
right_df=right_df,
|
|
506
|
+
fuzzy_maps=fuzzy_maps,
|
|
507
|
+
file_ref=file_ref,
|
|
508
|
+
flow_id=flow_id,
|
|
509
|
+
node_id=node_id,
|
|
510
|
+
)
|
|
456
511
|
self.file_ref = r.background_task_id
|
|
457
|
-
self.running = r.status ==
|
|
512
|
+
self.running = r.status == "Processing"
|
|
458
513
|
if wait_on_completion:
|
|
459
514
|
_ = self.get_result()
|
|
460
515
|
|
|
461
516
|
|
|
462
517
|
class ExternalCreateFetcher(BaseFetcher):
|
|
463
|
-
def __init__(
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
518
|
+
def __init__(
|
|
519
|
+
self,
|
|
520
|
+
received_table: ReceivedTable,
|
|
521
|
+
node_id: int,
|
|
522
|
+
flow_id: int,
|
|
523
|
+
file_type: str = "csv",
|
|
524
|
+
wait_on_completion: bool = True,
|
|
525
|
+
):
|
|
526
|
+
r = trigger_create_operation(
|
|
527
|
+
received_table=received_table, file_type=file_type, node_id=node_id, flow_id=flow_id
|
|
528
|
+
)
|
|
467
529
|
super().__init__(file_ref=r.background_task_id)
|
|
468
|
-
self.running = r.status ==
|
|
530
|
+
self.running = r.status == "Processing"
|
|
469
531
|
if wait_on_completion:
|
|
470
532
|
_ = self.get_result()
|
|
471
533
|
|
|
472
534
|
|
|
473
535
|
class ExternalDatabaseFetcher(BaseFetcher):
|
|
474
|
-
def __init__(self, database_external_read_settings: DatabaseExternalReadSettings,
|
|
475
|
-
wait_on_completion: bool = True):
|
|
536
|
+
def __init__(self, database_external_read_settings: DatabaseExternalReadSettings, wait_on_completion: bool = True):
|
|
476
537
|
r = trigger_database_read_collector(database_external_read_settings=database_external_read_settings)
|
|
477
538
|
super().__init__(file_ref=r.background_task_id)
|
|
478
|
-
self.running = r.status ==
|
|
539
|
+
self.running = r.status == "Processing"
|
|
479
540
|
if wait_on_completion:
|
|
480
541
|
_ = self.get_result()
|
|
481
542
|
|
|
482
543
|
|
|
483
544
|
class ExternalDatabaseWriter(BaseFetcher):
|
|
484
|
-
def __init__(
|
|
485
|
-
|
|
545
|
+
def __init__(
|
|
546
|
+
self, database_external_write_settings: DatabaseExternalWriteSettings, wait_on_completion: bool = True
|
|
547
|
+
):
|
|
486
548
|
r = trigger_database_write(database_external_write_settings=database_external_write_settings)
|
|
487
549
|
super().__init__(file_ref=r.background_task_id)
|
|
488
|
-
self.running = r.status ==
|
|
550
|
+
self.running = r.status == "Processing"
|
|
489
551
|
if wait_on_completion:
|
|
490
552
|
_ = self.get_result()
|
|
491
553
|
|
|
492
554
|
|
|
493
555
|
class ExternalCloudWriter(BaseFetcher):
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
556
|
+
def __init__(
|
|
557
|
+
self, cloud_storage_write_settings: CloudStorageWriteSettingsWorkerInterface, wait_on_completion: bool = True
|
|
558
|
+
):
|
|
497
559
|
r = trigger_cloud_storage_write(database_external_write_settings=cloud_storage_write_settings)
|
|
498
560
|
super().__init__(file_ref=r.background_task_id)
|
|
499
|
-
self.running = r.status ==
|
|
561
|
+
self.running = r.status == "Processing"
|
|
500
562
|
if wait_on_completion:
|
|
501
563
|
_ = self.get_result()
|
|
502
564
|
|
|
503
565
|
|
|
504
566
|
class ExternalExecutorTracker:
|
|
505
|
-
result:
|
|
567
|
+
result: pl.LazyFrame | None
|
|
506
568
|
started: bool = False
|
|
507
569
|
running: bool = False
|
|
508
570
|
error_code: int = 0
|
|
509
|
-
error_description:
|
|
571
|
+
error_description: str | None = None
|
|
510
572
|
file_ref: str = None
|
|
511
573
|
|
|
512
574
|
def __init__(self, initial_response: Status, wait_on_completion: bool = True):
|
|
@@ -515,7 +577,7 @@ class ExternalExecutorTracker:
|
|
|
515
577
|
self.thread = threading.Thread(target=self._fetch_cached_df)
|
|
516
578
|
self.result = None
|
|
517
579
|
self.error_description = None
|
|
518
|
-
self.running = initial_response.status ==
|
|
580
|
+
self.running = initial_response.status == "Processing"
|
|
519
581
|
self.condition = threading.Condition()
|
|
520
582
|
if wait_on_completion:
|
|
521
583
|
_ = self.get_result()
|
|
@@ -523,30 +585,32 @@ class ExternalExecutorTracker:
|
|
|
523
585
|
def _fetch_cached_df(self):
|
|
524
586
|
with self.condition:
|
|
525
587
|
if self.running:
|
|
526
|
-
logger.info(
|
|
588
|
+
logger.info("Already running the fetching")
|
|
527
589
|
return
|
|
528
590
|
sleep_time = 1
|
|
529
591
|
self.running = True
|
|
530
592
|
while not self.stop_event.is_set():
|
|
531
593
|
try:
|
|
532
|
-
r = requests.get(f
|
|
594
|
+
r = requests.get(f"{WORKER_URL}/status/{self.file_ref}")
|
|
533
595
|
if r.status_code == 200:
|
|
534
596
|
status = Status(**r.json())
|
|
535
|
-
if status.status ==
|
|
597
|
+
if status.status == "Completed":
|
|
536
598
|
self.running = False
|
|
537
599
|
self.condition.notify_all() # Notify all waiting threads
|
|
538
|
-
if status.result_type ==
|
|
600
|
+
if status.result_type == "polars":
|
|
539
601
|
self.result = get_df_result(status.results)
|
|
540
602
|
else:
|
|
541
603
|
self.result = status.results
|
|
542
604
|
return
|
|
543
|
-
elif status.status ==
|
|
605
|
+
elif status.status == "Error":
|
|
544
606
|
self.error_code = 1
|
|
545
607
|
self.error_description = status.error_message
|
|
546
608
|
break
|
|
547
|
-
elif status.status ==
|
|
609
|
+
elif status.status == "Unknown Error":
|
|
548
610
|
self.error_code = -1
|
|
549
|
-
self.error_description =
|
|
611
|
+
self.error_description = (
|
|
612
|
+
"There was an unknown error with the process, and the process got killed by the server"
|
|
613
|
+
)
|
|
550
614
|
break
|
|
551
615
|
else:
|
|
552
616
|
self.error_description = r.text
|
|
@@ -570,12 +634,12 @@ class ExternalExecutorTracker:
|
|
|
570
634
|
def start(self):
|
|
571
635
|
self.started = True
|
|
572
636
|
if self.running:
|
|
573
|
-
logger.info(
|
|
637
|
+
logger.info("Already running the fetching")
|
|
574
638
|
return
|
|
575
639
|
self.thread.start()
|
|
576
640
|
|
|
577
641
|
def cancel(self):
|
|
578
|
-
logger.warning(
|
|
642
|
+
logger.warning("Cancelling the operation")
|
|
579
643
|
self.thread.join()
|
|
580
644
|
|
|
581
645
|
self.running = False
|
|
@@ -591,7 +655,7 @@ class ExternalExecutorTracker:
|
|
|
591
655
|
return self.result
|
|
592
656
|
|
|
593
657
|
|
|
594
|
-
def fetch_unique_values(lf: pl.LazyFrame) ->
|
|
658
|
+
def fetch_unique_values(lf: pl.LazyFrame) -> list[str]:
|
|
595
659
|
"""
|
|
596
660
|
Fetches unique values from a specified column in a LazyFrame, attempting first via an external fetcher
|
|
597
661
|
and falling back to direct LazyFrame computation if that fails.
|
|
@@ -616,8 +680,7 @@ def fetch_unique_values(lf: pl.LazyFrame) -> List[str]:
|
|
|
616
680
|
# Try external source first if lf is provided
|
|
617
681
|
try:
|
|
618
682
|
external_df_fetcher = ExternalDfFetcher(lf=lf, flow_id=1, node_id=-1)
|
|
619
|
-
if external_df_fetcher.status.status ==
|
|
620
|
-
|
|
683
|
+
if external_df_fetcher.status.status == "Completed":
|
|
621
684
|
unique_values = read(external_df_fetcher.status.file_ref).column(0).to_pylist()
|
|
622
685
|
if logger:
|
|
623
686
|
logger.info(f"Got {len(unique_values)} unique values from external source")
|
|
@@ -626,10 +689,10 @@ def fetch_unique_values(lf: pl.LazyFrame) -> List[str]:
|
|
|
626
689
|
if logger:
|
|
627
690
|
logger.debug(f"Failed reading external file: {str(e)}")
|
|
628
691
|
|
|
629
|
-
unique_values =
|
|
692
|
+
unique_values = lf.unique().collect(engine="streaming")[:, 0].to_list()
|
|
630
693
|
|
|
631
694
|
if not unique_values:
|
|
632
|
-
raise ValueError(
|
|
695
|
+
raise ValueError("No unique values found in lazyframe")
|
|
633
696
|
|
|
634
697
|
return unique_values
|
|
635
698
|
|
|
@@ -1,27 +1,27 @@
|
|
|
1
|
-
|
|
2
1
|
import polars as pl
|
|
3
|
-
|
|
2
|
+
|
|
4
3
|
from flowfile_core.flowfile.flow_data_engine import utils
|
|
4
|
+
from flowfile_core.utils.fl_executor import process_executor
|
|
5
5
|
|
|
6
6
|
# calculate_schema_threaded = process_executor(wait_on_completion=True, max_workers=1)(utils.calculate_schema)
|
|
7
7
|
write_threaded = process_executor(False, max_workers=1)(utils.write_polars_frame)
|
|
8
8
|
collect_threaded = process_executor(wait_on_completion=False, max_workers=1)(utils.collect)
|
|
9
9
|
cache_polars_frame_to_temp_thread = process_executor(wait_on_completion=True, max_workers=1)(
|
|
10
|
-
utils.cache_polars_frame_to_temp
|
|
10
|
+
utils.cache_polars_frame_to_temp
|
|
11
|
+
)
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
@process_executor(False, max_workers=1)
|
|
14
15
|
def do_something_random():
|
|
15
|
-
print(
|
|
16
|
+
print("10 seconds")
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
# @process_executor(False, max_workers=1)
|
|
19
20
|
def get_join_count(left: pl.LazyFrame, right: pl.LazyFrame, left_on_keys, right_on_keys, how):
|
|
20
21
|
left_joined_df = left.group_by(left_on_keys).count()
|
|
21
22
|
right_joined_df = right.group_by(right_on_keys).count()
|
|
22
|
-
data: pl.LazyFrame = left_joined_df.join(right_joined_df, left_on=left_on_keys,
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
result = data.select((pl.col('total') * pl.col('count') * pl.col('count_right'))).sum()
|
|
23
|
+
data: pl.LazyFrame = left_joined_df.join(right_joined_df, left_on=left_on_keys, right_on=right_on_keys, how=how)
|
|
24
|
+
data = data.with_columns(pl.lit(1).alias("total").cast(pl.UInt64))
|
|
25
|
+
result = data.select(pl.col("total") * pl.col("count") * pl.col("count_right")).sum()
|
|
26
26
|
n_records = result.collect().to_series().to_list()[0]
|
|
27
27
|
return n_records
|