Flowfile 0.5.1__py3-none-any.whl → 0.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backends/main.py +25 -22
- build_backends/main_prd.py +10 -19
- flowfile/__init__.py +194 -74
- flowfile/__main__.py +10 -7
- flowfile/api.py +51 -57
- flowfile/web/__init__.py +14 -9
- flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
- flowfile/web/static/assets/AdminView-f9847d67.js +713 -0
- flowfile/web/static/assets/CloudConnectionView-cf85f943.css +72 -0
- flowfile/web/static/assets/{CloudConnectionManager-0dfba9f2.js → CloudConnectionView-faace55b.js} +11 -11
- flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
- flowfile/web/static/assets/{CloudStorageReader-d5b1b6c9.js → CloudStorageReader-d86ecaa7.js} +10 -8
- flowfile/web/static/assets/{CloudStorageWriter-00d87aad.js → CloudStorageWriter-0f4d9a44.js} +10 -8
- flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
- flowfile/web/static/assets/ColumnActionInput-c44b7aee.css +159 -0
- flowfile/web/static/assets/ColumnActionInput-f4189ae0.js +330 -0
- flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
- flowfile/web/static/assets/{ColumnSelector-4685e75d.js → ColumnSelector-e66b33da.js} +3 -5
- flowfile/web/static/assets/ContextMenu-49463352.js +9 -0
- flowfile/web/static/assets/ContextMenu-dd5f3f25.js +9 -0
- flowfile/web/static/assets/ContextMenu-f709b884.js +9 -0
- flowfile/web/static/assets/ContextMenu.vue_vue_type_script_setup_true_lang-a1bd6314.js +59 -0
- flowfile/web/static/assets/{CrossJoin-702a3edd.js → CrossJoin-24694b8f.js} +12 -10
- flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
- flowfile/web/static/assets/{CustomNode-b1519993.js → CustomNode-569d45ff.js} +43 -24
- flowfile/web/static/assets/CustomNode-edb9b939.css +42 -0
- flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-c20a1e16.css} +23 -21
- flowfile/web/static/assets/{DatabaseConnectionSettings-6f3e4ea5.js → DatabaseConnectionSettings-cfc08938.js} +5 -4
- flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-5bf8c75b.css} +41 -46
- flowfile/web/static/assets/{DatabaseReader-d38c7295.js → DatabaseReader-701feabb.js} +25 -15
- flowfile/web/static/assets/{DatabaseManager-cf5ef661.js → DatabaseView-0482e5b5.js} +11 -11
- flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
- flowfile/web/static/assets/{DatabaseWriter-b04ef46a.js → DatabaseWriter-16721989.js} +17 -10
- flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-bdcf2c8b.css} +29 -27
- flowfile/web/static/assets/{designer-8da3ba3a.css → DesignerView-49abb835.css} +783 -663
- flowfile/web/static/assets/{designer-9633482a.js → DesignerView-f64749fb.js} +1292 -3253
- flowfile/web/static/assets/{documentation-ca400224.js → DocumentationView-61bd2990.js} +5 -5
- flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-9ea6e871.css} +9 -9
- flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
- flowfile/web/static/assets/{ExploreData-5fa10ed8.js → ExploreData-e2735b13.js} +18 -9
- flowfile/web/static/assets/{ExternalSource-d39af878.js → ExternalSource-2535c3b2.js} +9 -7
- flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-7ac7373f.css} +20 -20
- flowfile/web/static/assets/Filter-2cdbc93c.js +287 -0
- flowfile/web/static/assets/Filter-7494ea97.css +48 -0
- flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
- flowfile/web/static/assets/{Formula-6b04fb1d.js → Formula-fcda3c2c.js} +13 -11
- flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
- flowfile/web/static/assets/{FuzzyMatch-999521f4.js → FuzzyMatch-f8d3b7d3.js} +12 -10
- flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-4b4d7db9.css} +5 -5
- flowfile/web/static/assets/{GraphSolver-17dd2198.js → GraphSolver-72eaa695.js} +14 -12
- flowfile/web/static/assets/GroupBy-5792782d.css +9 -0
- flowfile/web/static/assets/{GroupBy-6b039e18.js → GroupBy-8aa0598b.js} +9 -7
- flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
- flowfile/web/static/assets/{Join-24d0f113.js → Join-e40f0ffa.js} +13 -11
- flowfile/web/static/assets/LoginView-5111c9ae.js +134 -0
- flowfile/web/static/assets/LoginView-d325d632.css +172 -0
- flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
- flowfile/web/static/assets/{ManualInput-34639209.js → ManualInput-9b6f3224.js} +170 -116
- flowfile/web/static/assets/{MultiSelect-0e8724a3.js → MultiSelect-ef28e19e.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js → MultiSelect.vue_vue_type_script_setup_true_lang-83b3bbfd.js} +1 -1
- flowfile/web/static/assets/NodeDesigner-94cd4dd3.css +1429 -0
- flowfile/web/static/assets/NodeDesigner-d2b7ee2b.js +2712 -0
- flowfile/web/static/assets/{NumericInput-3d63a470.js → NumericInput-1d789794.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js → NumericInput.vue_vue_type_script_setup_true_lang-7775f83e.js} +5 -2
- flowfile/web/static/assets/Output-692dd25d.css +37 -0
- flowfile/web/static/assets/{Output-edea9802.js → Output-cefef801.js} +14 -10
- flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
- flowfile/web/static/assets/{Pivot-61d19301.js → Pivot-bab1b75b.js} +12 -10
- flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
- flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
- flowfile/web/static/assets/{PivotValidation-f97fec5b.js → PivotValidation-e7941f91.js} +3 -3
- flowfile/web/static/assets/{PivotValidation-de9f43fe.js → PivotValidation-fba09336.js} +3 -3
- flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
- flowfile/web/static/assets/{PolarsCode-bc3c9984.js → PolarsCode-740e40fa.js} +18 -9
- flowfile/web/static/assets/PopOver-862d7e28.js +939 -0
- flowfile/web/static/assets/PopOver-d96599db.css +33 -0
- flowfile/web/static/assets/{Read-64a3f259.js → Read-225cc63f.js} +16 -12
- flowfile/web/static/assets/{Read-e808b239.css → Read-90f366bc.css} +15 -15
- flowfile/web/static/assets/{RecordCount-3d5039be.js → RecordCount-ffc71eca.js} +6 -4
- flowfile/web/static/assets/{RecordId-597510e0.js → RecordId-a70bb8df.js} +9 -7
- flowfile/web/static/assets/{SQLQueryComponent-df51adbe.js → SQLQueryComponent-15a421f5.js} +3 -3
- flowfile/web/static/assets/SQLQueryComponent-edb90b98.css +29 -0
- flowfile/web/static/assets/{Sample-4be0a507.js → Sample-6c26afc7.js} +6 -4
- flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
- flowfile/web/static/assets/SecretSelector-ceed9496.js +113 -0
- flowfile/web/static/assets/{SecretManager-4839be57.js → SecretsView-214d255a.js} +35 -36
- flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
- flowfile/web/static/assets/{Select-9b72f201.js → Select-8fc29999.js} +9 -7
- flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-7ded385d.js → SettingsSection-3f70e4c3.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-f0f75a42.js → SettingsSection-83090218.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-e1e9c953.js → SettingsSection-9f0d1725.js} +3 -3
- flowfile/web/static/assets/SetupView-3fa0aa03.js +160 -0
- flowfile/web/static/assets/SetupView-e2da3442.css +230 -0
- flowfile/web/static/assets/{SingleSelect-6c777aac.js → SingleSelect-a4a568cb.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js → SingleSelect.vue_vue_type_script_setup_true_lang-c8ebdd33.js} +1 -1
- flowfile/web/static/assets/{SliderInput-7cb93e62.js → SliderInput-be533e71.js} +7 -4
- flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
- flowfile/web/static/assets/{Sort-6cbde21a.js → Sort-154dad81.js} +9 -7
- flowfile/web/static/assets/Sort-4abb7fae.css +9 -0
- flowfile/web/static/assets/{TextInput-d9a40c11.js → TextInput-454e2bda.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-5896c375.js → TextInput.vue_vue_type_script_setup_true_lang-e86510d0.js} +5 -2
- flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
- flowfile/web/static/assets/{TextToRows-c4fcbf4d.js → TextToRows-ea73433d.js} +11 -10
- flowfile/web/static/assets/{ToggleSwitch-4ef91d19.js → ToggleSwitch-9d7b30f1.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-00f2580e.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-394a1f78.css} +14 -14
- flowfile/web/static/assets/{UnavailableFields-a03f512c.js → UnavailableFields-b72a2c72.js} +4 -4
- flowfile/web/static/assets/{Union-bfe9b996.js → Union-1e44f263.js} +8 -6
- flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
- flowfile/web/static/assets/Unique-2b705521.css +3 -0
- flowfile/web/static/assets/{Unique-5d023a27.js → Unique-a3bc6d0a.js} +13 -10
- flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-b6ad6427.css} +7 -7
- flowfile/web/static/assets/{Unpivot-91cc5354.js → Unpivot-e27935fc.js} +11 -9
- flowfile/web/static/assets/{UnpivotValidation-7ee2de44.js → UnpivotValidation-72497680.js} +3 -3
- flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
- flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
- flowfile/web/static/assets/{VueGraphicWalker-e51b9924.js → VueGraphicWalker-d9ab70a3.js} +4 -4
- flowfile/web/static/assets/{api-cf1221f0.js → api-a2102880.js} +1 -1
- flowfile/web/static/assets/{api-c1bad5ca.js → api-f75042b0.js} +1 -1
- flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-1d6acbd9.css} +41 -41
- flowfile/web/static/assets/{dropDown-614b998d.js → dropDown-2798a109.js} +3 -3
- flowfile/web/static/assets/{fullEditor-f7971590.js → fullEditor-cf7d7d93.js} +11 -10
- flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-fe9f7e18.css} +77 -65
- flowfile/web/static/assets/{genericNodeSettings-4fe5f36b.js → genericNodeSettings-14eac1c3.js} +5 -5
- flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
- flowfile/web/static/assets/{index-5429bbf8.js → index-387a6f18.js} +41806 -40958
- flowfile/web/static/assets/index-6b367bb5.js +38 -0
- flowfile/web/static/assets/{index-50508d4d.css → index-e96ab018.css} +2184 -569
- flowfile/web/static/assets/index-f0a6e5a5.js +2696 -0
- flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
- flowfile/web/static/assets/nodeInput-ed2ae8d7.js +2 -0
- flowfile/web/static/assets/{outputCsv-076b85ab.js → outputCsv-3c1757e8.js} +3 -3
- flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
- flowfile/web/static/assets/{outputExcel-0fd17dbe.js → outputExcel-686e1f48.js} +3 -3
- flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
- flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
- flowfile/web/static/assets/{outputParquet-b61e0847.js → outputParquet-df28faa7.js} +4 -4
- flowfile/web/static/assets/{readCsv-c767cb37.css → readCsv-3bfac4c3.css} +15 -15
- flowfile/web/static/assets/{readCsv-a8bb8b61.js → readCsv-e37eee21.js} +3 -3
- flowfile/web/static/assets/{readExcel-806d2826.css → readExcel-3db6b763.css} +13 -13
- flowfile/web/static/assets/{readExcel-67b4aee0.js → readExcel-a13f14bb.js} +5 -5
- flowfile/web/static/assets/{readParquet-92ce1dbc.js → readParquet-344cf746.js} +3 -3
- flowfile/web/static/assets/{readParquet-48c81530.css → readParquet-c5244ad5.css} +4 -4
- flowfile/web/static/assets/secrets.api-ae198c5c.js +65 -0
- flowfile/web/static/assets/{selectDynamic-92e25ee3.js → selectDynamic-6b4b0767.js} +5 -5
- flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
- flowfile/web/static/assets/{vue-codemirror.esm-41b0e0d7.js → vue-codemirror.esm-31ba0e0b.js} +31 -640
- flowfile/web/static/assets/{vue-content-loader.es-2c8e608f.js → vue-content-loader.es-4469c8ff.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/METADATA +3 -4
- flowfile-0.5.4.dist-info/RECORD +407 -0
- flowfile_core/__init__.py +13 -6
- flowfile_core/auth/jwt.py +51 -16
- flowfile_core/auth/models.py +32 -7
- flowfile_core/auth/password.py +89 -0
- flowfile_core/auth/secrets.py +64 -19
- flowfile_core/configs/__init__.py +9 -7
- flowfile_core/configs/flow_logger.py +15 -14
- flowfile_core/configs/node_store/__init__.py +72 -4
- flowfile_core/configs/node_store/nodes.py +155 -172
- flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
- flowfile_core/configs/settings.py +28 -15
- flowfile_core/database/connection.py +7 -6
- flowfile_core/database/init_db.py +96 -2
- flowfile_core/database/models.py +3 -1
- flowfile_core/fileExplorer/__init__.py +17 -0
- flowfile_core/fileExplorer/funcs.py +145 -57
- flowfile_core/fileExplorer/utils.py +10 -11
- flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
- flowfile_core/flowfile/analytics/analytics_processor.py +26 -24
- flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
- flowfile_core/flowfile/analytics/utils.py +1 -1
- flowfile_core/flowfile/code_generator/__init__.py +11 -0
- flowfile_core/flowfile/code_generator/code_generator.py +706 -247
- flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
- flowfile_core/flowfile/database_connection_manager/models.py +1 -1
- flowfile_core/flowfile/extensions.py +17 -12
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +115 -83
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +493 -423
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +31 -20
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +14 -15
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
- flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +190 -127
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
- flowfile_core/flowfile/flow_data_engine/utils.py +99 -67
- flowfile_core/flowfile/flow_graph.py +920 -571
- flowfile_core/flowfile/flow_graph_utils.py +31 -49
- flowfile_core/flowfile/flow_node/flow_node.py +379 -258
- flowfile_core/flowfile/flow_node/models.py +53 -41
- flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
- flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
- flowfile_core/flowfile/handler.py +80 -30
- flowfile_core/flowfile/manage/compatibility_enhancements.py +209 -126
- flowfile_core/flowfile/manage/io_flowfile.py +54 -57
- flowfile_core/flowfile/node_designer/__init__.py +19 -13
- flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
- flowfile_core/flowfile/node_designer/custom_node.py +162 -36
- flowfile_core/flowfile/node_designer/ui_components.py +278 -34
- flowfile_core/flowfile/schema_callbacks.py +71 -51
- flowfile_core/flowfile/setting_generator/__init__.py +0 -1
- flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
- flowfile_core/flowfile/setting_generator/settings.py +64 -53
- flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
- flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
- flowfile_core/flowfile/util/calculate_layout.py +9 -13
- flowfile_core/flowfile/util/execution_orderer.py +25 -17
- flowfile_core/flowfile/util/node_skipper.py +4 -4
- flowfile_core/flowfile/utils.py +19 -21
- flowfile_core/main.py +26 -19
- flowfile_core/routes/auth.py +284 -11
- flowfile_core/routes/cloud_connections.py +25 -25
- flowfile_core/routes/logs.py +21 -29
- flowfile_core/routes/public.py +46 -4
- flowfile_core/routes/routes.py +70 -34
- flowfile_core/routes/secrets.py +25 -27
- flowfile_core/routes/user_defined_components.py +483 -4
- flowfile_core/run_lock.py +0 -1
- flowfile_core/schemas/__init__.py +4 -6
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
- flowfile_core/schemas/cloud_storage_schemas.py +96 -66
- flowfile_core/schemas/input_schema.py +231 -144
- flowfile_core/schemas/output_model.py +49 -34
- flowfile_core/schemas/schemas.py +116 -89
- flowfile_core/schemas/transform_schema.py +518 -263
- flowfile_core/schemas/yaml_types.py +21 -7
- flowfile_core/secret_manager/secret_manager.py +123 -18
- flowfile_core/types.py +29 -9
- flowfile_core/utils/arrow_reader.py +7 -6
- flowfile_core/utils/excel_file_manager.py +3 -3
- flowfile_core/utils/fileManager.py +7 -7
- flowfile_core/utils/fl_executor.py +8 -10
- flowfile_core/utils/utils.py +4 -4
- flowfile_core/utils/validate_setup.py +5 -4
- flowfile_frame/__init__.py +117 -51
- flowfile_frame/adapters.py +2 -9
- flowfile_frame/adding_expr.py +73 -32
- flowfile_frame/cloud_storage/frame_helpers.py +27 -23
- flowfile_frame/cloud_storage/secret_manager.py +12 -26
- flowfile_frame/config.py +2 -5
- flowfile_frame/database/__init__.py +36 -0
- flowfile_frame/database/connection_manager.py +205 -0
- flowfile_frame/database/frame_helpers.py +249 -0
- flowfile_frame/expr.py +311 -218
- flowfile_frame/expr.pyi +160 -159
- flowfile_frame/expr_name.py +23 -23
- flowfile_frame/flow_frame.py +571 -476
- flowfile_frame/flow_frame.pyi +123 -104
- flowfile_frame/flow_frame_methods.py +227 -246
- flowfile_frame/group_frame.py +50 -20
- flowfile_frame/join.py +2 -2
- flowfile_frame/lazy.py +129 -87
- flowfile_frame/lazy_methods.py +83 -30
- flowfile_frame/list_name_space.py +55 -50
- flowfile_frame/selectors.py +148 -68
- flowfile_frame/series.py +9 -7
- flowfile_frame/utils.py +19 -21
- flowfile_worker/__init__.py +12 -7
- flowfile_worker/configs.py +41 -33
- flowfile_worker/create/__init__.py +14 -9
- flowfile_worker/create/funcs.py +114 -77
- flowfile_worker/create/models.py +46 -43
- flowfile_worker/create/pl_types.py +14 -15
- flowfile_worker/create/read_excel_tables.py +34 -41
- flowfile_worker/create/utils.py +22 -19
- flowfile_worker/external_sources/s3_source/main.py +18 -51
- flowfile_worker/external_sources/s3_source/models.py +34 -27
- flowfile_worker/external_sources/sql_source/main.py +8 -5
- flowfile_worker/external_sources/sql_source/models.py +13 -9
- flowfile_worker/flow_logger.py +10 -8
- flowfile_worker/funcs.py +214 -155
- flowfile_worker/main.py +11 -17
- flowfile_worker/models.py +35 -28
- flowfile_worker/process_manager.py +2 -3
- flowfile_worker/routes.py +121 -90
- flowfile_worker/secrets.py +114 -21
- flowfile_worker/spawner.py +89 -54
- flowfile_worker/utils.py +3 -2
- shared/__init__.py +2 -7
- shared/storage_config.py +25 -13
- test_utils/postgres/commands.py +3 -2
- test_utils/postgres/fixtures.py +9 -9
- test_utils/s3/commands.py +1 -1
- test_utils/s3/data_generator.py +3 -4
- test_utils/s3/demo_data_generator.py +4 -7
- test_utils/s3/fixtures.py +7 -5
- tools/migrate/__init__.py +1 -1
- tools/migrate/__main__.py +16 -29
- tools/migrate/legacy_schemas.py +251 -190
- tools/migrate/migrate.py +193 -181
- tools/migrate/tests/conftest.py +1 -3
- tools/migrate/tests/test_migrate.py +36 -41
- tools/migrate/tests/test_migration_e2e.py +28 -29
- tools/migrate/tests/test_node_migrations.py +50 -20
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
- flowfile/web/static/assets/ContextMenu-23e909da.js +0 -41
- flowfile/web/static/assets/ContextMenu-4c74eef1.css +0 -26
- flowfile/web/static/assets/ContextMenu-63cfa99b.css +0 -26
- flowfile/web/static/assets/ContextMenu-70ae0c79.js +0 -41
- flowfile/web/static/assets/ContextMenu-c13f91d0.css +0 -26
- flowfile/web/static/assets/ContextMenu-f149cf7c.js +0 -41
- flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
- flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
- flowfile/web/static/assets/Filter-9b6d08db.js +0 -164
- flowfile/web/static/assets/Filter-f62091b3.css +0 -20
- flowfile/web/static/assets/GroupBy-b9505323.css +0 -51
- flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
- flowfile/web/static/assets/Output-283fe388.css +0 -37
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
- flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
- flowfile/web/static/assets/SQLQueryComponent-36cef432.css +0 -27
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
- flowfile/web/static/assets/Sort-3643d625.css +0 -51
- flowfile/web/static/assets/Unique-f9fb0809.css +0 -51
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
- flowfile/web/static/assets/nodeInput-5d0d6b79.js +0 -41
- flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
- flowfile/web/static/assets/secretApi-68435402.js +0 -46
- flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
- flowfile-0.5.1.dist-info/RECORD +0 -388
- {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/WHEEL +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/entry_points.txt +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/licenses/LICENSE +0 -0
flowfile_worker/funcs.py
CHANGED
|
@@ -1,43 +1,47 @@
|
|
|
1
|
-
import polars as pl
|
|
2
1
|
import io
|
|
3
|
-
|
|
4
|
-
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
from base64 import encodebytes
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
from logging import Logger
|
|
7
|
+
from multiprocessing import Array, Queue, Value
|
|
5
8
|
|
|
6
|
-
|
|
9
|
+
import polars as pl
|
|
10
|
+
from pl_fuzzy_frame_match import FuzzyMapping, fuzzy_match_dfs
|
|
7
11
|
|
|
8
|
-
from flowfile_worker.flow_logger import get_worker_logger
|
|
9
|
-
from flowfile_worker.external_sources.sql_source.models import DatabaseWriteSettings
|
|
10
|
-
from flowfile_worker.external_sources.sql_source.main import write_df_to_database
|
|
11
12
|
from flowfile_worker.external_sources.s3_source.main import write_df_to_cloud
|
|
12
13
|
from flowfile_worker.external_sources.s3_source.models import CloudStorageWriteSettings
|
|
13
|
-
from
|
|
14
|
-
from
|
|
15
|
-
import
|
|
16
|
-
import os
|
|
14
|
+
from flowfile_worker.external_sources.sql_source.main import write_df_to_database
|
|
15
|
+
from flowfile_worker.external_sources.sql_source.models import DatabaseWriteSettings
|
|
16
|
+
from flowfile_worker.flow_logger import get_worker_logger
|
|
17
17
|
from flowfile_worker.utils import collect_lazy_frame, collect_lazy_frame_and_get_streaming_info
|
|
18
18
|
|
|
19
|
-
|
|
20
19
|
# 'store', 'calculate_schema', 'calculate_number_of_records', 'write_output', 'fuzzy', 'store_sample']
|
|
21
20
|
|
|
22
|
-
logging.basicConfig(format=
|
|
23
|
-
logger = logging.getLogger(
|
|
21
|
+
logging.basicConfig(format="%(asctime)s: %(message)s")
|
|
22
|
+
logger = logging.getLogger("Spawner")
|
|
24
23
|
logger.setLevel(logging.INFO)
|
|
25
24
|
|
|
26
25
|
|
|
27
|
-
def fuzzy_join_task(
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
26
|
+
def fuzzy_join_task(
|
|
27
|
+
left_serializable_object: bytes,
|
|
28
|
+
right_serializable_object: bytes,
|
|
29
|
+
fuzzy_maps: list[FuzzyMapping],
|
|
30
|
+
error_message: Array,
|
|
31
|
+
file_path: str,
|
|
32
|
+
progress: Value,
|
|
33
|
+
queue: Queue,
|
|
34
|
+
flowfile_flow_id: int,
|
|
35
|
+
flowfile_node_id: int | str,
|
|
36
|
+
):
|
|
32
37
|
flowfile_logger = get_worker_logger(flowfile_flow_id, flowfile_node_id)
|
|
33
38
|
try:
|
|
34
39
|
flowfile_logger.info("Starting fuzzy join operation")
|
|
35
40
|
left_df = pl.LazyFrame.deserialize(io.BytesIO(left_serializable_object))
|
|
36
41
|
right_df = pl.LazyFrame.deserialize(io.BytesIO(right_serializable_object))
|
|
37
|
-
fuzzy_match_result = fuzzy_match_dfs(
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
logger=flowfile_logger)
|
|
42
|
+
fuzzy_match_result = fuzzy_match_dfs(
|
|
43
|
+
left_df=left_df, right_df=right_df, fuzzy_maps=fuzzy_maps, logger=flowfile_logger
|
|
44
|
+
)
|
|
41
45
|
flowfile_logger.info("Fuzzy join operation completed successfully")
|
|
42
46
|
fuzzy_match_result.write_ipc(file_path)
|
|
43
47
|
with progress.get_lock():
|
|
@@ -45,18 +49,23 @@ def fuzzy_join_task(left_serializable_object: bytes, right_serializable_object:
|
|
|
45
49
|
except Exception as e:
|
|
46
50
|
error_msg = str(e).encode()[:256]
|
|
47
51
|
with error_message.get_lock():
|
|
48
|
-
error_message[:len(error_msg)] = error_msg
|
|
52
|
+
error_message[: len(error_msg)] = error_msg
|
|
49
53
|
with progress.get_lock():
|
|
50
54
|
progress.value = -1
|
|
51
|
-
flowfile_logger.error(f
|
|
55
|
+
flowfile_logger.error(f"Error during fuzzy join operation: {str(e)}")
|
|
52
56
|
lf = pl.scan_ipc(file_path)
|
|
53
57
|
number_of_records = collect_lazy_frame(lf.select(pl.len()))[0, 0]
|
|
54
|
-
flowfile_logger.info(f
|
|
58
|
+
flowfile_logger.info(f"Number of records after fuzzy match: {number_of_records}")
|
|
55
59
|
queue.put(encodebytes(lf.serialize()))
|
|
56
60
|
|
|
57
61
|
|
|
58
|
-
def process_and_cache(
|
|
59
|
-
|
|
62
|
+
def process_and_cache(
|
|
63
|
+
polars_serializable_object: io.BytesIO,
|
|
64
|
+
progress: Value,
|
|
65
|
+
error_message: Array,
|
|
66
|
+
file_path: str,
|
|
67
|
+
flowfile_logger: Logger,
|
|
68
|
+
) -> bytes:
|
|
60
69
|
try:
|
|
61
70
|
lf = pl.LazyFrame.deserialize(polars_serializable_object)
|
|
62
71
|
collect_lazy_frame(lf).write_ipc(file_path)
|
|
@@ -65,23 +74,24 @@ def process_and_cache(polars_serializable_object: io.BytesIO, progress: Value, e
|
|
|
65
74
|
progress.value = 100
|
|
66
75
|
except Exception as e:
|
|
67
76
|
error_msg = str(e).encode()[:1024] # Limit error message length
|
|
68
|
-
flowfile_logger.error(f
|
|
77
|
+
flowfile_logger.error(f"Error during process and cache operation: {str(e)}")
|
|
69
78
|
with error_message.get_lock():
|
|
70
|
-
error_message[:len(error_msg)] = error_msg
|
|
79
|
+
error_message[: len(error_msg)] = error_msg
|
|
71
80
|
with progress.get_lock():
|
|
72
81
|
progress.value = -1 # Indicate error
|
|
73
82
|
return error_msg
|
|
74
83
|
|
|
75
84
|
|
|
76
|
-
def store_sample(
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
+
def store_sample(
|
|
86
|
+
polars_serializable_object: bytes,
|
|
87
|
+
progress: Value,
|
|
88
|
+
error_message: Array,
|
|
89
|
+
queue: Queue,
|
|
90
|
+
file_path: str,
|
|
91
|
+
sample_size: int,
|
|
92
|
+
flowfile_flow_id: int,
|
|
93
|
+
flowfile_node_id: int | str,
|
|
94
|
+
):
|
|
85
95
|
flowfile_logger = get_worker_logger(flowfile_flow_id, flowfile_node_id)
|
|
86
96
|
flowfile_logger.info("Starting store sample operation")
|
|
87
97
|
try:
|
|
@@ -91,38 +101,46 @@ def store_sample(polars_serializable_object: bytes,
|
|
|
91
101
|
with progress.get_lock():
|
|
92
102
|
progress.value = 100
|
|
93
103
|
except Exception as e:
|
|
94
|
-
flowfile_logger.error(f
|
|
104
|
+
flowfile_logger.error(f"Error during store sample operation: {str(e)}")
|
|
95
105
|
error_msg = str(e).encode()[:1024] # Limit error message length
|
|
96
106
|
with error_message.get_lock():
|
|
97
|
-
error_message[:len(error_msg)] = error_msg
|
|
107
|
+
error_message[: len(error_msg)] = error_msg
|
|
98
108
|
with progress.get_lock():
|
|
99
109
|
progress.value = -1 # Indicate error
|
|
100
110
|
return error_msg
|
|
101
111
|
|
|
102
112
|
|
|
103
|
-
def store(
|
|
104
|
-
|
|
113
|
+
def store(
|
|
114
|
+
polars_serializable_object: bytes,
|
|
115
|
+
progress: Value,
|
|
116
|
+
error_message: Array,
|
|
117
|
+
queue: Queue,
|
|
118
|
+
file_path: str,
|
|
119
|
+
flowfile_flow_id: int,
|
|
120
|
+
flowfile_node_id: int | str,
|
|
121
|
+
):
|
|
105
122
|
flowfile_logger = get_worker_logger(flowfile_flow_id, flowfile_node_id)
|
|
106
123
|
flowfile_logger.info("Starting store operation")
|
|
107
124
|
polars_serializable_object_io = io.BytesIO(polars_serializable_object)
|
|
108
125
|
process_and_cache(polars_serializable_object_io, progress, error_message, file_path, flowfile_logger)
|
|
109
126
|
lf = pl.scan_ipc(file_path)
|
|
110
127
|
number_of_records = collect_lazy_frame(lf.select(pl.len()))[0, 0]
|
|
111
|
-
flowfile_logger.info(f
|
|
128
|
+
flowfile_logger.info(f"Number of records processed: {number_of_records}")
|
|
112
129
|
queue.put(encodebytes(lf.serialize()))
|
|
113
130
|
|
|
114
131
|
|
|
115
|
-
def calculate_schema_logic(
|
|
132
|
+
def calculate_schema_logic(
|
|
133
|
+
df: pl.LazyFrame, optimize_memory: bool = True, flowfile_logger: Logger = None
|
|
134
|
+
) -> list[dict]:
|
|
116
135
|
if flowfile_logger is None:
|
|
117
|
-
raise ValueError(
|
|
136
|
+
raise ValueError("flowfile_logger is required")
|
|
118
137
|
schema = df.collect_schema()
|
|
119
|
-
schema_stats = [dict(column_name=k, pl_datatype=str(v), col_index=i) for i, (k, v) in
|
|
120
|
-
|
|
121
|
-
flowfile_logger.info('Starting to calculate the number of records')
|
|
138
|
+
schema_stats = [dict(column_name=k, pl_datatype=str(v), col_index=i) for i, (k, v) in enumerate(schema.items())]
|
|
139
|
+
flowfile_logger.info("Starting to calculate the number of records")
|
|
122
140
|
collected_streaming_info = collect_lazy_frame_and_get_streaming_info(df.select(pl.len()))
|
|
123
141
|
n_records = collected_streaming_info.df[0, 0]
|
|
124
142
|
if n_records < 10_000:
|
|
125
|
-
flowfile_logger.info(
|
|
143
|
+
flowfile_logger.info("Collecting the whole dataset")
|
|
126
144
|
df = collect_lazy_frame(df).lazy()
|
|
127
145
|
if optimize_memory and n_records > 1_000_000:
|
|
128
146
|
df = df.head(1_000_000)
|
|
@@ -133,20 +151,27 @@ def calculate_schema_logic(df: pl.LazyFrame, optimize_memory: bool = True, flowf
|
|
|
133
151
|
else:
|
|
134
152
|
df = df.drop(null_cols)
|
|
135
153
|
pl_stats = df.describe()
|
|
136
|
-
n_unique_per_cols = list(
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
154
|
+
n_unique_per_cols = list(
|
|
155
|
+
df.select(pl.all().approx_n_unique())
|
|
156
|
+
.collect(engine="streaming" if collected_streaming_info.streaming_collect_available else "auto")
|
|
157
|
+
.to_dicts()[0]
|
|
158
|
+
.values()
|
|
159
|
+
)
|
|
160
|
+
stats_headers = pl_stats.drop_in_place("statistic").to_list()
|
|
161
|
+
stats = {
|
|
162
|
+
v["column_name"]: v
|
|
163
|
+
for v in pl_stats.transpose(
|
|
164
|
+
include_header=True, header_name="column_name", column_names=stats_headers
|
|
165
|
+
).to_dicts()
|
|
166
|
+
}
|
|
167
|
+
for i, (col_stat, n_unique_values) in enumerate(zip(stats.values(), n_unique_per_cols, strict=False)):
|
|
168
|
+
col_stat["n_unique"] = n_unique_values
|
|
169
|
+
col_stat["examples"] = ", ".join({str(col_stat["min"]), str(col_stat["max"])})
|
|
170
|
+
col_stat["null_count"] = int(float(col_stat["null_count"]))
|
|
171
|
+
col_stat["count"] = int(float(col_stat["count"]))
|
|
147
172
|
|
|
148
173
|
for schema_stat in schema_stats:
|
|
149
|
-
deep_stat = stats.get(schema_stat[
|
|
174
|
+
deep_stat = stats.get(schema_stat["column_name"])
|
|
150
175
|
if deep_stat:
|
|
151
176
|
schema_stat.update(deep_stat)
|
|
152
177
|
del df
|
|
@@ -155,30 +180,45 @@ def calculate_schema_logic(df: pl.LazyFrame, optimize_memory: bool = True, flowf
|
|
|
155
180
|
return schema_stats
|
|
156
181
|
|
|
157
182
|
|
|
158
|
-
def calculate_schema(
|
|
159
|
-
|
|
183
|
+
def calculate_schema(
    polars_serializable_object: bytes,
    progress: Value,
    error_message: Array,
    queue: Queue,
    flowfile_flow_id: int,
    flowfile_node_id: int | str,
    *args,
    **kwargs,
):
    """Compute schema statistics for a serialized LazyFrame and publish them.

    The serialized plan is deserialized into a ``pl.LazyFrame``, handed to
    ``calculate_schema_logic`` and the resulting list of per-column stats is
    put on ``queue``.

    Args:
        polars_serializable_object: Bytes accepted by ``pl.LazyFrame.deserialize``.
        progress: Shared value; set to 100 on success and -1 on failure.
        error_message: Shared byte array; on failure it receives the first
            256 bytes of the encoded exception text.
        queue: Receives the computed ``schema_stats`` on success.
        flowfile_flow_id: Flow id used to obtain the worker logger.
        flowfile_node_id: Node id used to obtain the worker logger.
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored.

    Returns:
        None. Results and errors are reported through ``queue``,
        ``progress`` and ``error_message``; exceptions are not re-raised.
    """
    polars_serializable_object_io = io.BytesIO(polars_serializable_object)
    flowfile_logger = get_worker_logger(flowfile_flow_id, flowfile_node_id)
    flowfile_logger.info("Starting schema calculation")
    try:
        lf = pl.LazyFrame.deserialize(polars_serializable_object_io)
        schema_stats = calculate_schema_logic(lf, flowfile_logger=flowfile_logger)
        # The message needs a %s placeholder: logging formats lazily with
        # `msg % args`, and extra args without a placeholder make the record
        # fail to format, so the stats would never reach the log.
        flowfile_logger.info("schema_stats %s", schema_stats)
        queue.put(schema_stats)
        flowfile_logger.info("Schema calculation completed successfully")
        with progress.get_lock():
            progress.value = 100
    except Exception as e:
        error_msg = str(e).encode()[:256]  # Limit error message length
        # Same placeholder requirement as above; without it the exception
        # text is lost exactly when it is needed.
        flowfile_logger.error("error %s", e)
        with error_message.get_lock():
            error_message[: len(error_msg)] = error_msg
        with progress.get_lock():
            progress.value = -1  # Indicate error
|
|
178
211
|
|
|
179
212
|
|
|
180
|
-
def calculate_number_of_records(
|
|
181
|
-
|
|
213
|
+
def calculate_number_of_records(
|
|
214
|
+
polars_serializable_object: bytes,
|
|
215
|
+
progress: Value,
|
|
216
|
+
error_message: Array,
|
|
217
|
+
queue: Queue,
|
|
218
|
+
flowfile_flow_id: int,
|
|
219
|
+
*args,
|
|
220
|
+
**kwargs,
|
|
221
|
+
):
|
|
182
222
|
flowfile_logger = get_worker_logger(flowfile_flow_id, -1)
|
|
183
223
|
flowfile_logger.info("Starting number of records calculation")
|
|
184
224
|
polars_serializable_object_io = io.BytesIO(polars_serializable_object)
|
|
@@ -187,47 +227,54 @@ def calculate_number_of_records(polars_serializable_object: bytes, progress: Val
|
|
|
187
227
|
n_records = collect_lazy_frame(lf.select(pl.len()))[0, 0]
|
|
188
228
|
queue.put(n_records)
|
|
189
229
|
flowfile_logger.debug("Number of records calculation completed successfully")
|
|
190
|
-
flowfile_logger.debug(f
|
|
230
|
+
flowfile_logger.debug(f"n_records {n_records}")
|
|
191
231
|
with progress.get_lock():
|
|
192
232
|
progress.value = 100
|
|
193
233
|
except Exception as e:
|
|
194
|
-
flowfile_logger.error(
|
|
234
|
+
flowfile_logger.error("error", e)
|
|
195
235
|
error_msg = str(e).encode()[:256] # Limit error message length
|
|
196
236
|
with error_message.get_lock():
|
|
197
|
-
error_message[:len(error_msg)] = error_msg
|
|
237
|
+
error_message[: len(error_msg)] = error_msg
|
|
198
238
|
with progress.get_lock():
|
|
199
239
|
progress.value = -1 # Indicate error
|
|
200
|
-
return b
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
def execute_write_method(
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
240
|
+
return b"error"
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def execute_write_method(
    write_method: Callable,
    path: str,
    data_type: str = None,
    sheet_name: str = None,
    delimiter: str = None,
    write_mode: str = "create",
    flowfile_logger: Logger = None,
):
    """Invoke ``write_method`` with the arguments appropriate for ``data_type``.

    Args:
        write_method: Callable that performs the write. It is called with the
            target as first positional argument plus format-specific keywords.
        path: Destination file path.
        data_type: ``"excel"``, ``"csv"`` or ``"parquet"``. Any other value
            results in no write; a warning is logged.
        sheet_name: Passed as ``worksheet`` for excel output.
        delimiter: Passed as ``separator`` for csv output.
        write_mode: For csv, ``"append"`` opens ``path`` in binary append mode
            and passes the file handle to ``write_method``; any other value
            passes ``path`` directly.
        flowfile_logger: Logger used for progress messages. When omitted, the
            module-level ``logger`` is used.

    Returns:
        None.
    """
    # The parameter defaults to None, so it must not be dereferenced blindly;
    # fall back to the module-level logger in that case.
    log = flowfile_logger if flowfile_logger is not None else logger
    log.info("executing write method")
    if data_type == "excel":
        log.info("Writing as excel file")
        write_method(path, worksheet=sheet_name)
    elif data_type == "csv":
        log.info("Writing as csv file")
        if write_mode == "append":
            # Binary append: the writer receives an open handle positioned at
            # the end of the existing file; the handle is closed on exit.
            with open(path, "ab") as f:
                write_method(f, separator=delimiter, quote_style="always")
        else:
            write_method(path, separator=delimiter, quote_style="always")
    elif data_type == "parquet":
        log.info("Writing as parquet file")
        write_method(path)
    else:
        # Nothing is written for other types; say so instead of returning
        # silently as if the write had happened.
        log.warning("No write performed: unsupported data_type %r", data_type)
|
|
220
266
|
|
|
221
267
|
|
|
222
|
-
def write_to_database(
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
268
|
+
def write_to_database(
|
|
269
|
+
polars_serializable_object: bytes,
|
|
270
|
+
progress: Value,
|
|
271
|
+
error_message: Array,
|
|
272
|
+
queue: Queue,
|
|
273
|
+
file_path: str,
|
|
274
|
+
database_write_settings: DatabaseWriteSettings,
|
|
275
|
+
flowfile_flow_id: int = -1,
|
|
276
|
+
flowfile_node_id: int | str = -1,
|
|
277
|
+
):
|
|
231
278
|
"""
|
|
232
279
|
Writes a Polars DataFrame to a SQL database.
|
|
233
280
|
"""
|
|
@@ -242,22 +289,23 @@ def write_to_database(polars_serializable_object: bytes,
|
|
|
242
289
|
progress.value = 100
|
|
243
290
|
except Exception as e:
|
|
244
291
|
error_msg = str(e).encode()[:1024]
|
|
245
|
-
flowfile_logger.error(f
|
|
292
|
+
flowfile_logger.error(f"Error during write operation: {str(e)}")
|
|
246
293
|
with error_message.get_lock():
|
|
247
|
-
error_message[:len(error_msg)] = error_msg
|
|
294
|
+
error_message[: len(error_msg)] = error_msg
|
|
248
295
|
with progress.get_lock():
|
|
249
296
|
progress.value = -1
|
|
250
297
|
|
|
251
298
|
|
|
252
|
-
def write_to_cloud_storage(
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
299
|
+
def write_to_cloud_storage(
|
|
300
|
+
polars_serializable_object: bytes,
|
|
301
|
+
progress: Value,
|
|
302
|
+
error_message: Array,
|
|
303
|
+
queue: Queue,
|
|
304
|
+
file_path: str,
|
|
305
|
+
cloud_write_settings: CloudStorageWriteSettings,
|
|
306
|
+
flowfile_flow_id: int = -1,
|
|
307
|
+
flowfile_node_id: int | str = -1,
|
|
308
|
+
) -> None:
|
|
261
309
|
"""
|
|
262
310
|
Writes a Polars DataFrame to cloud storage using the provided settings.
|
|
263
311
|
Args:
|
|
@@ -276,8 +324,7 @@ def write_to_cloud_storage(polars_serializable_object: bytes,
|
|
|
276
324
|
flowfile_logger = get_worker_logger(flowfile_flow_id, flowfile_node_id)
|
|
277
325
|
flowfile_logger.info(f"Starting write operation to: {cloud_write_settings.write_settings.resource_path}")
|
|
278
326
|
df = pl.LazyFrame.deserialize(io.BytesIO(polars_serializable_object))
|
|
279
|
-
flowfile_logger.info(f"Starting to sync the data to cloud, execution plan: \n"
|
|
280
|
-
f"{df.explain(format='plain')}")
|
|
327
|
+
flowfile_logger.info(f"Starting to sync the data to cloud, execution plan: \n" f"{df.explain(format='plain')}")
|
|
281
328
|
try:
|
|
282
329
|
write_df_to_cloud(df, cloud_write_settings, flowfile_logger)
|
|
283
330
|
flowfile_logger.info("Write operation completed successfully")
|
|
@@ -285,26 +332,27 @@ def write_to_cloud_storage(polars_serializable_object: bytes,
|
|
|
285
332
|
progress.value = 100
|
|
286
333
|
except Exception as e:
|
|
287
334
|
error_msg = str(e).encode()[:1024]
|
|
288
|
-
flowfile_logger.error(f
|
|
335
|
+
flowfile_logger.error(f"Error during write operation: {str(e)}")
|
|
289
336
|
with error_message.get_lock():
|
|
290
|
-
error_message[:len(error_msg)] = error_msg
|
|
337
|
+
error_message[: len(error_msg)] = error_msg
|
|
291
338
|
with progress.get_lock():
|
|
292
339
|
progress.value = -1
|
|
293
340
|
|
|
294
341
|
|
|
295
|
-
def write_output(
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
342
|
+
def write_output(
|
|
343
|
+
polars_serializable_object: bytes,
|
|
344
|
+
progress: Value,
|
|
345
|
+
error_message: Array,
|
|
346
|
+
queue: Queue,
|
|
347
|
+
file_path: str,
|
|
348
|
+
data_type: str,
|
|
349
|
+
path: str,
|
|
350
|
+
write_mode: str,
|
|
351
|
+
sheet_name: str = None,
|
|
352
|
+
delimiter: str = None,
|
|
353
|
+
flowfile_flow_id: int = -1,
|
|
354
|
+
flowfile_node_id: int | str = -1,
|
|
355
|
+
):
|
|
308
356
|
flowfile_logger = get_worker_logger(flowfile_flow_id, flowfile_node_id)
|
|
309
357
|
flowfile_logger.info(f"Starting write operation to: {path}")
|
|
310
358
|
try:
|
|
@@ -312,42 +360,53 @@ def write_output(polars_serializable_object: bytes,
|
|
|
312
360
|
if isinstance(df, pl.LazyFrame):
|
|
313
361
|
flowfile_logger.info(f'Execution plan explanation:\n{df.explain(format="plain")}')
|
|
314
362
|
flowfile_logger.info("Successfully deserialized dataframe")
|
|
315
|
-
sink_method_str =
|
|
316
|
-
write_method_str =
|
|
363
|
+
sink_method_str = "sink_" + data_type
|
|
364
|
+
write_method_str = "write_" + data_type
|
|
317
365
|
has_sink_method = hasattr(df, sink_method_str)
|
|
318
366
|
write_method = None
|
|
319
|
-
if os.path.exists(path) and write_mode ==
|
|
320
|
-
raise Exception(
|
|
321
|
-
if has_sink_method and write_method !=
|
|
322
|
-
flowfile_logger.info(f
|
|
323
|
-
write_method = getattr(df,
|
|
367
|
+
if os.path.exists(path) and write_mode == "create":
|
|
368
|
+
raise Exception("File already exists")
|
|
369
|
+
if has_sink_method and write_method != "append":
|
|
370
|
+
flowfile_logger.info(f"Using sink method: {sink_method_str}")
|
|
371
|
+
write_method = getattr(df, "sink_" + data_type)
|
|
324
372
|
elif not has_sink_method:
|
|
325
373
|
if isinstance(df, pl.LazyFrame):
|
|
326
374
|
df = collect_lazy_frame(df)
|
|
327
375
|
write_method = getattr(df, write_method_str)
|
|
328
376
|
if write_method is not None:
|
|
329
|
-
execute_write_method(
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
377
|
+
execute_write_method(
|
|
378
|
+
write_method,
|
|
379
|
+
path=path,
|
|
380
|
+
data_type=data_type,
|
|
381
|
+
sheet_name=sheet_name,
|
|
382
|
+
delimiter=delimiter,
|
|
383
|
+
write_mode=write_mode,
|
|
384
|
+
flowfile_logger=flowfile_logger,
|
|
385
|
+
)
|
|
386
|
+
number_of_records_written = (
|
|
387
|
+
collect_lazy_frame(df.select(pl.len()))[0, 0] if isinstance(df, pl.LazyFrame) else df.height
|
|
388
|
+
)
|
|
389
|
+
flowfile_logger.info(f"Number of records written: {number_of_records_written}")
|
|
334
390
|
else:
|
|
335
|
-
raise Exception(
|
|
391
|
+
raise Exception("Write method not found")
|
|
336
392
|
with progress.get_lock():
|
|
337
393
|
progress.value = 100
|
|
338
394
|
except Exception as e:
|
|
339
|
-
logger.info(f
|
|
340
|
-
error_message[:len(str(e))] = str(e).encode()
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
def generic_task(
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
395
|
+
logger.info(f"Error during write operation: {str(e)}")
|
|
396
|
+
error_message[: len(str(e))] = str(e).encode()
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def generic_task(
|
|
400
|
+
func: Callable,
|
|
401
|
+
progress: Value,
|
|
402
|
+
error_message: Array,
|
|
403
|
+
queue: Queue,
|
|
404
|
+
file_path: str,
|
|
405
|
+
flowfile_flow_id: int,
|
|
406
|
+
flowfile_node_id: int | str,
|
|
407
|
+
*args,
|
|
408
|
+
**kwargs,
|
|
409
|
+
):
|
|
351
410
|
print(kwargs)
|
|
352
411
|
flowfile_logger = get_worker_logger(flowfile_flow_id, flowfile_node_id)
|
|
353
412
|
flowfile_logger.info("Starting generic task")
|
|
@@ -358,19 +417,19 @@ def generic_task(func: Callable,
|
|
|
358
417
|
elif isinstance(df, pl.DataFrame):
|
|
359
418
|
df.write_ipc(file_path)
|
|
360
419
|
else:
|
|
361
|
-
raise Exception(
|
|
420
|
+
raise Exception("Returned object is not a DataFrame or LazyFrame")
|
|
362
421
|
with progress.get_lock():
|
|
363
422
|
progress.value = 100
|
|
364
423
|
flowfile_logger.info("Task completed successfully")
|
|
365
424
|
except Exception as e:
|
|
366
|
-
flowfile_logger.error(f
|
|
425
|
+
flowfile_logger.error(f"Error during task execution: {str(e)}")
|
|
367
426
|
error_msg = str(e).encode()[:1024]
|
|
368
427
|
with error_message.get_lock():
|
|
369
|
-
error_message[:len(error_msg)] = error_msg
|
|
428
|
+
error_message[: len(error_msg)] = error_msg
|
|
370
429
|
with progress.get_lock():
|
|
371
430
|
progress.value = -1
|
|
372
431
|
|
|
373
432
|
lf = pl.scan_ipc(file_path)
|
|
374
433
|
number_of_records = collect_lazy_frame(lf.select(pl.len()))[0, 0]
|
|
375
|
-
flowfile_logger.info(f
|
|
434
|
+
flowfile_logger.info(f"Number of records processed: {number_of_records}")
|
|
376
435
|
queue.put(encodebytes(lf.serialize()))
|
flowfile_worker/main.py
CHANGED
|
@@ -1,16 +1,14 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
import uvicorn
|
|
3
2
|
import signal
|
|
4
|
-
|
|
5
3
|
from contextlib import asynccontextmanager
|
|
6
|
-
from fastapi import FastAPI
|
|
7
4
|
|
|
8
|
-
|
|
5
|
+
import uvicorn
|
|
6
|
+
from fastapi import FastAPI
|
|
9
7
|
|
|
10
|
-
from flowfile_worker.routes import router
|
|
11
8
|
from flowfile_worker import mp_context
|
|
12
|
-
from flowfile_worker.configs import
|
|
13
|
-
|
|
9
|
+
from flowfile_worker.configs import FLOWFILE_CORE_URI, SERVICE_HOST, SERVICE_PORT, logger
|
|
10
|
+
from flowfile_worker.routes import router
|
|
11
|
+
from shared.storage_config import storage
|
|
14
12
|
|
|
15
13
|
should_exit = False
|
|
16
14
|
server_instance = None
|
|
@@ -19,11 +17,11 @@ server_instance = None
|
|
|
19
17
|
@asynccontextmanager
|
|
20
18
|
async def shutdown_handler(app: FastAPI):
|
|
21
19
|
"""Handle application startup and shutdown"""
|
|
22
|
-
logger.info(
|
|
20
|
+
logger.info("Starting application...")
|
|
23
21
|
try:
|
|
24
22
|
yield
|
|
25
23
|
finally:
|
|
26
|
-
logger.info(
|
|
24
|
+
logger.info("Shutting down application...")
|
|
27
25
|
logger.info("Cleaning up worker resources...")
|
|
28
26
|
for p in mp_context.active_children():
|
|
29
27
|
try:
|
|
@@ -84,17 +82,12 @@ def run(host: str = None, port: int = None):
|
|
|
84
82
|
signal.signal(signal.SIGTERM, signal_handler)
|
|
85
83
|
signal.signal(signal.SIGINT, signal_handler)
|
|
86
84
|
|
|
87
|
-
config = uvicorn.Config(
|
|
88
|
-
app,
|
|
89
|
-
host=host,
|
|
90
|
-
port=port,
|
|
91
|
-
loop="asyncio"
|
|
92
|
-
)
|
|
85
|
+
config = uvicorn.Config(app, host=host, port=port, loop="asyncio")
|
|
93
86
|
server = uvicorn.Server(config)
|
|
94
87
|
server_instance = server # Store server instance globally
|
|
95
88
|
|
|
96
|
-
logger.info(
|
|
97
|
-
logger.info(
|
|
89
|
+
logger.info("Starting server...")
|
|
90
|
+
logger.info("Server started")
|
|
98
91
|
|
|
99
92
|
try:
|
|
100
93
|
server.run()
|
|
@@ -107,5 +100,6 @@ def run(host: str = None, port: int = None):
|
|
|
107
100
|
|
|
108
101
|
if __name__ == "__main__":
|
|
109
102
|
import multiprocessing
|
|
103
|
+
|
|
110
104
|
multiprocessing.freeze_support()
|
|
111
105
|
run()
|