Flowfile 0.5.1__py3-none-any.whl → 0.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backends/main.py +25 -22
- build_backends/main_prd.py +10 -19
- flowfile/__init__.py +194 -74
- flowfile/__main__.py +10 -7
- flowfile/api.py +51 -57
- flowfile/web/__init__.py +14 -9
- flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
- flowfile/web/static/assets/AdminView-f9847d67.js +713 -0
- flowfile/web/static/assets/CloudConnectionView-cf85f943.css +72 -0
- flowfile/web/static/assets/{CloudConnectionManager-0dfba9f2.js → CloudConnectionView-faace55b.js} +11 -11
- flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
- flowfile/web/static/assets/{CloudStorageReader-d5b1b6c9.js → CloudStorageReader-d86ecaa7.js} +10 -8
- flowfile/web/static/assets/{CloudStorageWriter-00d87aad.js → CloudStorageWriter-0f4d9a44.js} +10 -8
- flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
- flowfile/web/static/assets/ColumnActionInput-c44b7aee.css +159 -0
- flowfile/web/static/assets/ColumnActionInput-f4189ae0.js +330 -0
- flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
- flowfile/web/static/assets/{ColumnSelector-4685e75d.js → ColumnSelector-e66b33da.js} +3 -5
- flowfile/web/static/assets/ContextMenu-49463352.js +9 -0
- flowfile/web/static/assets/ContextMenu-dd5f3f25.js +9 -0
- flowfile/web/static/assets/ContextMenu-f709b884.js +9 -0
- flowfile/web/static/assets/ContextMenu.vue_vue_type_script_setup_true_lang-a1bd6314.js +59 -0
- flowfile/web/static/assets/{CrossJoin-702a3edd.js → CrossJoin-24694b8f.js} +12 -10
- flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
- flowfile/web/static/assets/{CustomNode-b1519993.js → CustomNode-569d45ff.js} +43 -24
- flowfile/web/static/assets/CustomNode-edb9b939.css +42 -0
- flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-c20a1e16.css} +23 -21
- flowfile/web/static/assets/{DatabaseConnectionSettings-6f3e4ea5.js → DatabaseConnectionSettings-cfc08938.js} +5 -4
- flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-5bf8c75b.css} +41 -46
- flowfile/web/static/assets/{DatabaseReader-d38c7295.js → DatabaseReader-701feabb.js} +25 -15
- flowfile/web/static/assets/{DatabaseManager-cf5ef661.js → DatabaseView-0482e5b5.js} +11 -11
- flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
- flowfile/web/static/assets/{DatabaseWriter-b04ef46a.js → DatabaseWriter-16721989.js} +17 -10
- flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-bdcf2c8b.css} +29 -27
- flowfile/web/static/assets/{designer-8da3ba3a.css → DesignerView-49abb835.css} +783 -663
- flowfile/web/static/assets/{designer-9633482a.js → DesignerView-f64749fb.js} +1292 -3253
- flowfile/web/static/assets/{documentation-ca400224.js → DocumentationView-61bd2990.js} +5 -5
- flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-9ea6e871.css} +9 -9
- flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
- flowfile/web/static/assets/{ExploreData-5fa10ed8.js → ExploreData-e2735b13.js} +18 -9
- flowfile/web/static/assets/{ExternalSource-d39af878.js → ExternalSource-2535c3b2.js} +9 -7
- flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-7ac7373f.css} +20 -20
- flowfile/web/static/assets/Filter-2cdbc93c.js +287 -0
- flowfile/web/static/assets/Filter-7494ea97.css +48 -0
- flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
- flowfile/web/static/assets/{Formula-6b04fb1d.js → Formula-fcda3c2c.js} +13 -11
- flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
- flowfile/web/static/assets/{FuzzyMatch-999521f4.js → FuzzyMatch-f8d3b7d3.js} +12 -10
- flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-4b4d7db9.css} +5 -5
- flowfile/web/static/assets/{GraphSolver-17dd2198.js → GraphSolver-72eaa695.js} +14 -12
- flowfile/web/static/assets/GroupBy-5792782d.css +9 -0
- flowfile/web/static/assets/{GroupBy-6b039e18.js → GroupBy-8aa0598b.js} +9 -7
- flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
- flowfile/web/static/assets/{Join-24d0f113.js → Join-e40f0ffa.js} +13 -11
- flowfile/web/static/assets/LoginView-5111c9ae.js +134 -0
- flowfile/web/static/assets/LoginView-d325d632.css +172 -0
- flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
- flowfile/web/static/assets/{ManualInput-34639209.js → ManualInput-9b6f3224.js} +170 -116
- flowfile/web/static/assets/{MultiSelect-0e8724a3.js → MultiSelect-ef28e19e.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js → MultiSelect.vue_vue_type_script_setup_true_lang-83b3bbfd.js} +1 -1
- flowfile/web/static/assets/NodeDesigner-94cd4dd3.css +1429 -0
- flowfile/web/static/assets/NodeDesigner-d2b7ee2b.js +2712 -0
- flowfile/web/static/assets/{NumericInput-3d63a470.js → NumericInput-1d789794.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js → NumericInput.vue_vue_type_script_setup_true_lang-7775f83e.js} +5 -2
- flowfile/web/static/assets/Output-692dd25d.css +37 -0
- flowfile/web/static/assets/{Output-edea9802.js → Output-cefef801.js} +14 -10
- flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
- flowfile/web/static/assets/{Pivot-61d19301.js → Pivot-bab1b75b.js} +12 -10
- flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
- flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
- flowfile/web/static/assets/{PivotValidation-f97fec5b.js → PivotValidation-e7941f91.js} +3 -3
- flowfile/web/static/assets/{PivotValidation-de9f43fe.js → PivotValidation-fba09336.js} +3 -3
- flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
- flowfile/web/static/assets/{PolarsCode-bc3c9984.js → PolarsCode-740e40fa.js} +18 -9
- flowfile/web/static/assets/PopOver-862d7e28.js +939 -0
- flowfile/web/static/assets/PopOver-d96599db.css +33 -0
- flowfile/web/static/assets/{Read-64a3f259.js → Read-225cc63f.js} +16 -12
- flowfile/web/static/assets/{Read-e808b239.css → Read-90f366bc.css} +15 -15
- flowfile/web/static/assets/{RecordCount-3d5039be.js → RecordCount-ffc71eca.js} +6 -4
- flowfile/web/static/assets/{RecordId-597510e0.js → RecordId-a70bb8df.js} +9 -7
- flowfile/web/static/assets/{SQLQueryComponent-df51adbe.js → SQLQueryComponent-15a421f5.js} +3 -3
- flowfile/web/static/assets/SQLQueryComponent-edb90b98.css +29 -0
- flowfile/web/static/assets/{Sample-4be0a507.js → Sample-6c26afc7.js} +6 -4
- flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
- flowfile/web/static/assets/SecretSelector-ceed9496.js +113 -0
- flowfile/web/static/assets/{SecretManager-4839be57.js → SecretsView-214d255a.js} +35 -36
- flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
- flowfile/web/static/assets/{Select-9b72f201.js → Select-8fc29999.js} +9 -7
- flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-7ded385d.js → SettingsSection-3f70e4c3.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-f0f75a42.js → SettingsSection-83090218.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-e1e9c953.js → SettingsSection-9f0d1725.js} +3 -3
- flowfile/web/static/assets/SetupView-3fa0aa03.js +160 -0
- flowfile/web/static/assets/SetupView-e2da3442.css +230 -0
- flowfile/web/static/assets/{SingleSelect-6c777aac.js → SingleSelect-a4a568cb.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js → SingleSelect.vue_vue_type_script_setup_true_lang-c8ebdd33.js} +1 -1
- flowfile/web/static/assets/{SliderInput-7cb93e62.js → SliderInput-be533e71.js} +7 -4
- flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
- flowfile/web/static/assets/{Sort-6cbde21a.js → Sort-154dad81.js} +9 -7
- flowfile/web/static/assets/Sort-4abb7fae.css +9 -0
- flowfile/web/static/assets/{TextInput-d9a40c11.js → TextInput-454e2bda.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-5896c375.js → TextInput.vue_vue_type_script_setup_true_lang-e86510d0.js} +5 -2
- flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
- flowfile/web/static/assets/{TextToRows-c4fcbf4d.js → TextToRows-ea73433d.js} +11 -10
- flowfile/web/static/assets/{ToggleSwitch-4ef91d19.js → ToggleSwitch-9d7b30f1.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-00f2580e.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-394a1f78.css} +14 -14
- flowfile/web/static/assets/{UnavailableFields-a03f512c.js → UnavailableFields-b72a2c72.js} +4 -4
- flowfile/web/static/assets/{Union-bfe9b996.js → Union-1e44f263.js} +8 -6
- flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
- flowfile/web/static/assets/Unique-2b705521.css +3 -0
- flowfile/web/static/assets/{Unique-5d023a27.js → Unique-a3bc6d0a.js} +13 -10
- flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-b6ad6427.css} +7 -7
- flowfile/web/static/assets/{Unpivot-91cc5354.js → Unpivot-e27935fc.js} +11 -9
- flowfile/web/static/assets/{UnpivotValidation-7ee2de44.js → UnpivotValidation-72497680.js} +3 -3
- flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
- flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
- flowfile/web/static/assets/{VueGraphicWalker-e51b9924.js → VueGraphicWalker-d9ab70a3.js} +4 -4
- flowfile/web/static/assets/{api-cf1221f0.js → api-a2102880.js} +1 -1
- flowfile/web/static/assets/{api-c1bad5ca.js → api-f75042b0.js} +1 -1
- flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-1d6acbd9.css} +41 -41
- flowfile/web/static/assets/{dropDown-614b998d.js → dropDown-2798a109.js} +3 -3
- flowfile/web/static/assets/{fullEditor-f7971590.js → fullEditor-cf7d7d93.js} +11 -10
- flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-fe9f7e18.css} +77 -65
- flowfile/web/static/assets/{genericNodeSettings-4fe5f36b.js → genericNodeSettings-14eac1c3.js} +5 -5
- flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
- flowfile/web/static/assets/{index-5429bbf8.js → index-387a6f18.js} +41806 -40958
- flowfile/web/static/assets/index-6b367bb5.js +38 -0
- flowfile/web/static/assets/{index-50508d4d.css → index-e96ab018.css} +2184 -569
- flowfile/web/static/assets/index-f0a6e5a5.js +2696 -0
- flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
- flowfile/web/static/assets/nodeInput-ed2ae8d7.js +2 -0
- flowfile/web/static/assets/{outputCsv-076b85ab.js → outputCsv-3c1757e8.js} +3 -3
- flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
- flowfile/web/static/assets/{outputExcel-0fd17dbe.js → outputExcel-686e1f48.js} +3 -3
- flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
- flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
- flowfile/web/static/assets/{outputParquet-b61e0847.js → outputParquet-df28faa7.js} +4 -4
- flowfile/web/static/assets/{readCsv-c767cb37.css → readCsv-3bfac4c3.css} +15 -15
- flowfile/web/static/assets/{readCsv-a8bb8b61.js → readCsv-e37eee21.js} +3 -3
- flowfile/web/static/assets/{readExcel-806d2826.css → readExcel-3db6b763.css} +13 -13
- flowfile/web/static/assets/{readExcel-67b4aee0.js → readExcel-a13f14bb.js} +5 -5
- flowfile/web/static/assets/{readParquet-92ce1dbc.js → readParquet-344cf746.js} +3 -3
- flowfile/web/static/assets/{readParquet-48c81530.css → readParquet-c5244ad5.css} +4 -4
- flowfile/web/static/assets/secrets.api-ae198c5c.js +65 -0
- flowfile/web/static/assets/{selectDynamic-92e25ee3.js → selectDynamic-6b4b0767.js} +5 -5
- flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
- flowfile/web/static/assets/{vue-codemirror.esm-41b0e0d7.js → vue-codemirror.esm-31ba0e0b.js} +31 -640
- flowfile/web/static/assets/{vue-content-loader.es-2c8e608f.js → vue-content-loader.es-4469c8ff.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/METADATA +3 -4
- flowfile-0.5.4.dist-info/RECORD +407 -0
- flowfile_core/__init__.py +13 -6
- flowfile_core/auth/jwt.py +51 -16
- flowfile_core/auth/models.py +32 -7
- flowfile_core/auth/password.py +89 -0
- flowfile_core/auth/secrets.py +64 -19
- flowfile_core/configs/__init__.py +9 -7
- flowfile_core/configs/flow_logger.py +15 -14
- flowfile_core/configs/node_store/__init__.py +72 -4
- flowfile_core/configs/node_store/nodes.py +155 -172
- flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
- flowfile_core/configs/settings.py +28 -15
- flowfile_core/database/connection.py +7 -6
- flowfile_core/database/init_db.py +96 -2
- flowfile_core/database/models.py +3 -1
- flowfile_core/fileExplorer/__init__.py +17 -0
- flowfile_core/fileExplorer/funcs.py +145 -57
- flowfile_core/fileExplorer/utils.py +10 -11
- flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
- flowfile_core/flowfile/analytics/analytics_processor.py +26 -24
- flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
- flowfile_core/flowfile/analytics/utils.py +1 -1
- flowfile_core/flowfile/code_generator/__init__.py +11 -0
- flowfile_core/flowfile/code_generator/code_generator.py +706 -247
- flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
- flowfile_core/flowfile/database_connection_manager/models.py +1 -1
- flowfile_core/flowfile/extensions.py +17 -12
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +115 -83
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +493 -423
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +31 -20
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +14 -15
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
- flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +190 -127
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
- flowfile_core/flowfile/flow_data_engine/utils.py +99 -67
- flowfile_core/flowfile/flow_graph.py +920 -571
- flowfile_core/flowfile/flow_graph_utils.py +31 -49
- flowfile_core/flowfile/flow_node/flow_node.py +379 -258
- flowfile_core/flowfile/flow_node/models.py +53 -41
- flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
- flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
- flowfile_core/flowfile/handler.py +80 -30
- flowfile_core/flowfile/manage/compatibility_enhancements.py +209 -126
- flowfile_core/flowfile/manage/io_flowfile.py +54 -57
- flowfile_core/flowfile/node_designer/__init__.py +19 -13
- flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
- flowfile_core/flowfile/node_designer/custom_node.py +162 -36
- flowfile_core/flowfile/node_designer/ui_components.py +278 -34
- flowfile_core/flowfile/schema_callbacks.py +71 -51
- flowfile_core/flowfile/setting_generator/__init__.py +0 -1
- flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
- flowfile_core/flowfile/setting_generator/settings.py +64 -53
- flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
- flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
- flowfile_core/flowfile/util/calculate_layout.py +9 -13
- flowfile_core/flowfile/util/execution_orderer.py +25 -17
- flowfile_core/flowfile/util/node_skipper.py +4 -4
- flowfile_core/flowfile/utils.py +19 -21
- flowfile_core/main.py +26 -19
- flowfile_core/routes/auth.py +284 -11
- flowfile_core/routes/cloud_connections.py +25 -25
- flowfile_core/routes/logs.py +21 -29
- flowfile_core/routes/public.py +46 -4
- flowfile_core/routes/routes.py +70 -34
- flowfile_core/routes/secrets.py +25 -27
- flowfile_core/routes/user_defined_components.py +483 -4
- flowfile_core/run_lock.py +0 -1
- flowfile_core/schemas/__init__.py +4 -6
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
- flowfile_core/schemas/cloud_storage_schemas.py +96 -66
- flowfile_core/schemas/input_schema.py +231 -144
- flowfile_core/schemas/output_model.py +49 -34
- flowfile_core/schemas/schemas.py +116 -89
- flowfile_core/schemas/transform_schema.py +518 -263
- flowfile_core/schemas/yaml_types.py +21 -7
- flowfile_core/secret_manager/secret_manager.py +123 -18
- flowfile_core/types.py +29 -9
- flowfile_core/utils/arrow_reader.py +7 -6
- flowfile_core/utils/excel_file_manager.py +3 -3
- flowfile_core/utils/fileManager.py +7 -7
- flowfile_core/utils/fl_executor.py +8 -10
- flowfile_core/utils/utils.py +4 -4
- flowfile_core/utils/validate_setup.py +5 -4
- flowfile_frame/__init__.py +117 -51
- flowfile_frame/adapters.py +2 -9
- flowfile_frame/adding_expr.py +73 -32
- flowfile_frame/cloud_storage/frame_helpers.py +27 -23
- flowfile_frame/cloud_storage/secret_manager.py +12 -26
- flowfile_frame/config.py +2 -5
- flowfile_frame/database/__init__.py +36 -0
- flowfile_frame/database/connection_manager.py +205 -0
- flowfile_frame/database/frame_helpers.py +249 -0
- flowfile_frame/expr.py +311 -218
- flowfile_frame/expr.pyi +160 -159
- flowfile_frame/expr_name.py +23 -23
- flowfile_frame/flow_frame.py +571 -476
- flowfile_frame/flow_frame.pyi +123 -104
- flowfile_frame/flow_frame_methods.py +227 -246
- flowfile_frame/group_frame.py +50 -20
- flowfile_frame/join.py +2 -2
- flowfile_frame/lazy.py +129 -87
- flowfile_frame/lazy_methods.py +83 -30
- flowfile_frame/list_name_space.py +55 -50
- flowfile_frame/selectors.py +148 -68
- flowfile_frame/series.py +9 -7
- flowfile_frame/utils.py +19 -21
- flowfile_worker/__init__.py +12 -7
- flowfile_worker/configs.py +41 -33
- flowfile_worker/create/__init__.py +14 -9
- flowfile_worker/create/funcs.py +114 -77
- flowfile_worker/create/models.py +46 -43
- flowfile_worker/create/pl_types.py +14 -15
- flowfile_worker/create/read_excel_tables.py +34 -41
- flowfile_worker/create/utils.py +22 -19
- flowfile_worker/external_sources/s3_source/main.py +18 -51
- flowfile_worker/external_sources/s3_source/models.py +34 -27
- flowfile_worker/external_sources/sql_source/main.py +8 -5
- flowfile_worker/external_sources/sql_source/models.py +13 -9
- flowfile_worker/flow_logger.py +10 -8
- flowfile_worker/funcs.py +214 -155
- flowfile_worker/main.py +11 -17
- flowfile_worker/models.py +35 -28
- flowfile_worker/process_manager.py +2 -3
- flowfile_worker/routes.py +121 -90
- flowfile_worker/secrets.py +114 -21
- flowfile_worker/spawner.py +89 -54
- flowfile_worker/utils.py +3 -2
- shared/__init__.py +2 -7
- shared/storage_config.py +25 -13
- test_utils/postgres/commands.py +3 -2
- test_utils/postgres/fixtures.py +9 -9
- test_utils/s3/commands.py +1 -1
- test_utils/s3/data_generator.py +3 -4
- test_utils/s3/demo_data_generator.py +4 -7
- test_utils/s3/fixtures.py +7 -5
- tools/migrate/__init__.py +1 -1
- tools/migrate/__main__.py +16 -29
- tools/migrate/legacy_schemas.py +251 -190
- tools/migrate/migrate.py +193 -181
- tools/migrate/tests/conftest.py +1 -3
- tools/migrate/tests/test_migrate.py +36 -41
- tools/migrate/tests/test_migration_e2e.py +28 -29
- tools/migrate/tests/test_node_migrations.py +50 -20
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
- flowfile/web/static/assets/ContextMenu-23e909da.js +0 -41
- flowfile/web/static/assets/ContextMenu-4c74eef1.css +0 -26
- flowfile/web/static/assets/ContextMenu-63cfa99b.css +0 -26
- flowfile/web/static/assets/ContextMenu-70ae0c79.js +0 -41
- flowfile/web/static/assets/ContextMenu-c13f91d0.css +0 -26
- flowfile/web/static/assets/ContextMenu-f149cf7c.js +0 -41
- flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
- flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
- flowfile/web/static/assets/Filter-9b6d08db.js +0 -164
- flowfile/web/static/assets/Filter-f62091b3.css +0 -20
- flowfile/web/static/assets/GroupBy-b9505323.css +0 -51
- flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
- flowfile/web/static/assets/Output-283fe388.css +0 -37
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
- flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
- flowfile/web/static/assets/SQLQueryComponent-36cef432.css +0 -27
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
- flowfile/web/static/assets/Sort-3643d625.css +0 -51
- flowfile/web/static/assets/Unique-f9fb0809.css +0 -51
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
- flowfile/web/static/assets/nodeInput-5d0d6b79.js +0 -41
- flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
- flowfile/web/static/assets/secretApi-68435402.js +0 -46
- flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
- flowfile-0.5.1.dist-info/RECORD +0 -388
- {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/WHEEL +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/entry_points.txt +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/licenses/LICENSE +0 -0
flowfile_worker/secrets.py
CHANGED
|
@@ -1,17 +1,29 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Simplified secure storage module for FlowFile worker to read credentials and secrets.
|
|
3
3
|
"""
|
|
4
|
-
|
|
5
|
-
import
|
|
6
|
-
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import base64
|
|
7
6
|
import json
|
|
8
7
|
import logging
|
|
8
|
+
import os
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from cryptography.fernet import Fernet
|
|
12
|
+
from cryptography.hazmat.primitives import hashes
|
|
13
|
+
from cryptography.hazmat.primitives.kdf.hkdf import HKDF
|
|
9
14
|
from pydantic import SecretStr
|
|
15
|
+
|
|
10
16
|
from flowfile_worker.configs import TEST_MODE
|
|
11
17
|
|
|
12
18
|
# Set up logging
|
|
13
19
|
logger = logging.getLogger(__name__)
|
|
14
20
|
|
|
21
|
+
# Version identifier for key derivation scheme (must match flowfile_core)
|
|
22
|
+
KEY_DERIVATION_VERSION = b"flowfile-secrets-v1"
|
|
23
|
+
|
|
24
|
+
# Encrypted secret format: $ffsec$1${user_id}${fernet_token}
|
|
25
|
+
SECRET_FORMAT_PREFIX = "$ffsec$1$"
|
|
26
|
+
|
|
15
27
|
|
|
16
28
|
class SecureStorage:
|
|
17
29
|
"""A secure local storage mechanism for reading secrets using Fernet encryption."""
|
|
@@ -66,27 +78,42 @@ def get_password(service_name, username):
|
|
|
66
78
|
return _storage.get_password(service_name, username)
|
|
67
79
|
|
|
68
80
|
|
|
69
|
-
def get_docker_secret_key():
|
|
81
|
+
def get_docker_secret_key() -> str | None:
|
|
70
82
|
"""
|
|
71
|
-
Get the master key from Docker secret.
|
|
83
|
+
Get the master key from Docker secret or environment variable.
|
|
72
84
|
|
|
73
85
|
Returns:
|
|
74
|
-
str: The master key if successfully read
|
|
86
|
+
str: The master key if successfully read, None if not configured.
|
|
75
87
|
|
|
76
88
|
Raises:
|
|
77
|
-
RuntimeError: If
|
|
89
|
+
RuntimeError: If the secret file exists but cannot be read, or key is invalid.
|
|
78
90
|
"""
|
|
91
|
+
# First, check for environment variable (allows runtime configuration)
|
|
92
|
+
env_key = os.environ.get("FLOWFILE_MASTER_KEY")
|
|
93
|
+
if env_key:
|
|
94
|
+
# Validate it's a proper Fernet key
|
|
95
|
+
try:
|
|
96
|
+
Fernet(env_key.encode())
|
|
97
|
+
return env_key
|
|
98
|
+
except Exception:
|
|
99
|
+
logger.error("FLOWFILE_MASTER_KEY environment variable is not a valid Fernet key")
|
|
100
|
+
raise RuntimeError("FLOWFILE_MASTER_KEY is not a valid Fernet key")
|
|
101
|
+
|
|
102
|
+
# Then, check for Docker secret file
|
|
79
103
|
secret_path = "/run/secrets/flowfile_master_key"
|
|
80
104
|
if os.path.exists(secret_path):
|
|
81
105
|
try:
|
|
82
|
-
with open(secret_path
|
|
83
|
-
|
|
106
|
+
with open(secret_path) as f:
|
|
107
|
+
key = f.read().strip()
|
|
108
|
+
# Validate the key
|
|
109
|
+
Fernet(key.encode())
|
|
110
|
+
return key
|
|
84
111
|
except Exception as e:
|
|
85
|
-
logger.error(f"Failed to read master key from Docker secret: {e}")
|
|
112
|
+
logger.error(f"Failed to read or validate master key from Docker secret: {e}")
|
|
86
113
|
raise RuntimeError("Failed to read master key from Docker secret")
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
114
|
+
|
|
115
|
+
# No key configured
|
|
116
|
+
return None
|
|
90
117
|
|
|
91
118
|
|
|
92
119
|
def get_master_key() -> str:
|
|
@@ -94,22 +121,29 @@ def get_master_key() -> str:
|
|
|
94
121
|
Get the master encryption key.
|
|
95
122
|
|
|
96
123
|
If in TEST_MODE, returns a test key.
|
|
97
|
-
If running in Docker, retrieves the key from Docker secrets.
|
|
124
|
+
If running in Docker, retrieves the key from Docker secrets or environment.
|
|
98
125
|
Otherwise, retrieves the key from secure storage.
|
|
99
126
|
|
|
100
127
|
Returns:
|
|
101
128
|
str: The master encryption key
|
|
102
129
|
|
|
103
130
|
Raises:
|
|
131
|
+
RuntimeError: If in Docker mode and no key is configured.
|
|
104
132
|
ValueError: If the master key is not found in storage.
|
|
105
133
|
"""
|
|
106
134
|
# First check for test mode
|
|
107
135
|
if TEST_MODE:
|
|
108
|
-
return b
|
|
136
|
+
return b"06t640eu3AG2FmglZS0n0zrEdqadoT7lYDwgSmKyxE4=".decode()
|
|
109
137
|
|
|
110
138
|
# Next check if running in Docker
|
|
111
|
-
if os.environ.get("
|
|
112
|
-
|
|
139
|
+
if os.environ.get("FLOWFILE_MODE") == "docker":
|
|
140
|
+
key = get_docker_secret_key()
|
|
141
|
+
if key is None:
|
|
142
|
+
raise RuntimeError(
|
|
143
|
+
"Master key not configured. Set FLOWFILE_MASTER_KEY environment variable "
|
|
144
|
+
"or mount the flowfile_master_key Docker secret."
|
|
145
|
+
)
|
|
146
|
+
return key
|
|
113
147
|
|
|
114
148
|
# Otherwise read from local storage
|
|
115
149
|
key = get_password("flowfile", "master_key")
|
|
@@ -118,9 +152,41 @@ def get_master_key() -> str:
|
|
|
118
152
|
return key
|
|
119
153
|
|
|
120
154
|
|
|
121
|
-
def
|
|
155
|
+
def derive_user_key(user_id: int) -> bytes:
|
|
122
156
|
"""
|
|
123
|
-
|
|
157
|
+
Derive a user-specific encryption key from the master key using HKDF.
|
|
158
|
+
|
|
159
|
+
This provides cryptographic isolation between users - each user's secrets
|
|
160
|
+
are encrypted with a unique key derived from the master key.
|
|
161
|
+
|
|
162
|
+
Args:
|
|
163
|
+
user_id: The unique identifier for the user
|
|
164
|
+
|
|
165
|
+
Returns:
|
|
166
|
+
bytes: A 32-byte URL-safe base64-encoded key suitable for Fernet
|
|
167
|
+
"""
|
|
168
|
+
master_key = get_master_key().encode()
|
|
169
|
+
|
|
170
|
+
# Use HKDF to derive a user-specific key
|
|
171
|
+
hkdf = HKDF(
|
|
172
|
+
algorithm=hashes.SHA256(),
|
|
173
|
+
length=32, # Fernet requires 32 bytes
|
|
174
|
+
salt=KEY_DERIVATION_VERSION, # Static salt is fine for key derivation
|
|
175
|
+
info=f"user-{user_id}".encode(), # User-specific context
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
# Derive raw key material and encode for Fernet
|
|
179
|
+
derived_key = hkdf.derive(master_key)
|
|
180
|
+
return base64.urlsafe_b64encode(derived_key)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def decrypt_secret(encrypted_value: str) -> SecretStr:
|
|
184
|
+
"""
|
|
185
|
+
Decrypt an encrypted value.
|
|
186
|
+
|
|
187
|
+
Supports both new format (with embedded user_id) and legacy format.
|
|
188
|
+
- New format: $ffsec$1${user_id}${fernet_token} - user_id extracted automatically
|
|
189
|
+
- Legacy format: raw Fernet token - uses master key directly
|
|
124
190
|
|
|
125
191
|
Args:
|
|
126
192
|
encrypted_value: The encrypted value as a string
|
|
@@ -128,21 +194,48 @@ def decrypt_secret(encrypted_value) -> SecretStr:
|
|
|
128
194
|
Returns:
|
|
129
195
|
SecretStr: The decrypted value as a SecretStr
|
|
130
196
|
"""
|
|
197
|
+
# Check for new versioned format with embedded user_id
|
|
198
|
+
if encrypted_value.startswith(SECRET_FORMAT_PREFIX):
|
|
199
|
+
# Parse: $ffsec$1${user_id}${fernet_token}
|
|
200
|
+
remainder = encrypted_value[len(SECRET_FORMAT_PREFIX):]
|
|
201
|
+
parts = remainder.split("$", 1)
|
|
202
|
+
if len(parts) != 2:
|
|
203
|
+
raise ValueError("Invalid encrypted secret format")
|
|
204
|
+
|
|
205
|
+
embedded_user_id = int(parts[0])
|
|
206
|
+
fernet_token = parts[1]
|
|
207
|
+
|
|
208
|
+
key = derive_user_key(embedded_user_id)
|
|
209
|
+
f = Fernet(key)
|
|
210
|
+
return SecretStr(f.decrypt(fernet_token.encode()).decode())
|
|
211
|
+
|
|
212
|
+
# Legacy format - use master key directly
|
|
131
213
|
key = get_master_key().encode()
|
|
132
214
|
f = Fernet(key)
|
|
133
215
|
return SecretStr(f.decrypt(encrypted_value.encode()).decode())
|
|
134
216
|
|
|
135
217
|
|
|
136
|
-
def encrypt_secret(secret_value):
|
|
218
|
+
def encrypt_secret(secret_value: str, user_id: int | None = None) -> str:
|
|
137
219
|
"""
|
|
138
|
-
Encrypt a secret value
|
|
220
|
+
Encrypt a secret value.
|
|
221
|
+
|
|
222
|
+
If user_id is provided, uses per-user key derivation with embedded user_id format.
|
|
223
|
+
Otherwise, uses legacy master key encryption (for backward compatibility in tests).
|
|
139
224
|
|
|
140
225
|
Args:
|
|
141
226
|
secret_value: The secret value to encrypt
|
|
227
|
+
user_id: Optional user ID for per-user key derivation
|
|
142
228
|
|
|
143
229
|
Returns:
|
|
144
230
|
str: The encrypted value as a string
|
|
145
231
|
"""
|
|
232
|
+
if user_id is not None:
|
|
233
|
+
key = derive_user_key(user_id)
|
|
234
|
+
f = Fernet(key)
|
|
235
|
+
fernet_token = f.encrypt(secret_value.encode()).decode()
|
|
236
|
+
return f"{SECRET_FORMAT_PREFIX}{user_id}${fernet_token}"
|
|
237
|
+
|
|
238
|
+
# Legacy format for backward compatibility
|
|
146
239
|
key = get_master_key().encode()
|
|
147
240
|
f = Fernet(key)
|
|
148
241
|
return f.encrypt(secret_value.encode()).decode()
|
flowfile_worker/spawner.py
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
from flowfile_worker import status_dict
|
|
2
|
-
from time import sleep
|
|
3
1
|
import gc
|
|
4
|
-
from
|
|
5
|
-
from multiprocessing import
|
|
2
|
+
from multiprocessing import Process
|
|
3
|
+
from multiprocessing.queues import Queue
|
|
4
|
+
from time import sleep
|
|
5
|
+
|
|
6
|
+
from flowfile_worker import funcs, models, mp_context, status_dict, status_dict_lock
|
|
6
7
|
from flowfile_worker.process_manager import ProcessManager
|
|
7
|
-
from flowfile_worker import models, mp_context, funcs, status_dict_lock
|
|
8
8
|
|
|
9
9
|
# Initialize ProcessManager
|
|
10
10
|
process_manager = ProcessManager()
|
|
11
11
|
|
|
12
|
-
flowfile_node_id_type = int|str
|
|
12
|
+
flowfile_node_id_type = int | str
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
def handle_task(task_id: str, p: Process, progress: mp_context.Value, error_message: mp_context.Array, q: Queue):
|
|
@@ -48,7 +48,7 @@ def handle_task(task_id: str, p: Process, progress: mp_context.Value, error_mess
|
|
|
48
48
|
with status_dict_lock:
|
|
49
49
|
status_dict[task_id].status = "Error"
|
|
50
50
|
with error_message.get_lock():
|
|
51
|
-
status_dict[task_id].error_message = error_message.value.decode().rstrip(
|
|
51
|
+
status_dict[task_id].error_message = error_message.value.decode().rstrip("\x00")
|
|
52
52
|
break
|
|
53
53
|
|
|
54
54
|
p.join()
|
|
@@ -56,11 +56,14 @@ def handle_task(task_id: str, p: Process, progress: mp_context.Value, error_mess
|
|
|
56
56
|
with status_dict_lock:
|
|
57
57
|
status = status_dict[task_id]
|
|
58
58
|
if status.status != "Cancelled":
|
|
59
|
-
|
|
59
|
+
# Read progress value with lock to ensure consistency
|
|
60
|
+
with progress.get_lock():
|
|
61
|
+
final_progress = progress.value
|
|
62
|
+
if final_progress == 100:
|
|
60
63
|
status.status = "Completed"
|
|
61
64
|
if not q.empty():
|
|
62
65
|
status.results = q.get()
|
|
63
|
-
elif
|
|
66
|
+
elif final_progress != -1:
|
|
64
67
|
status_dict[task_id].status = "Unknown Error"
|
|
65
68
|
|
|
66
69
|
finally:
|
|
@@ -72,11 +75,15 @@ def handle_task(task_id: str, p: Process, progress: mp_context.Value, error_mess
|
|
|
72
75
|
gc.collect()
|
|
73
76
|
|
|
74
77
|
|
|
75
|
-
def start_process(
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
78
|
+
def start_process(
|
|
79
|
+
polars_serializable_object: bytes,
|
|
80
|
+
task_id: str,
|
|
81
|
+
operation: models.OperationType,
|
|
82
|
+
file_ref: str,
|
|
83
|
+
flowfile_flow_id: int,
|
|
84
|
+
flowfile_node_id: flowfile_node_id_type,
|
|
85
|
+
kwargs: dict = None,
|
|
86
|
+
) -> None:
|
|
80
87
|
"""
|
|
81
88
|
Starts a new process for handling Polars dataframe operations.
|
|
82
89
|
|
|
@@ -97,24 +104,31 @@ def start_process(polars_serializable_object: bytes, task_id: str,
|
|
|
97
104
|
if kwargs is None:
|
|
98
105
|
kwargs = {}
|
|
99
106
|
process_task = getattr(funcs, operation)
|
|
100
|
-
kwargs[
|
|
101
|
-
kwargs[
|
|
102
|
-
kwargs[
|
|
103
|
-
kwargs[
|
|
104
|
-
kwargs[
|
|
105
|
-
kwargs[
|
|
106
|
-
kwargs[
|
|
107
|
+
kwargs["polars_serializable_object"] = polars_serializable_object
|
|
108
|
+
kwargs["progress"] = mp_context.Value("i", 0)
|
|
109
|
+
kwargs["error_message"] = mp_context.Array("c", 1024)
|
|
110
|
+
kwargs["queue"] = mp_context.Queue(maxsize=1)
|
|
111
|
+
kwargs["file_path"] = file_ref
|
|
112
|
+
kwargs["flowfile_flow_id"] = flowfile_flow_id
|
|
113
|
+
kwargs["flowfile_node_id"] = flowfile_node_id
|
|
107
114
|
|
|
108
115
|
p: Process = mp_context.Process(target=process_task, kwargs=kwargs)
|
|
109
116
|
p.start()
|
|
110
117
|
|
|
111
118
|
process_manager.add_process(task_id, p)
|
|
112
|
-
handle_task(
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
119
|
+
handle_task(
|
|
120
|
+
task_id=task_id, p=p, progress=kwargs["progress"], error_message=kwargs["error_message"], q=kwargs["queue"]
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def start_generic_process(
|
|
125
|
+
func_ref: callable,
|
|
126
|
+
task_id: str,
|
|
127
|
+
file_ref: str,
|
|
128
|
+
flowfile_flow_id: int,
|
|
129
|
+
flowfile_node_id: flowfile_node_id_type,
|
|
130
|
+
kwargs: dict = None,
|
|
131
|
+
) -> None:
|
|
118
132
|
"""
|
|
119
133
|
Starts a new process for handling generic function execution.
|
|
120
134
|
|
|
@@ -132,30 +146,33 @@ def start_generic_process(func_ref: callable, task_id: str,
|
|
|
132
146
|
- Delegates to handle_task for process monitoring
|
|
133
147
|
"""
|
|
134
148
|
kwargs = {} if kwargs is None else kwargs
|
|
135
|
-
kwargs[
|
|
136
|
-
kwargs[
|
|
137
|
-
kwargs[
|
|
138
|
-
kwargs[
|
|
139
|
-
kwargs[
|
|
140
|
-
kwargs[
|
|
141
|
-
kwargs[
|
|
142
|
-
|
|
143
|
-
process_task =
|
|
149
|
+
kwargs["func"] = func_ref
|
|
150
|
+
kwargs["progress"] = mp_context.Value("i", 0)
|
|
151
|
+
kwargs["error_message"] = mp_context.Array("c", 1024)
|
|
152
|
+
kwargs["queue"] = mp_context.Queue(maxsize=1)
|
|
153
|
+
kwargs["file_path"] = file_ref
|
|
154
|
+
kwargs["flowfile_flow_id"] = flowfile_flow_id
|
|
155
|
+
kwargs["flowfile_node_id"] = flowfile_node_id
|
|
156
|
+
|
|
157
|
+
process_task = funcs.generic_task
|
|
144
158
|
p: Process = mp_context.Process(target=process_task, kwargs=kwargs)
|
|
145
159
|
p.start()
|
|
146
160
|
|
|
147
161
|
process_manager.add_process(task_id, p) # Add process to process manager
|
|
148
|
-
handle_task(
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
162
|
+
handle_task(
|
|
163
|
+
task_id=task_id, p=p, progress=kwargs["progress"], error_message=kwargs["error_message"], q=kwargs["queue"]
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def start_fuzzy_process(
|
|
168
|
+
left_serializable_object: bytes,
|
|
169
|
+
right_serializable_object: bytes,
|
|
170
|
+
file_ref: str,
|
|
171
|
+
fuzzy_maps: list[models.FuzzyMapping],
|
|
172
|
+
task_id: str,
|
|
173
|
+
flowfile_flow_id: int,
|
|
174
|
+
flowfile_node_id: flowfile_node_id_type,
|
|
175
|
+
) -> None:
|
|
159
176
|
"""
|
|
160
177
|
Starts a new process for performing fuzzy joining operations on two datasets.
|
|
161
178
|
|
|
@@ -172,13 +189,31 @@ def start_fuzzy_process(left_serializable_object: bytes,
|
|
|
172
189
|
- Initializes and starts a new process for fuzzy joining operation
|
|
173
190
|
- Delegates to handle_task for process monitoring
|
|
174
191
|
"""
|
|
175
|
-
progress = mp_context.Value(
|
|
176
|
-
error_message = mp_context.Array(
|
|
177
|
-
q = Queue(maxsize=1)
|
|
178
|
-
|
|
179
|
-
args:
|
|
180
|
-
|
|
181
|
-
|
|
192
|
+
progress = mp_context.Value("i", 0)
|
|
193
|
+
error_message = mp_context.Array("c", 1024)
|
|
194
|
+
q = mp_context.Queue(maxsize=1)
|
|
195
|
+
|
|
196
|
+
args: tuple[
|
|
197
|
+
bytes,
|
|
198
|
+
bytes,
|
|
199
|
+
list[models.FuzzyMapping],
|
|
200
|
+
mp_context.Array,
|
|
201
|
+
str,
|
|
202
|
+
mp_context.Value,
|
|
203
|
+
Queue,
|
|
204
|
+
int,
|
|
205
|
+
flowfile_node_id_type,
|
|
206
|
+
] = (
|
|
207
|
+
left_serializable_object,
|
|
208
|
+
right_serializable_object,
|
|
209
|
+
fuzzy_maps,
|
|
210
|
+
error_message,
|
|
211
|
+
file_ref,
|
|
212
|
+
progress,
|
|
213
|
+
q,
|
|
214
|
+
flowfile_flow_id,
|
|
215
|
+
flowfile_node_id,
|
|
216
|
+
)
|
|
182
217
|
|
|
183
218
|
p: Process = mp_context.Process(target=funcs.fuzzy_join_task, args=args)
|
|
184
219
|
p.start()
|
flowfile_worker/utils.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
1
3
|
import polars as pl
|
|
2
4
|
from polars.exceptions import PanicException
|
|
3
|
-
from dataclasses import dataclass
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
def collect_lazy_frame(lf: pl.LazyFrame) -> pl.DataFrame:
|
|
@@ -12,7 +13,7 @@ def collect_lazy_frame(lf: pl.LazyFrame) -> pl.DataFrame:
|
|
|
12
13
|
|
|
13
14
|
@dataclass
|
|
14
15
|
class CollectStreamingInfo:
|
|
15
|
-
__slots__ =
|
|
16
|
+
__slots__ = "df", "streaming_collect_available"
|
|
16
17
|
df: pl.DataFrame
|
|
17
18
|
streaming_collect_available: bool
|
|
18
19
|
|
shared/__init__.py
CHANGED
|
@@ -5,11 +5,6 @@ flowfile_core, flowfile_worker, and other components without creating
|
|
|
5
5
|
circular dependencies.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
from .storage_config import
|
|
8
|
+
from .storage_config import get_cache_directory, get_flows_directory, get_temp_directory, storage
|
|
9
9
|
|
|
10
|
-
__all__ = [
|
|
11
|
-
'storage',
|
|
12
|
-
'get_cache_directory',
|
|
13
|
-
'get_temp_directory',
|
|
14
|
-
'get_flows_directory'
|
|
15
|
-
]
|
|
10
|
+
__all__ = ["storage", "get_cache_directory", "get_temp_directory", "get_flows_directory"]
|
shared/storage_config.py
CHANGED
|
@@ -3,28 +3,40 @@
|
|
|
3
3
|
Centralized storage configuration for Flowfile.
|
|
4
4
|
This module can be imported by both core and worker without creating dependencies.
|
|
5
5
|
"""
|
|
6
|
+
|
|
6
7
|
import os
|
|
7
8
|
from pathlib import Path
|
|
8
|
-
from typing import
|
|
9
|
+
from typing import Literal
|
|
10
|
+
|
|
11
|
+
DirectoryOptions = Literal[
|
|
12
|
+
"temp_directory",
|
|
13
|
+
"logs_directory",
|
|
14
|
+
"system_logs_directory",
|
|
15
|
+
"database_directory",
|
|
16
|
+
"cache_directory",
|
|
17
|
+
"flows_directory",
|
|
18
|
+
"user_defined_nodes_directory",
|
|
19
|
+
]
|
|
20
|
+
|
|
9
21
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
22
|
+
def _is_docker_mode() -> bool:
|
|
23
|
+
"""Check if running in Docker mode based on FLOWFILE_MODE."""
|
|
24
|
+
return os.environ.get("FLOWFILE_MODE") == "docker"
|
|
13
25
|
|
|
14
26
|
|
|
15
27
|
class FlowfileStorage:
|
|
16
28
|
"""Centralized storage manager for Flowfile applications."""
|
|
17
29
|
|
|
18
30
|
def __init__(self):
|
|
19
|
-
self._base_dir:
|
|
20
|
-
self._user_data_dir:
|
|
31
|
+
self._base_dir: Path | None = None
|
|
32
|
+
self._user_data_dir: Path | None = None
|
|
21
33
|
self._ensure_directories()
|
|
22
34
|
|
|
23
35
|
@property
|
|
24
36
|
def base_directory(self) -> Path:
|
|
25
37
|
"""Get the base Flowfile storage directory (for internal container communication)."""
|
|
26
38
|
if self._base_dir is None:
|
|
27
|
-
if
|
|
39
|
+
if _is_docker_mode():
|
|
28
40
|
# In Docker, internal storage stays inside /app
|
|
29
41
|
base_path = os.environ.get("FLOWFILE_STORAGE_DIR", "/app/internal_storage")
|
|
30
42
|
else:
|
|
@@ -41,7 +53,7 @@ class FlowfileStorage:
|
|
|
41
53
|
def user_data_directory(self) -> Path:
|
|
42
54
|
"""Get the user data directory (completely separate from application code)."""
|
|
43
55
|
if self._user_data_dir is None:
|
|
44
|
-
if
|
|
56
|
+
if _is_docker_mode():
|
|
45
57
|
# In Docker, user data is at /data/user (completely outside /app)
|
|
46
58
|
user_data_path = os.environ.get("FLOWFILE_USER_DATA_DIR", "/data/user")
|
|
47
59
|
else:
|
|
@@ -70,7 +82,7 @@ class FlowfileStorage:
|
|
|
70
82
|
@property
|
|
71
83
|
def flows_directory(self) -> Path:
|
|
72
84
|
"""Directory for flow storage (user-accessible)."""
|
|
73
|
-
if
|
|
85
|
+
if _is_docker_mode():
|
|
74
86
|
# In Docker, flows are in separate user data area
|
|
75
87
|
return self.user_data_directory / "flows"
|
|
76
88
|
else:
|
|
@@ -80,7 +92,7 @@ class FlowfileStorage:
|
|
|
80
92
|
@property
|
|
81
93
|
def uploads_directory(self) -> Path:
|
|
82
94
|
"""Directory for user uploads (user-accessible)."""
|
|
83
|
-
if
|
|
95
|
+
if _is_docker_mode():
|
|
84
96
|
# In Docker, uploads are in separate user data area
|
|
85
97
|
return self.user_data_directory / "uploads"
|
|
86
98
|
else:
|
|
@@ -90,7 +102,7 @@ class FlowfileStorage:
|
|
|
90
102
|
@property
|
|
91
103
|
def user_defined_nodes_directory(self) -> Path:
|
|
92
104
|
"""Directory for user-defined custom nodes (user-accessible)."""
|
|
93
|
-
if
|
|
105
|
+
if _is_docker_mode():
|
|
94
106
|
return self.user_data_directory / "user_defined_nodes"
|
|
95
107
|
else:
|
|
96
108
|
return self.base_directory / "user_defined_nodes"
|
|
@@ -103,7 +115,7 @@ class FlowfileStorage:
|
|
|
103
115
|
@property
|
|
104
116
|
def outputs_directory(self) -> Path:
|
|
105
117
|
"""Directory for user outputs (user-accessible)."""
|
|
106
|
-
if
|
|
118
|
+
if _is_docker_mode():
|
|
107
119
|
# In Docker, outputs are in separate user data area
|
|
108
120
|
return self.user_data_directory / "outputs"
|
|
109
121
|
else:
|
|
@@ -184,8 +196,8 @@ class FlowfileStorage:
|
|
|
184
196
|
|
|
185
197
|
def cleanup_directory(self, directory_option: DirectoryOptions, storage_duration_hours: int = 24) -> None:
|
|
186
198
|
"""Clean up any directory of the folder"""
|
|
187
|
-
import time
|
|
188
199
|
import shutil
|
|
200
|
+
import time
|
|
189
201
|
|
|
190
202
|
if not hasattr(self, directory_option):
|
|
191
203
|
raise Exception(f"Directory does not exist in {self.base_directory}")
|
test_utils/postgres/commands.py
CHANGED
|
@@ -6,8 +6,9 @@ to start and stop PostgreSQL containers with sample data.
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import argparse
|
|
9
|
-
import sys
|
|
10
9
|
import logging
|
|
10
|
+
import sys
|
|
11
|
+
|
|
11
12
|
from . import fixtures
|
|
12
13
|
|
|
13
14
|
# Set up logging
|
|
@@ -106,4 +107,4 @@ if __name__ == "__main__":
|
|
|
106
107
|
sys.exit(stop_postgres())
|
|
107
108
|
else:
|
|
108
109
|
print("Usage: python -m test_utils.postgres.commands [start|stop] [options]")
|
|
109
|
-
sys.exit(1)
|
|
110
|
+
sys.exit(1)
|
test_utils/postgres/fixtures.py
CHANGED
|
@@ -5,13 +5,13 @@ This module provides utilities to set up, manage, and tear down PostgreSQL
|
|
|
5
5
|
containers with sample data for testing.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
import os
|
|
9
|
-
import time
|
|
10
8
|
import logging
|
|
11
|
-
import
|
|
9
|
+
import os
|
|
12
10
|
import shutil
|
|
11
|
+
import subprocess
|
|
12
|
+
import time
|
|
13
|
+
from collections.abc import Generator
|
|
13
14
|
from contextlib import contextmanager
|
|
14
|
-
from typing import Dict, Generator, Optional, Tuple
|
|
15
15
|
|
|
16
16
|
# Configure logging
|
|
17
17
|
logging.basicConfig(
|
|
@@ -208,7 +208,7 @@ def start_postgres_container(
|
|
|
208
208
|
container_name: str = POSTGRES_CONTAINER_NAME,
|
|
209
209
|
port: int = POSTGRES_PORT,
|
|
210
210
|
image_tag: str = POSTGRES_IMAGE_TAG
|
|
211
|
-
) ->
|
|
211
|
+
) -> tuple[subprocess.Popen | None, bool]:
|
|
212
212
|
"""
|
|
213
213
|
Start the PostgreSQL container with sample data.
|
|
214
214
|
|
|
@@ -225,7 +225,7 @@ def start_postgres_container(
|
|
|
225
225
|
logger.warning("Docker not available, skipping PostgreSQL container start")
|
|
226
226
|
return None, False
|
|
227
227
|
|
|
228
|
-
logger.info(
|
|
228
|
+
logger.info("Starting PostgreSQL container with sample data...")
|
|
229
229
|
|
|
230
230
|
# Check if container is already running
|
|
231
231
|
if is_container_running(container_name):
|
|
@@ -345,12 +345,12 @@ def print_connection_info(
|
|
|
345
345
|
print(f"Connection string: postgresql://{user}:{password}@{host}:{port}/{db}")
|
|
346
346
|
print("=" * 50)
|
|
347
347
|
print("\nTo stop the container, run:")
|
|
348
|
-
print(
|
|
348
|
+
print("poetry run stop_postgres")
|
|
349
349
|
print("=" * 50 + "\n")
|
|
350
350
|
|
|
351
351
|
|
|
352
352
|
@contextmanager
|
|
353
|
-
def managed_postgres() -> Generator[
|
|
353
|
+
def managed_postgres() -> Generator[dict[str, any], None, None]:
|
|
354
354
|
"""
|
|
355
355
|
Context manager for PostgreSQL container management.
|
|
356
356
|
Ensures proper cleanup even when tests fail.
|
|
@@ -414,4 +414,4 @@ def get_db_engine():
|
|
|
414
414
|
return engine
|
|
415
415
|
except ImportError:
|
|
416
416
|
logger.error("SQLAlchemy not installed. Run: pip install sqlalchemy")
|
|
417
|
-
raise
|
|
417
|
+
raise
|
test_utils/s3/commands.py
CHANGED
test_utils/s3/data_generator.py
CHANGED
|
@@ -1,17 +1,16 @@
|
|
|
1
1
|
|
|
2
|
-
import logging
|
|
3
2
|
import io
|
|
3
|
+
import logging
|
|
4
4
|
import os
|
|
5
5
|
|
|
6
6
|
# Third-party libraries
|
|
7
7
|
import boto3
|
|
8
|
-
from botocore.client import Config
|
|
9
8
|
import polars as pl
|
|
10
9
|
import pyarrow as pa
|
|
10
|
+
from botocore.client import Config
|
|
11
11
|
from deltalake import write_deltalake
|
|
12
12
|
from pyiceberg.catalog import load_catalog
|
|
13
13
|
|
|
14
|
-
|
|
15
14
|
# Configure logging
|
|
16
15
|
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
17
16
|
logger = logging.getLogger(__name__)
|
|
@@ -289,4 +288,4 @@ if __name__ == '__main__':
|
|
|
289
288
|
populate_test_data(endpoint_url=MINIO_ENDPOINT_URL,
|
|
290
289
|
access_key=MINIO_ACCESS_KEY,
|
|
291
290
|
secret_key=MINIO_SECRET_KEY,
|
|
292
|
-
bucket_name="test-bucket")
|
|
291
|
+
bucket_name="test-bucket")
|