Flowfile 0.4.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backends/main.py +25 -22
- build_backends/main_prd.py +10 -19
- flowfile/__init__.py +179 -73
- flowfile/__main__.py +10 -7
- flowfile/api.py +52 -59
- flowfile/web/__init__.py +14 -9
- flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
- flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
- flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
- flowfile/web/static/assets/{CloudConnectionManager-d3248f8d.js → CloudConnectionView-f13f202b.js} +11 -11
- flowfile/web/static/assets/{CloudStorageReader-d65bf041.js → CloudStorageReader-0023d4a5.js} +10 -8
- flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
- flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
- flowfile/web/static/assets/{CloudStorageWriter-e83be3ed.js → CloudStorageWriter-8e781e11.js} +10 -8
- flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
- flowfile/web/static/assets/{ColumnSelector-cce661cf.js → ColumnSelector-8ad68ea9.js} +3 -5
- flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-11a4652a.js → ContextMenu-31ee57f0.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-160afb08.js → ContextMenu-69a74055.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-cf18d2cc.js → ContextMenu-8e2051c6.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
- flowfile/web/static/assets/{CrossJoin-d395d38c.js → CrossJoin-03df6938.js} +12 -10
- flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
- flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
- flowfile/web/static/assets/{CustomNode-b812dc0b.js → CustomNode-8479239b.js} +36 -24
- flowfile/web/static/assets/{DatabaseConnectionSettings-7000bf2c.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
- flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
- flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
- flowfile/web/static/assets/{DatabaseReader-4f035d0c.js → DatabaseReader-c58b9552.js} +25 -15
- flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
- flowfile/web/static/assets/{DatabaseManager-9662ec5b.js → DatabaseView-d26a9140.js} +11 -11
- flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
- flowfile/web/static/assets/{DatabaseWriter-f65dcd54.js → DatabaseWriter-4d05ddc7.js} +17 -10
- flowfile/web/static/assets/{designer-e3c150ec.css → DesignerView-a6d0ee84.css} +629 -538
- flowfile/web/static/assets/{designer-f3656d8c.js → DesignerView-e6f5c0e8.js} +1214 -3209
- flowfile/web/static/assets/{documentation-52b241e7.js → DocumentationView-2e78ef1b.js} +5 -5
- flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
- flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
- flowfile/web/static/assets/{ExploreData-94c43dfc.js → ExploreData-7b54caca.js} +18 -9
- flowfile/web/static/assets/{ExternalSource-ac04b3cc.js → ExternalSource-3fa399b2.js} +9 -7
- flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
- flowfile/web/static/assets/Filter-7494ea97.css +48 -0
- flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
- flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
- flowfile/web/static/assets/{Formula-71472193.js → Formula-aac42b1e.js} +13 -11
- flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
- flowfile/web/static/assets/{FuzzyMatch-b317f631.js → FuzzyMatch-cd9bbfca.js} +12 -10
- flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
- flowfile/web/static/assets/{GraphSolver-754a234f.js → GraphSolver-c7e6780e.js} +13 -11
- flowfile/web/static/assets/{GroupBy-6c6f9802.js → GroupBy-93c5d22b.js} +9 -7
- flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
- flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
- flowfile/web/static/assets/{Join-a1b800be.js → Join-a19b2de2.js} +13 -11
- flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
- flowfile/web/static/assets/LoginView-d325d632.css +172 -0
- flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
- flowfile/web/static/assets/{ManualInput-a9640276.js → ManualInput-8d3374b2.js} +170 -116
- flowfile/web/static/assets/{MultiSelect-97213888.js → MultiSelect-ad1b6243.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-6ffe088a.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
- flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
- flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
- flowfile/web/static/assets/{NumericInput-e638088a.js → NumericInput-7100234c.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-90eb2cba.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
- flowfile/web/static/assets/{Output-ddc9079f.css → Output-35e97000.css} +6 -6
- flowfile/web/static/assets/{Output-76750610.js → Output-f5efd2aa.js} +60 -38
- flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
- flowfile/web/static/assets/{Pivot-7814803f.js → Pivot-d981d23c.js} +11 -9
- flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
- flowfile/web/static/assets/{PivotValidation-f92137d2.js → PivotValidation-39386e95.js} +3 -3
- flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
- flowfile/web/static/assets/{PivotValidation-76dd431a.js → PivotValidation-63de1f73.js} +3 -3
- flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
- flowfile/web/static/assets/{PolarsCode-889c3008.js → PolarsCode-f9d69217.js} +18 -9
- flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
- flowfile/web/static/assets/PopOver-d96599db.css +33 -0
- flowfile/web/static/assets/{Read-6b17491f.css → Read-36e7bd51.css} +12 -12
- flowfile/web/static/assets/{Read-637b72a7.js → Read-aec2e377.js} +83 -105
- flowfile/web/static/assets/{RecordCount-2b050c41.js → RecordCount-78ed6845.js} +6 -4
- flowfile/web/static/assets/{RecordId-81df7784.js → RecordId-2156e890.js} +8 -6
- flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
- flowfile/web/static/assets/{SQLQueryComponent-88dcfe53.js → SQLQueryComponent-48c72f5b.js} +3 -3
- flowfile/web/static/assets/{Sample-258ad2a9.js → Sample-1352ca74.js} +6 -4
- flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
- flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
- flowfile/web/static/assets/{SecretManager-2a2cb7e2.js → SecretsView-17df66ee.js} +35 -36
- flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
- flowfile/web/static/assets/{Select-850215fd.js → Select-0aee4c54.js} +9 -7
- flowfile/web/static/assets/{SettingsSection-55bae608.js → SettingsSection-0784e157.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-0e8d9123.js → SettingsSection-cd341bb6.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-29b4fa6b.js → SettingsSection-f2002a6d.js} +3 -3
- flowfile/web/static/assets/{SingleSelect-bebd408b.js → SingleSelect-460cc0ea.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-6093741c.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
- flowfile/web/static/assets/{SliderInput-6a05ab61.js → SliderInput-5d926864.js} +7 -4
- flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
- flowfile/web/static/assets/{Sort-10ab48ed.js → Sort-3cdc971b.js} +9 -7
- flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
- flowfile/web/static/assets/{TextInput-df9d6259.js → TextInput-a2d0bfbd.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-000e1178.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
- flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
- flowfile/web/static/assets/{TextToRows-6c2d93d8.js → TextToRows-918945f7.js} +11 -10
- flowfile/web/static/assets/{ToggleSwitch-0ff7ac52.js → ToggleSwitch-f0ef5196.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-c6dc3029.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
- flowfile/web/static/assets/{UnavailableFields-1bab97cb.js → UnavailableFields-bdad6144.js} +4 -4
- flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
- flowfile/web/static/assets/{Union-b563478a.js → Union-e8ab8c86.js} +8 -6
- flowfile/web/static/assets/{Unique-f90db5db.js → Unique-8cd4f976.js} +13 -22
- flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
- flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
- flowfile/web/static/assets/{Unpivot-bcb0025f.js → Unpivot-8da14095.js} +10 -8
- flowfile/web/static/assets/{UnpivotValidation-c4e73b04.js → UnpivotValidation-6f7d89ff.js} +3 -3
- flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
- flowfile/web/static/assets/{VueGraphicWalker-bb8535e2.js → VueGraphicWalker-3fb312e1.js} +4 -4
- flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
- flowfile/web/static/assets/{api-4c8e3822.js → api-24483f0d.js} +1 -1
- flowfile/web/static/assets/{api-2d6adc4f.js → api-8b81fa73.js} +1 -1
- flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
- flowfile/web/static/assets/{dropDown-1bca8a74.js → dropDown-ac0fda9d.js} +3 -3
- flowfile/web/static/assets/{fullEditor-2985687e.js → fullEditor-5497a84a.js} +11 -10
- flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
- flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
- flowfile/web/static/assets/{genericNodeSettings-0476ba4e.js → genericNodeSettings-99014e1d.js} +5 -5
- flowfile/web/static/assets/index-07dda503.js +38 -0
- flowfile/web/static/assets/index-3ba44389.js +2696 -0
- flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
- flowfile/web/static/assets/{index-246f201c.js → index-fb6493ae.js} +41626 -40869
- flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
- flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
- flowfile/web/static/assets/{outputCsv-d686eeaf.js → outputCsv-8f8ba42d.js} +3 -3
- flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
- flowfile/web/static/assets/{outputExcel-8809ea2f.js → outputExcel-393f4fef.js} +3 -3
- flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
- flowfile/web/static/assets/{outputParquet-53ba645a.js → outputParquet-07c81f65.js} +4 -4
- flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
- flowfile/web/static/assets/{readCsv-053bf97b.js → readCsv-07f6d9ad.js} +21 -20
- flowfile/web/static/assets/{readCsv-bca3ed53.css → readCsv-3bfac4c3.css} +15 -15
- flowfile/web/static/assets/{readExcel-e1b381ea.css → readExcel-3db6b763.css} +13 -13
- flowfile/web/static/assets/{readExcel-ad531eab.js → readExcel-ed69bc8f.js} +10 -12
- flowfile/web/static/assets/{readParquet-cee068e2.css → readParquet-c5244ad5.css} +4 -4
- flowfile/web/static/assets/{readParquet-58e899a1.js → readParquet-e3ed4528.js} +4 -7
- flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
- flowfile/web/static/assets/{selectDynamic-b38de2ba.js → selectDynamic-80b92899.js} +5 -5
- flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
- flowfile/web/static/assets/{vue-codemirror.esm-db9b8936.js → vue-codemirror.esm-0965f39f.js} +31 -637
- flowfile/web/static/assets/{vue-content-loader.es-b5f3ac30.js → vue-content-loader.es-c506ad97.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +4 -4
- flowfile-0.5.3.dist-info/RECORD +402 -0
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +1 -1
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +1 -0
- flowfile_core/__init__.py +13 -3
- flowfile_core/auth/jwt.py +51 -16
- flowfile_core/auth/models.py +32 -7
- flowfile_core/auth/password.py +89 -0
- flowfile_core/auth/secrets.py +8 -6
- flowfile_core/configs/__init__.py +9 -7
- flowfile_core/configs/flow_logger.py +15 -14
- flowfile_core/configs/node_store/__init__.py +72 -4
- flowfile_core/configs/node_store/nodes.py +155 -172
- flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
- flowfile_core/configs/settings.py +28 -15
- flowfile_core/database/connection.py +7 -6
- flowfile_core/database/init_db.py +96 -2
- flowfile_core/database/models.py +3 -1
- flowfile_core/fileExplorer/__init__.py +17 -0
- flowfile_core/fileExplorer/funcs.py +123 -57
- flowfile_core/fileExplorer/utils.py +10 -11
- flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
- flowfile_core/flowfile/analytics/analytics_processor.py +27 -24
- flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
- flowfile_core/flowfile/analytics/utils.py +1 -1
- flowfile_core/flowfile/code_generator/code_generator.py +391 -279
- flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
- flowfile_core/flowfile/database_connection_manager/models.py +1 -1
- flowfile_core/flowfile/extensions.py +17 -12
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +152 -103
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +526 -477
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +43 -32
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +15 -11
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
- flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +360 -191
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
- flowfile_core/flowfile/flow_data_engine/utils.py +101 -67
- flowfile_core/flowfile/flow_graph.py +1011 -561
- flowfile_core/flowfile/flow_graph_utils.py +31 -49
- flowfile_core/flowfile/flow_node/flow_node.py +332 -232
- flowfile_core/flowfile/flow_node/models.py +54 -41
- flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
- flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
- flowfile_core/flowfile/handler.py +82 -32
- flowfile_core/flowfile/manage/compatibility_enhancements.py +493 -47
- flowfile_core/flowfile/manage/io_flowfile.py +391 -0
- flowfile_core/flowfile/node_designer/__init__.py +15 -13
- flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
- flowfile_core/flowfile/node_designer/custom_node.py +162 -36
- flowfile_core/flowfile/node_designer/ui_components.py +136 -35
- flowfile_core/flowfile/schema_callbacks.py +77 -54
- flowfile_core/flowfile/setting_generator/__init__.py +0 -1
- flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
- flowfile_core/flowfile/setting_generator/settings.py +72 -55
- flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
- flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
- flowfile_core/flowfile/util/calculate_layout.py +9 -13
- flowfile_core/flowfile/util/execution_orderer.py +25 -17
- flowfile_core/flowfile/util/node_skipper.py +4 -4
- flowfile_core/flowfile/utils.py +19 -21
- flowfile_core/main.py +26 -19
- flowfile_core/routes/auth.py +284 -11
- flowfile_core/routes/cloud_connections.py +25 -25
- flowfile_core/routes/logs.py +21 -29
- flowfile_core/routes/public.py +3 -3
- flowfile_core/routes/routes.py +77 -43
- flowfile_core/routes/secrets.py +25 -27
- flowfile_core/routes/user_defined_components.py +483 -4
- flowfile_core/run_lock.py +0 -1
- flowfile_core/schemas/__init__.py +4 -6
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
- flowfile_core/schemas/cloud_storage_schemas.py +59 -55
- flowfile_core/schemas/input_schema.py +398 -154
- flowfile_core/schemas/output_model.py +50 -35
- flowfile_core/schemas/schemas.py +207 -67
- flowfile_core/schemas/transform_schema.py +1360 -435
- flowfile_core/schemas/yaml_types.py +117 -0
- flowfile_core/secret_manager/secret_manager.py +17 -13
- flowfile_core/{flowfile/node_designer/data_types.py → types.py} +33 -3
- flowfile_core/utils/arrow_reader.py +7 -6
- flowfile_core/utils/excel_file_manager.py +3 -3
- flowfile_core/utils/fileManager.py +7 -7
- flowfile_core/utils/fl_executor.py +8 -10
- flowfile_core/utils/utils.py +4 -4
- flowfile_core/utils/validate_setup.py +5 -4
- flowfile_frame/__init__.py +107 -50
- flowfile_frame/adapters.py +2 -9
- flowfile_frame/adding_expr.py +73 -32
- flowfile_frame/cloud_storage/frame_helpers.py +27 -23
- flowfile_frame/cloud_storage/secret_manager.py +12 -26
- flowfile_frame/config.py +2 -5
- flowfile_frame/expr.py +311 -218
- flowfile_frame/expr.pyi +160 -159
- flowfile_frame/expr_name.py +23 -23
- flowfile_frame/flow_frame.py +581 -489
- flowfile_frame/flow_frame.pyi +123 -104
- flowfile_frame/flow_frame_methods.py +236 -252
- flowfile_frame/group_frame.py +50 -20
- flowfile_frame/join.py +2 -2
- flowfile_frame/lazy.py +129 -87
- flowfile_frame/lazy_methods.py +83 -30
- flowfile_frame/list_name_space.py +55 -50
- flowfile_frame/selectors.py +148 -68
- flowfile_frame/series.py +9 -7
- flowfile_frame/utils.py +19 -21
- flowfile_worker/__init__.py +12 -4
- flowfile_worker/configs.py +11 -19
- flowfile_worker/create/__init__.py +14 -27
- flowfile_worker/create/funcs.py +143 -94
- flowfile_worker/create/models.py +139 -68
- flowfile_worker/create/pl_types.py +14 -15
- flowfile_worker/create/read_excel_tables.py +34 -41
- flowfile_worker/create/utils.py +22 -19
- flowfile_worker/external_sources/s3_source/main.py +18 -51
- flowfile_worker/external_sources/s3_source/models.py +34 -27
- flowfile_worker/external_sources/sql_source/main.py +8 -5
- flowfile_worker/external_sources/sql_source/models.py +13 -9
- flowfile_worker/flow_logger.py +10 -8
- flowfile_worker/funcs.py +214 -155
- flowfile_worker/main.py +11 -17
- flowfile_worker/models.py +35 -28
- flowfile_worker/process_manager.py +2 -3
- flowfile_worker/routes.py +121 -93
- flowfile_worker/secrets.py +9 -6
- flowfile_worker/spawner.py +80 -49
- flowfile_worker/utils.py +3 -2
- shared/__init__.py +2 -7
- shared/storage_config.py +25 -13
- test_utils/postgres/commands.py +3 -2
- test_utils/postgres/fixtures.py +9 -9
- test_utils/s3/commands.py +1 -1
- test_utils/s3/data_generator.py +3 -4
- test_utils/s3/demo_data_generator.py +4 -7
- test_utils/s3/fixtures.py +7 -5
- tools/migrate/README.md +56 -0
- tools/migrate/__init__.py +12 -0
- tools/migrate/__main__.py +118 -0
- tools/migrate/legacy_schemas.py +682 -0
- tools/migrate/migrate.py +610 -0
- tools/migrate/tests/__init__.py +0 -0
- tools/migrate/tests/conftest.py +21 -0
- tools/migrate/tests/test_migrate.py +622 -0
- tools/migrate/tests/test_migration_e2e.py +1009 -0
- tools/migrate/tests/test_node_migrations.py +843 -0
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
- flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
- flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
- flowfile/web/static/assets/Filter-812dcbca.js +0 -164
- flowfile/web/static/assets/Filter-f62091b3.css +0 -20
- flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
- flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
- flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
- flowfile/web/static/assets/secretApi-538058f3.js +0 -46
- flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
- flowfile-0.4.1.dist-info/RECORD +0 -376
- flowfile_core/flowfile/manage/open_flowfile.py +0 -143
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
- /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
flowfile_worker/spawner.py
CHANGED
|
@@ -1,15 +1,14 @@
|
|
|
1
|
-
from flowfile_worker import status_dict
|
|
2
|
-
from time import sleep
|
|
3
1
|
import gc
|
|
4
|
-
from typing import List, Tuple
|
|
5
2
|
from multiprocessing import Process, Queue
|
|
3
|
+
from time import sleep
|
|
4
|
+
|
|
5
|
+
from flowfile_worker import funcs, models, mp_context, status_dict, status_dict_lock
|
|
6
6
|
from flowfile_worker.process_manager import ProcessManager
|
|
7
|
-
from flowfile_worker import models, mp_context, funcs, status_dict_lock
|
|
8
7
|
|
|
9
8
|
# Initialize ProcessManager
|
|
10
9
|
process_manager = ProcessManager()
|
|
11
10
|
|
|
12
|
-
flowfile_node_id_type = int|str
|
|
11
|
+
flowfile_node_id_type = int | str
|
|
13
12
|
|
|
14
13
|
|
|
15
14
|
def handle_task(task_id: str, p: Process, progress: mp_context.Value, error_message: mp_context.Array, q: Queue):
|
|
@@ -48,7 +47,7 @@ def handle_task(task_id: str, p: Process, progress: mp_context.Value, error_mess
|
|
|
48
47
|
with status_dict_lock:
|
|
49
48
|
status_dict[task_id].status = "Error"
|
|
50
49
|
with error_message.get_lock():
|
|
51
|
-
status_dict[task_id].error_message = error_message.value.decode().rstrip(
|
|
50
|
+
status_dict[task_id].error_message = error_message.value.decode().rstrip("\x00")
|
|
52
51
|
break
|
|
53
52
|
|
|
54
53
|
p.join()
|
|
@@ -72,11 +71,15 @@ def handle_task(task_id: str, p: Process, progress: mp_context.Value, error_mess
|
|
|
72
71
|
gc.collect()
|
|
73
72
|
|
|
74
73
|
|
|
75
|
-
def start_process(
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
74
|
+
def start_process(
|
|
75
|
+
polars_serializable_object: bytes,
|
|
76
|
+
task_id: str,
|
|
77
|
+
operation: models.OperationType,
|
|
78
|
+
file_ref: str,
|
|
79
|
+
flowfile_flow_id: int,
|
|
80
|
+
flowfile_node_id: flowfile_node_id_type,
|
|
81
|
+
kwargs: dict = None,
|
|
82
|
+
) -> None:
|
|
80
83
|
"""
|
|
81
84
|
Starts a new process for handling Polars dataframe operations.
|
|
82
85
|
|
|
@@ -97,24 +100,31 @@ def start_process(polars_serializable_object: bytes, task_id: str,
|
|
|
97
100
|
if kwargs is None:
|
|
98
101
|
kwargs = {}
|
|
99
102
|
process_task = getattr(funcs, operation)
|
|
100
|
-
kwargs[
|
|
101
|
-
kwargs[
|
|
102
|
-
kwargs[
|
|
103
|
-
kwargs[
|
|
104
|
-
kwargs[
|
|
105
|
-
kwargs[
|
|
106
|
-
kwargs[
|
|
103
|
+
kwargs["polars_serializable_object"] = polars_serializable_object
|
|
104
|
+
kwargs["progress"] = mp_context.Value("i", 0)
|
|
105
|
+
kwargs["error_message"] = mp_context.Array("c", 1024)
|
|
106
|
+
kwargs["queue"] = Queue(maxsize=1)
|
|
107
|
+
kwargs["file_path"] = file_ref
|
|
108
|
+
kwargs["flowfile_flow_id"] = flowfile_flow_id
|
|
109
|
+
kwargs["flowfile_node_id"] = flowfile_node_id
|
|
107
110
|
|
|
108
111
|
p: Process = mp_context.Process(target=process_task, kwargs=kwargs)
|
|
109
112
|
p.start()
|
|
110
113
|
|
|
111
114
|
process_manager.add_process(task_id, p)
|
|
112
|
-
handle_task(
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
115
|
+
handle_task(
|
|
116
|
+
task_id=task_id, p=p, progress=kwargs["progress"], error_message=kwargs["error_message"], q=kwargs["queue"]
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def start_generic_process(
|
|
121
|
+
func_ref: callable,
|
|
122
|
+
task_id: str,
|
|
123
|
+
file_ref: str,
|
|
124
|
+
flowfile_flow_id: int,
|
|
125
|
+
flowfile_node_id: flowfile_node_id_type,
|
|
126
|
+
kwargs: dict = None,
|
|
127
|
+
) -> None:
|
|
118
128
|
"""
|
|
119
129
|
Starts a new process for handling generic function execution.
|
|
120
130
|
|
|
@@ -132,30 +142,33 @@ def start_generic_process(func_ref: callable, task_id: str,
|
|
|
132
142
|
- Delegates to handle_task for process monitoring
|
|
133
143
|
"""
|
|
134
144
|
kwargs = {} if kwargs is None else kwargs
|
|
135
|
-
kwargs[
|
|
136
|
-
kwargs[
|
|
137
|
-
kwargs[
|
|
138
|
-
kwargs[
|
|
139
|
-
kwargs[
|
|
140
|
-
kwargs[
|
|
141
|
-
kwargs[
|
|
142
|
-
|
|
143
|
-
process_task =
|
|
145
|
+
kwargs["func"] = func_ref
|
|
146
|
+
kwargs["progress"] = mp_context.Value("i", 0)
|
|
147
|
+
kwargs["error_message"] = mp_context.Array("c", 1024)
|
|
148
|
+
kwargs["queue"] = Queue(maxsize=1)
|
|
149
|
+
kwargs["file_path"] = file_ref
|
|
150
|
+
kwargs["flowfile_flow_id"] = flowfile_flow_id
|
|
151
|
+
kwargs["flowfile_node_id"] = flowfile_node_id
|
|
152
|
+
|
|
153
|
+
process_task = funcs.generic_task
|
|
144
154
|
p: Process = mp_context.Process(target=process_task, kwargs=kwargs)
|
|
145
155
|
p.start()
|
|
146
156
|
|
|
147
157
|
process_manager.add_process(task_id, p) # Add process to process manager
|
|
148
|
-
handle_task(
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
158
|
+
handle_task(
|
|
159
|
+
task_id=task_id, p=p, progress=kwargs["progress"], error_message=kwargs["error_message"], q=kwargs["queue"]
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def start_fuzzy_process(
|
|
164
|
+
left_serializable_object: bytes,
|
|
165
|
+
right_serializable_object: bytes,
|
|
166
|
+
file_ref: str,
|
|
167
|
+
fuzzy_maps: list[models.FuzzyMapping],
|
|
168
|
+
task_id: str,
|
|
169
|
+
flowfile_flow_id: int,
|
|
170
|
+
flowfile_node_id: flowfile_node_id_type,
|
|
171
|
+
) -> None:
|
|
159
172
|
"""
|
|
160
173
|
Starts a new process for performing fuzzy joining operations on two datasets.
|
|
161
174
|
|
|
@@ -172,13 +185,31 @@ def start_fuzzy_process(left_serializable_object: bytes,
|
|
|
172
185
|
- Initializes and starts a new process for fuzzy joining operation
|
|
173
186
|
- Delegates to handle_task for process monitoring
|
|
174
187
|
"""
|
|
175
|
-
progress = mp_context.Value(
|
|
176
|
-
error_message = mp_context.Array(
|
|
188
|
+
progress = mp_context.Value("i", 0)
|
|
189
|
+
error_message = mp_context.Array("c", 1024)
|
|
177
190
|
q = Queue(maxsize=1)
|
|
178
191
|
|
|
179
|
-
args:
|
|
180
|
-
|
|
181
|
-
|
|
192
|
+
args: tuple[
|
|
193
|
+
bytes,
|
|
194
|
+
bytes,
|
|
195
|
+
list[models.FuzzyMapping],
|
|
196
|
+
mp_context.Array,
|
|
197
|
+
str,
|
|
198
|
+
mp_context.Value,
|
|
199
|
+
Queue,
|
|
200
|
+
int,
|
|
201
|
+
flowfile_node_id_type,
|
|
202
|
+
] = (
|
|
203
|
+
left_serializable_object,
|
|
204
|
+
right_serializable_object,
|
|
205
|
+
fuzzy_maps,
|
|
206
|
+
error_message,
|
|
207
|
+
file_ref,
|
|
208
|
+
progress,
|
|
209
|
+
q,
|
|
210
|
+
flowfile_flow_id,
|
|
211
|
+
flowfile_node_id,
|
|
212
|
+
)
|
|
182
213
|
|
|
183
214
|
p: Process = mp_context.Process(target=funcs.fuzzy_join_task, args=args)
|
|
184
215
|
p.start()
|
flowfile_worker/utils.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
1
3
|
import polars as pl
|
|
2
4
|
from polars.exceptions import PanicException
|
|
3
|
-
from dataclasses import dataclass
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
def collect_lazy_frame(lf: pl.LazyFrame) -> pl.DataFrame:
|
|
@@ -12,7 +13,7 @@ def collect_lazy_frame(lf: pl.LazyFrame) -> pl.DataFrame:
|
|
|
12
13
|
|
|
13
14
|
@dataclass
|
|
14
15
|
class CollectStreamingInfo:
|
|
15
|
-
__slots__ =
|
|
16
|
+
__slots__ = "df", "streaming_collect_available"
|
|
16
17
|
df: pl.DataFrame
|
|
17
18
|
streaming_collect_available: bool
|
|
18
19
|
|
shared/__init__.py
CHANGED
|
@@ -5,11 +5,6 @@ flowfile_core, flowfile_worker, and other components without creating
|
|
|
5
5
|
circular dependencies.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
from .storage_config import
|
|
8
|
+
from .storage_config import get_cache_directory, get_flows_directory, get_temp_directory, storage
|
|
9
9
|
|
|
10
|
-
__all__ = [
|
|
11
|
-
'storage',
|
|
12
|
-
'get_cache_directory',
|
|
13
|
-
'get_temp_directory',
|
|
14
|
-
'get_flows_directory'
|
|
15
|
-
]
|
|
10
|
+
__all__ = ["storage", "get_cache_directory", "get_temp_directory", "get_flows_directory"]
|
shared/storage_config.py
CHANGED
|
@@ -3,28 +3,40 @@
|
|
|
3
3
|
Centralized storage configuration for Flowfile.
|
|
4
4
|
This module can be imported by both core and worker without creating dependencies.
|
|
5
5
|
"""
|
|
6
|
+
|
|
6
7
|
import os
|
|
7
8
|
from pathlib import Path
|
|
8
|
-
from typing import
|
|
9
|
+
from typing import Literal
|
|
10
|
+
|
|
11
|
+
DirectoryOptions = Literal[
|
|
12
|
+
"temp_directory",
|
|
13
|
+
"logs_directory",
|
|
14
|
+
"system_logs_directory",
|
|
15
|
+
"database_directory",
|
|
16
|
+
"cache_directory",
|
|
17
|
+
"flows_directory",
|
|
18
|
+
"user_defined_nodes_directory",
|
|
19
|
+
]
|
|
20
|
+
|
|
9
21
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
22
|
+
def _is_docker_mode() -> bool:
|
|
23
|
+
"""Check if running in Docker mode based on FLOWFILE_MODE."""
|
|
24
|
+
return os.environ.get("FLOWFILE_MODE") == "docker"
|
|
13
25
|
|
|
14
26
|
|
|
15
27
|
class FlowfileStorage:
|
|
16
28
|
"""Centralized storage manager for Flowfile applications."""
|
|
17
29
|
|
|
18
30
|
def __init__(self):
|
|
19
|
-
self._base_dir:
|
|
20
|
-
self._user_data_dir:
|
|
31
|
+
self._base_dir: Path | None = None
|
|
32
|
+
self._user_data_dir: Path | None = None
|
|
21
33
|
self._ensure_directories()
|
|
22
34
|
|
|
23
35
|
@property
|
|
24
36
|
def base_directory(self) -> Path:
|
|
25
37
|
"""Get the base Flowfile storage directory (for internal container communication)."""
|
|
26
38
|
if self._base_dir is None:
|
|
27
|
-
if
|
|
39
|
+
if _is_docker_mode():
|
|
28
40
|
# In Docker, internal storage stays inside /app
|
|
29
41
|
base_path = os.environ.get("FLOWFILE_STORAGE_DIR", "/app/internal_storage")
|
|
30
42
|
else:
|
|
@@ -41,7 +53,7 @@ class FlowfileStorage:
|
|
|
41
53
|
def user_data_directory(self) -> Path:
|
|
42
54
|
"""Get the user data directory (completely separate from application code)."""
|
|
43
55
|
if self._user_data_dir is None:
|
|
44
|
-
if
|
|
56
|
+
if _is_docker_mode():
|
|
45
57
|
# In Docker, user data is at /data/user (completely outside /app)
|
|
46
58
|
user_data_path = os.environ.get("FLOWFILE_USER_DATA_DIR", "/data/user")
|
|
47
59
|
else:
|
|
@@ -70,7 +82,7 @@ class FlowfileStorage:
|
|
|
70
82
|
@property
|
|
71
83
|
def flows_directory(self) -> Path:
|
|
72
84
|
"""Directory for flow storage (user-accessible)."""
|
|
73
|
-
if
|
|
85
|
+
if _is_docker_mode():
|
|
74
86
|
# In Docker, flows are in separate user data area
|
|
75
87
|
return self.user_data_directory / "flows"
|
|
76
88
|
else:
|
|
@@ -80,7 +92,7 @@ class FlowfileStorage:
|
|
|
80
92
|
@property
|
|
81
93
|
def uploads_directory(self) -> Path:
|
|
82
94
|
"""Directory for user uploads (user-accessible)."""
|
|
83
|
-
if
|
|
95
|
+
if _is_docker_mode():
|
|
84
96
|
# In Docker, uploads are in separate user data area
|
|
85
97
|
return self.user_data_directory / "uploads"
|
|
86
98
|
else:
|
|
@@ -90,7 +102,7 @@ class FlowfileStorage:
|
|
|
90
102
|
@property
|
|
91
103
|
def user_defined_nodes_directory(self) -> Path:
|
|
92
104
|
"""Directory for user-defined custom nodes (user-accessible)."""
|
|
93
|
-
if
|
|
105
|
+
if _is_docker_mode():
|
|
94
106
|
return self.user_data_directory / "user_defined_nodes"
|
|
95
107
|
else:
|
|
96
108
|
return self.base_directory / "user_defined_nodes"
|
|
@@ -103,7 +115,7 @@ class FlowfileStorage:
|
|
|
103
115
|
@property
|
|
104
116
|
def outputs_directory(self) -> Path:
|
|
105
117
|
"""Directory for user outputs (user-accessible)."""
|
|
106
|
-
if
|
|
118
|
+
if _is_docker_mode():
|
|
107
119
|
# In Docker, outputs are in separate user data area
|
|
108
120
|
return self.user_data_directory / "outputs"
|
|
109
121
|
else:
|
|
@@ -184,8 +196,8 @@ class FlowfileStorage:
|
|
|
184
196
|
|
|
185
197
|
def cleanup_directory(self, directory_option: DirectoryOptions, storage_duration_hours: int = 24) -> None:
|
|
186
198
|
"""Clean up any directory of the folder"""
|
|
187
|
-
import time
|
|
188
199
|
import shutil
|
|
200
|
+
import time
|
|
189
201
|
|
|
190
202
|
if not hasattr(self, directory_option):
|
|
191
203
|
raise Exception(f"Directory does not exist in {self.base_directory}")
|
test_utils/postgres/commands.py
CHANGED
|
@@ -6,8 +6,9 @@ to start and stop PostgreSQL containers with sample data.
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import argparse
|
|
9
|
-
import sys
|
|
10
9
|
import logging
|
|
10
|
+
import sys
|
|
11
|
+
|
|
11
12
|
from . import fixtures
|
|
12
13
|
|
|
13
14
|
# Set up logging
|
|
@@ -106,4 +107,4 @@ if __name__ == "__main__":
|
|
|
106
107
|
sys.exit(stop_postgres())
|
|
107
108
|
else:
|
|
108
109
|
print("Usage: python -m test_utils.postgres.commands [start|stop] [options]")
|
|
109
|
-
sys.exit(1)
|
|
110
|
+
sys.exit(1)
|
test_utils/postgres/fixtures.py
CHANGED
|
@@ -5,13 +5,13 @@ This module provides utilities to set up, manage, and tear down PostgreSQL
|
|
|
5
5
|
containers with sample data for testing.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
import os
|
|
9
|
-
import time
|
|
10
8
|
import logging
|
|
11
|
-
import
|
|
9
|
+
import os
|
|
12
10
|
import shutil
|
|
11
|
+
import subprocess
|
|
12
|
+
import time
|
|
13
|
+
from collections.abc import Generator
|
|
13
14
|
from contextlib import contextmanager
|
|
14
|
-
from typing import Dict, Generator, Optional, Tuple
|
|
15
15
|
|
|
16
16
|
# Configure logging
|
|
17
17
|
logging.basicConfig(
|
|
@@ -208,7 +208,7 @@ def start_postgres_container(
|
|
|
208
208
|
container_name: str = POSTGRES_CONTAINER_NAME,
|
|
209
209
|
port: int = POSTGRES_PORT,
|
|
210
210
|
image_tag: str = POSTGRES_IMAGE_TAG
|
|
211
|
-
) ->
|
|
211
|
+
) -> tuple[subprocess.Popen | None, bool]:
|
|
212
212
|
"""
|
|
213
213
|
Start the PostgreSQL container with sample data.
|
|
214
214
|
|
|
@@ -225,7 +225,7 @@ def start_postgres_container(
|
|
|
225
225
|
logger.warning("Docker not available, skipping PostgreSQL container start")
|
|
226
226
|
return None, False
|
|
227
227
|
|
|
228
|
-
logger.info(
|
|
228
|
+
logger.info("Starting PostgreSQL container with sample data...")
|
|
229
229
|
|
|
230
230
|
# Check if container is already running
|
|
231
231
|
if is_container_running(container_name):
|
|
@@ -345,12 +345,12 @@ def print_connection_info(
|
|
|
345
345
|
print(f"Connection string: postgresql://{user}:{password}@{host}:{port}/{db}")
|
|
346
346
|
print("=" * 50)
|
|
347
347
|
print("\nTo stop the container, run:")
|
|
348
|
-
print(
|
|
348
|
+
print("poetry run stop_postgres")
|
|
349
349
|
print("=" * 50 + "\n")
|
|
350
350
|
|
|
351
351
|
|
|
352
352
|
@contextmanager
|
|
353
|
-
def managed_postgres() -> Generator[
|
|
353
|
+
def managed_postgres() -> Generator[dict[str, any], None, None]:
|
|
354
354
|
"""
|
|
355
355
|
Context manager for PostgreSQL container management.
|
|
356
356
|
Ensures proper cleanup even when tests fail.
|
|
@@ -414,4 +414,4 @@ def get_db_engine():
|
|
|
414
414
|
return engine
|
|
415
415
|
except ImportError:
|
|
416
416
|
logger.error("SQLAlchemy not installed. Run: pip install sqlalchemy")
|
|
417
|
-
raise
|
|
417
|
+
raise
|
test_utils/s3/commands.py
CHANGED
test_utils/s3/data_generator.py
CHANGED
|
@@ -1,17 +1,16 @@
|
|
|
1
1
|
|
|
2
|
-
import logging
|
|
3
2
|
import io
|
|
3
|
+
import logging
|
|
4
4
|
import os
|
|
5
5
|
|
|
6
6
|
# Third-party libraries
|
|
7
7
|
import boto3
|
|
8
|
-
from botocore.client import Config
|
|
9
8
|
import polars as pl
|
|
10
9
|
import pyarrow as pa
|
|
10
|
+
from botocore.client import Config
|
|
11
11
|
from deltalake import write_deltalake
|
|
12
12
|
from pyiceberg.catalog import load_catalog
|
|
13
13
|
|
|
14
|
-
|
|
15
14
|
# Configure logging
|
|
16
15
|
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
17
16
|
logger = logging.getLogger(__name__)
|
|
@@ -289,4 +288,4 @@ if __name__ == '__main__':
|
|
|
289
288
|
populate_test_data(endpoint_url=MINIO_ENDPOINT_URL,
|
|
290
289
|
access_key=MINIO_ACCESS_KEY,
|
|
291
290
|
secret_key=MINIO_SECRET_KEY,
|
|
292
|
-
bucket_name="test-bucket")
|
|
291
|
+
bucket_name="test-bucket")
|
|
@@ -1,17 +1,14 @@
|
|
|
1
|
-
import logging
|
|
2
1
|
import io
|
|
2
|
+
import logging
|
|
3
3
|
import os
|
|
4
|
-
import tempfile
|
|
5
|
-
import shutil
|
|
6
4
|
import random
|
|
5
|
+
import tempfile
|
|
7
6
|
from datetime import datetime, timedelta
|
|
8
7
|
|
|
9
8
|
# Third-party libraries
|
|
10
9
|
import boto3
|
|
11
|
-
from botocore.client import Config
|
|
12
10
|
import polars as pl
|
|
13
|
-
|
|
14
|
-
from pyarrow import parquet as pq
|
|
11
|
+
from botocore.client import Config
|
|
15
12
|
|
|
16
13
|
# Configure logging
|
|
17
14
|
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
@@ -152,7 +149,7 @@ def create_demo_data(endpoint_url: str, access_key: str, secret_key: str, bucket
|
|
|
152
149
|
unique_product_ids = sales_df["product_id"].unique().to_list()
|
|
153
150
|
# Create a map of product_id to unit_price from the first occurrence in sales_df
|
|
154
151
|
product_price_map = sales_df.group_by("product_id").agg(pl.first("unit_price")).to_dict(as_series=False)
|
|
155
|
-
price_dict = dict(zip(product_price_map['product_id'], product_price_map['unit_price']))
|
|
152
|
+
price_dict = dict(zip(product_price_map['product_id'], product_price_map['unit_price'], strict=False))
|
|
156
153
|
|
|
157
154
|
products_df = pl.DataFrame({
|
|
158
155
|
"product_id": unique_product_ids,
|
test_utils/s3/fixtures.py
CHANGED
|
@@ -1,12 +1,14 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
import os
|
|
2
|
-
import
|
|
3
|
+
import shutil
|
|
3
4
|
import subprocess
|
|
4
|
-
import
|
|
5
|
+
import time
|
|
6
|
+
from collections.abc import Generator
|
|
5
7
|
from contextlib import contextmanager
|
|
6
|
-
|
|
7
|
-
import shutil
|
|
8
|
+
|
|
8
9
|
import boto3
|
|
9
10
|
from botocore.client import Config
|
|
11
|
+
|
|
10
12
|
from test_utils.s3.data_generator import populate_test_data
|
|
11
13
|
from test_utils.s3.demo_data_generator import create_demo_data
|
|
12
14
|
|
|
@@ -191,7 +193,7 @@ def start_minio_container() -> bool:
|
|
|
191
193
|
|
|
192
194
|
|
|
193
195
|
@contextmanager
|
|
194
|
-
def managed_minio() -> Generator[
|
|
196
|
+
def managed_minio() -> Generator[dict[str, any], None, None]:
|
|
195
197
|
"""Context manager for MinIO container with full connection info"""
|
|
196
198
|
if not start_minio_container():
|
|
197
199
|
yield {}
|
tools/migrate/README.md
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# Flowfile Migration Tool
|
|
2
|
+
|
|
3
|
+
Migrates `.flowfile` (pickle format, used in v0.4.1 and earlier) to YAML (v0.5+).
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install pyyaml # Required for YAML output
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
# Single file
|
|
15
|
+
python -m tools.migrate path/to/flow.flowfile
|
|
16
|
+
|
|
17
|
+
# Directory (recursive)
|
|
18
|
+
python -m tools.migrate ./flows/
|
|
19
|
+
|
|
20
|
+
# Specify output path
|
|
21
|
+
python -m tools.migrate flow.flowfile -o /output/path/flow.yaml
|
|
22
|
+
|
|
23
|
+
# Output as JSON
|
|
24
|
+
python -m tools.migrate flow.flowfile --format json
|
|
25
|
+
|
|
26
|
+
# Dry run
|
|
27
|
+
python -m tools.migrate ./flows/ --dry-run
|
|
28
|
+
|
|
29
|
+
# Verbose (show tracebacks)
|
|
30
|
+
python -m tools.migrate flow.flowfile -v
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Output Structure
|
|
34
|
+
|
|
35
|
+
```yaml
|
|
36
|
+
_version: '2.0'
|
|
37
|
+
_migrated_from: pickle
|
|
38
|
+
flow_id: 1
|
|
39
|
+
flow_name: my_analysis
|
|
40
|
+
flow_settings:
|
|
41
|
+
name: my_analysis
|
|
42
|
+
description: null
|
|
43
|
+
execution_mode: Development
|
|
44
|
+
nodes:
|
|
45
|
+
- id: 1
|
|
46
|
+
type: read
|
|
47
|
+
position: {x: 100, y: 200}
|
|
48
|
+
settings:
|
|
49
|
+
received_file:
|
|
50
|
+
path: data/input.csv
|
|
51
|
+
file_type: csv
|
|
52
|
+
connections:
|
|
53
|
+
- [1, 2]
|
|
54
|
+
node_starts:
|
|
55
|
+
- 1
|
|
56
|
+
```
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Flowfile Migration Tool
|
|
3
|
+
|
|
4
|
+
Converts old pickle-based .flowfile format to new YAML format.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
python -m tools.migrate <path>
|
|
8
|
+
python -m tools.migrate old_flow.flowfile
|
|
9
|
+
python -m tools.migrate ./flows/ # migrate entire directory
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
__version__ = "1.0.0"
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
"""
|
|
3
|
+
Flowfile Migration Tool - CLI Entry Point
|
|
4
|
+
|
|
5
|
+
Converts old pickle-based .flowfile format to new YAML format.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
python -m tools.migrate <path> [options]
|
|
9
|
+
|
|
10
|
+
Examples:
|
|
11
|
+
# Migrate a single file
|
|
12
|
+
python -m tools.migrate my_flow.flowfile
|
|
13
|
+
|
|
14
|
+
# Migrate to specific output
|
|
15
|
+
python -m tools.migrate my_flow.flowfile -o my_flow.yaml
|
|
16
|
+
|
|
17
|
+
# Migrate entire directory
|
|
18
|
+
python -m tools.migrate ./flows/
|
|
19
|
+
|
|
20
|
+
# Migrate to JSON instead of YAML
|
|
21
|
+
python -m tools.migrate my_flow.flowfile --format json
|
|
22
|
+
|
|
23
|
+
# Migrate directory to different output location
|
|
24
|
+
python -m tools.migrate ./old_flows/ -o ./new_flows/
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
import argparse
|
|
28
|
+
import sys
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
|
|
31
|
+
from tools.migrate.migrate import migrate_directory, migrate_flowfile
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def main():
|
|
35
|
+
parser = argparse.ArgumentParser(
|
|
36
|
+
prog="flowfile-migrate",
|
|
37
|
+
description="Migrate old .flowfile pickles to YAML format",
|
|
38
|
+
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
39
|
+
epilog="""
|
|
40
|
+
Examples:
|
|
41
|
+
%(prog)s my_flow.flowfile Migrate single file to YAML
|
|
42
|
+
%(prog)s ./flows/ Migrate all files in directory
|
|
43
|
+
%(prog)s flow.flowfile -o flow.yaml Specify output path
|
|
44
|
+
%(prog)s ./flows/ --format json Output as JSON instead of YAML
|
|
45
|
+
""",
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
parser.add_argument("path", type=Path, help="Path to .flowfile or directory containing .flowfile files")
|
|
49
|
+
|
|
50
|
+
parser.add_argument(
|
|
51
|
+
"-o",
|
|
52
|
+
"--output",
|
|
53
|
+
type=Path,
|
|
54
|
+
default=None,
|
|
55
|
+
help="Output path (file or directory). Default: same location with new extension",
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
parser.add_argument(
|
|
59
|
+
"-f", "--format", choices=["yaml", "json"], default="yaml", help="Output format (default: yaml)"
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
|
|
63
|
+
|
|
64
|
+
parser.add_argument("--dry-run", action="store_true", help="Show what would be migrated without actually migrating")
|
|
65
|
+
|
|
66
|
+
args = parser.parse_args()
|
|
67
|
+
|
|
68
|
+
# Validate input path
|
|
69
|
+
if not args.path.exists():
|
|
70
|
+
print(f"Error: Path not found: {args.path}", file=sys.stderr)
|
|
71
|
+
sys.exit(1)
|
|
72
|
+
|
|
73
|
+
# Dry run mode
|
|
74
|
+
if args.dry_run:
|
|
75
|
+
if args.path.is_file():
|
|
76
|
+
print(f"Would migrate: {args.path}")
|
|
77
|
+
suffix = ".yaml" if args.format == "yaml" else ".json"
|
|
78
|
+
output = args.output or args.path.with_suffix(suffix)
|
|
79
|
+
print(f" → {output}")
|
|
80
|
+
else:
|
|
81
|
+
flowfiles = list(args.path.glob("**/*.flowfile"))
|
|
82
|
+
print(f"Would migrate {len(flowfiles)} file(s):")
|
|
83
|
+
for f in flowfiles:
|
|
84
|
+
print(f" - {f}")
|
|
85
|
+
sys.exit(0)
|
|
86
|
+
|
|
87
|
+
# Check for yaml dependency
|
|
88
|
+
if args.format == "yaml":
|
|
89
|
+
try:
|
|
90
|
+
import yaml
|
|
91
|
+
except ImportError:
|
|
92
|
+
print("Error: PyYAML is required for YAML output.", file=sys.stderr)
|
|
93
|
+
print("Install with: pip install pyyaml", file=sys.stderr)
|
|
94
|
+
sys.exit(1)
|
|
95
|
+
|
|
96
|
+
# Run migration
|
|
97
|
+
try:
|
|
98
|
+
if args.path.is_file():
|
|
99
|
+
migrate_flowfile(args.path, args.output, args.format)
|
|
100
|
+
elif args.path.is_dir():
|
|
101
|
+
migrate_directory(args.path, args.output, args.format)
|
|
102
|
+
else:
|
|
103
|
+
print(f"Error: {args.path} is neither a file nor a directory", file=sys.stderr)
|
|
104
|
+
sys.exit(1)
|
|
105
|
+
|
|
106
|
+
except Exception as e:
|
|
107
|
+
print(f"Error: {e}", file=sys.stderr)
|
|
108
|
+
if args.verbose:
|
|
109
|
+
import traceback
|
|
110
|
+
|
|
111
|
+
traceback.print_exc()
|
|
112
|
+
sys.exit(1)
|
|
113
|
+
|
|
114
|
+
print("\nMigration complete!")
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
if __name__ == "__main__":
|
|
118
|
+
main()
|