Flowfile 0.4.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backends/main.py +25 -22
- build_backends/main_prd.py +10 -19
- flowfile/__init__.py +179 -73
- flowfile/__main__.py +10 -7
- flowfile/api.py +52 -59
- flowfile/web/__init__.py +14 -9
- flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
- flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
- flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
- flowfile/web/static/assets/{CloudConnectionManager-d3248f8d.js → CloudConnectionView-f13f202b.js} +11 -11
- flowfile/web/static/assets/{CloudStorageReader-d65bf041.js → CloudStorageReader-0023d4a5.js} +10 -8
- flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
- flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
- flowfile/web/static/assets/{CloudStorageWriter-e83be3ed.js → CloudStorageWriter-8e781e11.js} +10 -8
- flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
- flowfile/web/static/assets/{ColumnSelector-cce661cf.js → ColumnSelector-8ad68ea9.js} +3 -5
- flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-11a4652a.js → ContextMenu-31ee57f0.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-160afb08.js → ContextMenu-69a74055.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-cf18d2cc.js → ContextMenu-8e2051c6.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
- flowfile/web/static/assets/{CrossJoin-d395d38c.js → CrossJoin-03df6938.js} +12 -10
- flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
- flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
- flowfile/web/static/assets/{CustomNode-b812dc0b.js → CustomNode-8479239b.js} +36 -24
- flowfile/web/static/assets/{DatabaseConnectionSettings-7000bf2c.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
- flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
- flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
- flowfile/web/static/assets/{DatabaseReader-4f035d0c.js → DatabaseReader-c58b9552.js} +25 -15
- flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
- flowfile/web/static/assets/{DatabaseManager-9662ec5b.js → DatabaseView-d26a9140.js} +11 -11
- flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
- flowfile/web/static/assets/{DatabaseWriter-f65dcd54.js → DatabaseWriter-4d05ddc7.js} +17 -10
- flowfile/web/static/assets/{designer-e3c150ec.css → DesignerView-a6d0ee84.css} +629 -538
- flowfile/web/static/assets/{designer-f3656d8c.js → DesignerView-e6f5c0e8.js} +1214 -3209
- flowfile/web/static/assets/{documentation-52b241e7.js → DocumentationView-2e78ef1b.js} +5 -5
- flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
- flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
- flowfile/web/static/assets/{ExploreData-94c43dfc.js → ExploreData-7b54caca.js} +18 -9
- flowfile/web/static/assets/{ExternalSource-ac04b3cc.js → ExternalSource-3fa399b2.js} +9 -7
- flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
- flowfile/web/static/assets/Filter-7494ea97.css +48 -0
- flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
- flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
- flowfile/web/static/assets/{Formula-71472193.js → Formula-aac42b1e.js} +13 -11
- flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
- flowfile/web/static/assets/{FuzzyMatch-b317f631.js → FuzzyMatch-cd9bbfca.js} +12 -10
- flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
- flowfile/web/static/assets/{GraphSolver-754a234f.js → GraphSolver-c7e6780e.js} +13 -11
- flowfile/web/static/assets/{GroupBy-6c6f9802.js → GroupBy-93c5d22b.js} +9 -7
- flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
- flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
- flowfile/web/static/assets/{Join-a1b800be.js → Join-a19b2de2.js} +13 -11
- flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
- flowfile/web/static/assets/LoginView-d325d632.css +172 -0
- flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
- flowfile/web/static/assets/{ManualInput-a9640276.js → ManualInput-8d3374b2.js} +170 -116
- flowfile/web/static/assets/{MultiSelect-97213888.js → MultiSelect-ad1b6243.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-6ffe088a.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
- flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
- flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
- flowfile/web/static/assets/{NumericInput-e638088a.js → NumericInput-7100234c.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-90eb2cba.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
- flowfile/web/static/assets/{Output-ddc9079f.css → Output-35e97000.css} +6 -6
- flowfile/web/static/assets/{Output-76750610.js → Output-f5efd2aa.js} +60 -38
- flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
- flowfile/web/static/assets/{Pivot-7814803f.js → Pivot-d981d23c.js} +11 -9
- flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
- flowfile/web/static/assets/{PivotValidation-f92137d2.js → PivotValidation-39386e95.js} +3 -3
- flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
- flowfile/web/static/assets/{PivotValidation-76dd431a.js → PivotValidation-63de1f73.js} +3 -3
- flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
- flowfile/web/static/assets/{PolarsCode-889c3008.js → PolarsCode-f9d69217.js} +18 -9
- flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
- flowfile/web/static/assets/PopOver-d96599db.css +33 -0
- flowfile/web/static/assets/{Read-6b17491f.css → Read-36e7bd51.css} +12 -12
- flowfile/web/static/assets/{Read-637b72a7.js → Read-aec2e377.js} +83 -105
- flowfile/web/static/assets/{RecordCount-2b050c41.js → RecordCount-78ed6845.js} +6 -4
- flowfile/web/static/assets/{RecordId-81df7784.js → RecordId-2156e890.js} +8 -6
- flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
- flowfile/web/static/assets/{SQLQueryComponent-88dcfe53.js → SQLQueryComponent-48c72f5b.js} +3 -3
- flowfile/web/static/assets/{Sample-258ad2a9.js → Sample-1352ca74.js} +6 -4
- flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
- flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
- flowfile/web/static/assets/{SecretManager-2a2cb7e2.js → SecretsView-17df66ee.js} +35 -36
- flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
- flowfile/web/static/assets/{Select-850215fd.js → Select-0aee4c54.js} +9 -7
- flowfile/web/static/assets/{SettingsSection-55bae608.js → SettingsSection-0784e157.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-0e8d9123.js → SettingsSection-cd341bb6.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-29b4fa6b.js → SettingsSection-f2002a6d.js} +3 -3
- flowfile/web/static/assets/{SingleSelect-bebd408b.js → SingleSelect-460cc0ea.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-6093741c.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
- flowfile/web/static/assets/{SliderInput-6a05ab61.js → SliderInput-5d926864.js} +7 -4
- flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
- flowfile/web/static/assets/{Sort-10ab48ed.js → Sort-3cdc971b.js} +9 -7
- flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
- flowfile/web/static/assets/{TextInput-df9d6259.js → TextInput-a2d0bfbd.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-000e1178.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
- flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
- flowfile/web/static/assets/{TextToRows-6c2d93d8.js → TextToRows-918945f7.js} +11 -10
- flowfile/web/static/assets/{ToggleSwitch-0ff7ac52.js → ToggleSwitch-f0ef5196.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-c6dc3029.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
- flowfile/web/static/assets/{UnavailableFields-1bab97cb.js → UnavailableFields-bdad6144.js} +4 -4
- flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
- flowfile/web/static/assets/{Union-b563478a.js → Union-e8ab8c86.js} +8 -6
- flowfile/web/static/assets/{Unique-f90db5db.js → Unique-8cd4f976.js} +13 -22
- flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
- flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
- flowfile/web/static/assets/{Unpivot-bcb0025f.js → Unpivot-8da14095.js} +10 -8
- flowfile/web/static/assets/{UnpivotValidation-c4e73b04.js → UnpivotValidation-6f7d89ff.js} +3 -3
- flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
- flowfile/web/static/assets/{VueGraphicWalker-bb8535e2.js → VueGraphicWalker-3fb312e1.js} +4 -4
- flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
- flowfile/web/static/assets/{api-4c8e3822.js → api-24483f0d.js} +1 -1
- flowfile/web/static/assets/{api-2d6adc4f.js → api-8b81fa73.js} +1 -1
- flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
- flowfile/web/static/assets/{dropDown-1bca8a74.js → dropDown-ac0fda9d.js} +3 -3
- flowfile/web/static/assets/{fullEditor-2985687e.js → fullEditor-5497a84a.js} +11 -10
- flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
- flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
- flowfile/web/static/assets/{genericNodeSettings-0476ba4e.js → genericNodeSettings-99014e1d.js} +5 -5
- flowfile/web/static/assets/index-07dda503.js +38 -0
- flowfile/web/static/assets/index-3ba44389.js +2696 -0
- flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
- flowfile/web/static/assets/{index-246f201c.js → index-fb6493ae.js} +41626 -40869
- flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
- flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
- flowfile/web/static/assets/{outputCsv-d686eeaf.js → outputCsv-8f8ba42d.js} +3 -3
- flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
- flowfile/web/static/assets/{outputExcel-8809ea2f.js → outputExcel-393f4fef.js} +3 -3
- flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
- flowfile/web/static/assets/{outputParquet-53ba645a.js → outputParquet-07c81f65.js} +4 -4
- flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
- flowfile/web/static/assets/{readCsv-053bf97b.js → readCsv-07f6d9ad.js} +21 -20
- flowfile/web/static/assets/{readCsv-bca3ed53.css → readCsv-3bfac4c3.css} +15 -15
- flowfile/web/static/assets/{readExcel-e1b381ea.css → readExcel-3db6b763.css} +13 -13
- flowfile/web/static/assets/{readExcel-ad531eab.js → readExcel-ed69bc8f.js} +10 -12
- flowfile/web/static/assets/{readParquet-cee068e2.css → readParquet-c5244ad5.css} +4 -4
- flowfile/web/static/assets/{readParquet-58e899a1.js → readParquet-e3ed4528.js} +4 -7
- flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
- flowfile/web/static/assets/{selectDynamic-b38de2ba.js → selectDynamic-80b92899.js} +5 -5
- flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
- flowfile/web/static/assets/{vue-codemirror.esm-db9b8936.js → vue-codemirror.esm-0965f39f.js} +31 -637
- flowfile/web/static/assets/{vue-content-loader.es-b5f3ac30.js → vue-content-loader.es-c506ad97.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +4 -4
- flowfile-0.5.3.dist-info/RECORD +402 -0
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +1 -1
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +1 -0
- flowfile_core/__init__.py +13 -3
- flowfile_core/auth/jwt.py +51 -16
- flowfile_core/auth/models.py +32 -7
- flowfile_core/auth/password.py +89 -0
- flowfile_core/auth/secrets.py +8 -6
- flowfile_core/configs/__init__.py +9 -7
- flowfile_core/configs/flow_logger.py +15 -14
- flowfile_core/configs/node_store/__init__.py +72 -4
- flowfile_core/configs/node_store/nodes.py +155 -172
- flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
- flowfile_core/configs/settings.py +28 -15
- flowfile_core/database/connection.py +7 -6
- flowfile_core/database/init_db.py +96 -2
- flowfile_core/database/models.py +3 -1
- flowfile_core/fileExplorer/__init__.py +17 -0
- flowfile_core/fileExplorer/funcs.py +123 -57
- flowfile_core/fileExplorer/utils.py +10 -11
- flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
- flowfile_core/flowfile/analytics/analytics_processor.py +27 -24
- flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
- flowfile_core/flowfile/analytics/utils.py +1 -1
- flowfile_core/flowfile/code_generator/code_generator.py +391 -279
- flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
- flowfile_core/flowfile/database_connection_manager/models.py +1 -1
- flowfile_core/flowfile/extensions.py +17 -12
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +152 -103
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +526 -477
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +43 -32
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +15 -11
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
- flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +360 -191
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
- flowfile_core/flowfile/flow_data_engine/utils.py +101 -67
- flowfile_core/flowfile/flow_graph.py +1011 -561
- flowfile_core/flowfile/flow_graph_utils.py +31 -49
- flowfile_core/flowfile/flow_node/flow_node.py +332 -232
- flowfile_core/flowfile/flow_node/models.py +54 -41
- flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
- flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
- flowfile_core/flowfile/handler.py +82 -32
- flowfile_core/flowfile/manage/compatibility_enhancements.py +493 -47
- flowfile_core/flowfile/manage/io_flowfile.py +391 -0
- flowfile_core/flowfile/node_designer/__init__.py +15 -13
- flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
- flowfile_core/flowfile/node_designer/custom_node.py +162 -36
- flowfile_core/flowfile/node_designer/ui_components.py +136 -35
- flowfile_core/flowfile/schema_callbacks.py +77 -54
- flowfile_core/flowfile/setting_generator/__init__.py +0 -1
- flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
- flowfile_core/flowfile/setting_generator/settings.py +72 -55
- flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
- flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
- flowfile_core/flowfile/util/calculate_layout.py +9 -13
- flowfile_core/flowfile/util/execution_orderer.py +25 -17
- flowfile_core/flowfile/util/node_skipper.py +4 -4
- flowfile_core/flowfile/utils.py +19 -21
- flowfile_core/main.py +26 -19
- flowfile_core/routes/auth.py +284 -11
- flowfile_core/routes/cloud_connections.py +25 -25
- flowfile_core/routes/logs.py +21 -29
- flowfile_core/routes/public.py +3 -3
- flowfile_core/routes/routes.py +77 -43
- flowfile_core/routes/secrets.py +25 -27
- flowfile_core/routes/user_defined_components.py +483 -4
- flowfile_core/run_lock.py +0 -1
- flowfile_core/schemas/__init__.py +4 -6
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
- flowfile_core/schemas/cloud_storage_schemas.py +59 -55
- flowfile_core/schemas/input_schema.py +398 -154
- flowfile_core/schemas/output_model.py +50 -35
- flowfile_core/schemas/schemas.py +207 -67
- flowfile_core/schemas/transform_schema.py +1360 -435
- flowfile_core/schemas/yaml_types.py +117 -0
- flowfile_core/secret_manager/secret_manager.py +17 -13
- flowfile_core/{flowfile/node_designer/data_types.py → types.py} +33 -3
- flowfile_core/utils/arrow_reader.py +7 -6
- flowfile_core/utils/excel_file_manager.py +3 -3
- flowfile_core/utils/fileManager.py +7 -7
- flowfile_core/utils/fl_executor.py +8 -10
- flowfile_core/utils/utils.py +4 -4
- flowfile_core/utils/validate_setup.py +5 -4
- flowfile_frame/__init__.py +107 -50
- flowfile_frame/adapters.py +2 -9
- flowfile_frame/adding_expr.py +73 -32
- flowfile_frame/cloud_storage/frame_helpers.py +27 -23
- flowfile_frame/cloud_storage/secret_manager.py +12 -26
- flowfile_frame/config.py +2 -5
- flowfile_frame/expr.py +311 -218
- flowfile_frame/expr.pyi +160 -159
- flowfile_frame/expr_name.py +23 -23
- flowfile_frame/flow_frame.py +581 -489
- flowfile_frame/flow_frame.pyi +123 -104
- flowfile_frame/flow_frame_methods.py +236 -252
- flowfile_frame/group_frame.py +50 -20
- flowfile_frame/join.py +2 -2
- flowfile_frame/lazy.py +129 -87
- flowfile_frame/lazy_methods.py +83 -30
- flowfile_frame/list_name_space.py +55 -50
- flowfile_frame/selectors.py +148 -68
- flowfile_frame/series.py +9 -7
- flowfile_frame/utils.py +19 -21
- flowfile_worker/__init__.py +12 -4
- flowfile_worker/configs.py +11 -19
- flowfile_worker/create/__init__.py +14 -27
- flowfile_worker/create/funcs.py +143 -94
- flowfile_worker/create/models.py +139 -68
- flowfile_worker/create/pl_types.py +14 -15
- flowfile_worker/create/read_excel_tables.py +34 -41
- flowfile_worker/create/utils.py +22 -19
- flowfile_worker/external_sources/s3_source/main.py +18 -51
- flowfile_worker/external_sources/s3_source/models.py +34 -27
- flowfile_worker/external_sources/sql_source/main.py +8 -5
- flowfile_worker/external_sources/sql_source/models.py +13 -9
- flowfile_worker/flow_logger.py +10 -8
- flowfile_worker/funcs.py +214 -155
- flowfile_worker/main.py +11 -17
- flowfile_worker/models.py +35 -28
- flowfile_worker/process_manager.py +2 -3
- flowfile_worker/routes.py +121 -93
- flowfile_worker/secrets.py +9 -6
- flowfile_worker/spawner.py +80 -49
- flowfile_worker/utils.py +3 -2
- shared/__init__.py +2 -7
- shared/storage_config.py +25 -13
- test_utils/postgres/commands.py +3 -2
- test_utils/postgres/fixtures.py +9 -9
- test_utils/s3/commands.py +1 -1
- test_utils/s3/data_generator.py +3 -4
- test_utils/s3/demo_data_generator.py +4 -7
- test_utils/s3/fixtures.py +7 -5
- tools/migrate/README.md +56 -0
- tools/migrate/__init__.py +12 -0
- tools/migrate/__main__.py +118 -0
- tools/migrate/legacy_schemas.py +682 -0
- tools/migrate/migrate.py +610 -0
- tools/migrate/tests/__init__.py +0 -0
- tools/migrate/tests/conftest.py +21 -0
- tools/migrate/tests/test_migrate.py +622 -0
- tools/migrate/tests/test_migration_e2e.py +1009 -0
- tools/migrate/tests/test_node_migrations.py +843 -0
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
- flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
- flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
- flowfile/web/static/assets/Filter-812dcbca.js +0 -164
- flowfile/web/static/assets/Filter-f62091b3.css +0 -20
- flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
- flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
- flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
- flowfile/web/static/assets/secretApi-538058f3.js +0 -46
- flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
- flowfile-0.4.1.dist-info/RECORD +0 -376
- flowfile_core/flowfile/manage/open_flowfile.py +0 -143
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
- /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
|
@@ -1,61 +1,103 @@
|
|
|
1
1
|
import datetime
|
|
2
|
-
import
|
|
3
|
-
|
|
2
|
+
import json
|
|
4
3
|
import os
|
|
5
|
-
|
|
6
|
-
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from copy import deepcopy
|
|
6
|
+
from functools import partial
|
|
7
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from time import time
|
|
10
|
+
from typing import Any, Literal, Union
|
|
11
|
+
from uuid import uuid1
|
|
7
12
|
|
|
8
13
|
import fastexcel
|
|
14
|
+
import polars as pl
|
|
15
|
+
import yaml
|
|
9
16
|
from fastapi.exceptions import HTTPException
|
|
10
|
-
from time import time
|
|
11
|
-
from functools import partial
|
|
12
|
-
from typing import List, Dict, Union, Callable, Any, Optional, Tuple, Literal
|
|
13
|
-
from uuid import uuid1
|
|
14
|
-
from copy import deepcopy
|
|
15
17
|
from pyarrow.parquet import ParquetFile
|
|
18
|
+
|
|
16
19
|
from flowfile_core.configs import logger
|
|
17
20
|
from flowfile_core.configs.flow_logger import FlowLogger
|
|
18
|
-
from flowfile_core.
|
|
19
|
-
from flowfile_core.flowfile.
|
|
20
|
-
|
|
21
|
+
from flowfile_core.configs.node_store import CUSTOM_NODE_STORE
|
|
22
|
+
from flowfile_core.flowfile.analytics.utils import create_graphic_walker_node_from_node_promise
|
|
23
|
+
from flowfile_core.flowfile.database_connection_manager.db_connections import (
|
|
24
|
+
get_local_cloud_connection,
|
|
25
|
+
get_local_database_connection,
|
|
26
|
+
)
|
|
21
27
|
from flowfile_core.flowfile.flow_data_engine.cloud_storage_reader import CloudStorageReader
|
|
22
|
-
from flowfile_core.utils.arrow_reader import get_read_top_n
|
|
23
28
|
from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine, execute_polars_code
|
|
24
|
-
from flowfile_core.flowfile.flow_data_engine.
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
29
|
+
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, cast_str_to_polars_type
|
|
30
|
+
from flowfile_core.flowfile.flow_data_engine.polars_code_parser import polars_code_parser
|
|
31
|
+
from flowfile_core.flowfile.flow_data_engine.read_excel_tables import (
|
|
32
|
+
get_calamine_xlsx_data_types,
|
|
33
|
+
get_open_xlsx_datatypes,
|
|
34
|
+
)
|
|
35
|
+
from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import (
|
|
36
|
+
ExternalCloudWriter,
|
|
37
|
+
ExternalDatabaseFetcher,
|
|
38
|
+
ExternalDatabaseWriter,
|
|
39
|
+
ExternalDfFetcher,
|
|
40
|
+
)
|
|
41
|
+
from flowfile_core.flowfile.flow_node.flow_node import FlowNode
|
|
42
|
+
from flowfile_core.flowfile.graph_tree.graph_tree import (
|
|
43
|
+
add_un_drawn_nodes,
|
|
44
|
+
build_flow_paths,
|
|
45
|
+
build_node_info,
|
|
46
|
+
calculate_depth,
|
|
47
|
+
define_node_connections,
|
|
48
|
+
draw_merged_paths,
|
|
49
|
+
draw_standalone_paths,
|
|
50
|
+
group_nodes_by_depth,
|
|
51
|
+
)
|
|
52
|
+
from flowfile_core.flowfile.node_designer.custom_node import CustomNodeBase
|
|
53
|
+
from flowfile_core.flowfile.schema_callbacks import calculate_fuzzy_match_schema, pre_calculate_pivot_schema
|
|
28
54
|
from flowfile_core.flowfile.sources import external_sources
|
|
55
|
+
from flowfile_core.flowfile.sources.external_sources.factory import data_source_factory
|
|
56
|
+
from flowfile_core.flowfile.sources.external_sources.sql_source import models as sql_models
|
|
57
|
+
from flowfile_core.flowfile.sources.external_sources.sql_source import utils as sql_utils
|
|
58
|
+
from flowfile_core.flowfile.sources.external_sources.sql_source.sql_source import BaseSqlSource, SqlSource
|
|
59
|
+
from flowfile_core.flowfile.util.calculate_layout import calculate_layered_layout
|
|
60
|
+
from flowfile_core.flowfile.util.execution_orderer import compute_execution_plan
|
|
61
|
+
from flowfile_core.flowfile.utils import snake_case_to_camel_case
|
|
29
62
|
from flowfile_core.schemas import input_schema, schemas, transform_schema
|
|
63
|
+
from flowfile_core.schemas.cloud_storage_schemas import (
|
|
64
|
+
AuthMethod,
|
|
65
|
+
CloudStorageReadSettingsInternal,
|
|
66
|
+
CloudStorageWriteSettingsInternal,
|
|
67
|
+
FullCloudStorageConnection,
|
|
68
|
+
get_cloud_storage_write_settings_worker_interface,
|
|
69
|
+
)
|
|
30
70
|
from flowfile_core.schemas.output_model import NodeData, NodeResult, RunInformation
|
|
31
|
-
from flowfile_core.schemas.
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
from flowfile_core.flowfile.sources.external_sources.sql_source import utils as sql_utils, models as sql_models
|
|
50
|
-
from flowfile_core.flowfile.sources.external_sources.sql_source.sql_source import SqlSource, BaseSqlSource
|
|
51
|
-
from flowfile_core.flowfile.database_connection_manager.db_connections import (get_local_database_connection,
|
|
52
|
-
get_local_cloud_connection)
|
|
53
|
-
from flowfile_core.flowfile.util.calculate_layout import calculate_layered_layout
|
|
54
|
-
from flowfile_core.flowfile.node_designer.custom_node import CustomNodeBase
|
|
71
|
+
from flowfile_core.schemas.transform_schema import FuzzyMatchInputManager
|
|
72
|
+
from flowfile_core.secret_manager.secret_manager import decrypt_secret, get_encrypted_secret
|
|
73
|
+
from flowfile_core.utils.arrow_reader import get_read_top_n
|
|
74
|
+
|
|
75
|
+
try:
|
|
76
|
+
__version__ = version("Flowfile")
|
|
77
|
+
except PackageNotFoundError:
|
|
78
|
+
__version__ = "0.5.0"
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def represent_list_json(dumper, data):
|
|
82
|
+
"""Use inline style for short simple lists, block style for complex ones."""
|
|
83
|
+
if len(data) <= 10 and all(isinstance(item, (int, str, float, bool, type(None))) for item in data):
|
|
84
|
+
return dumper.represent_sequence("tag:yaml.org,2002:seq", data, flow_style=True)
|
|
85
|
+
return dumper.represent_sequence("tag:yaml.org,2002:seq", data, flow_style=False)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
yaml.add_representer(list, represent_list_json)
|
|
55
89
|
|
|
56
90
|
|
|
57
|
-
def get_xlsx_schema(
|
|
58
|
-
|
|
91
|
+
def get_xlsx_schema(
|
|
92
|
+
engine: str,
|
|
93
|
+
file_path: str,
|
|
94
|
+
sheet_name: str,
|
|
95
|
+
start_row: int,
|
|
96
|
+
start_column: int,
|
|
97
|
+
end_row: int,
|
|
98
|
+
end_column: int,
|
|
99
|
+
has_headers: bool,
|
|
100
|
+
):
|
|
59
101
|
"""Calculates the schema of an XLSX file by reading a sample of rows.
|
|
60
102
|
|
|
61
103
|
Args:
|
|
@@ -72,27 +114,29 @@ def get_xlsx_schema(engine: str, file_path: str, sheet_name: str, start_row: int
|
|
|
72
114
|
A list of FlowfileColumn objects representing the schema.
|
|
73
115
|
"""
|
|
74
116
|
try:
|
|
75
|
-
logger.info(
|
|
76
|
-
if engine ==
|
|
117
|
+
logger.info("Starting to calculate the schema")
|
|
118
|
+
if engine == "openpyxl":
|
|
77
119
|
max_col = end_column if end_column > 0 else None
|
|
78
|
-
return get_open_xlsx_datatypes(
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
120
|
+
return get_open_xlsx_datatypes(
|
|
121
|
+
file_path=file_path,
|
|
122
|
+
sheet_name=sheet_name,
|
|
123
|
+
min_row=start_row + 1,
|
|
124
|
+
min_col=start_column + 1,
|
|
125
|
+
max_row=100,
|
|
126
|
+
max_col=max_col,
|
|
127
|
+
has_headers=has_headers,
|
|
128
|
+
)
|
|
129
|
+
elif engine == "calamine":
|
|
130
|
+
return get_calamine_xlsx_data_types(
|
|
131
|
+
file_path=file_path, sheet_name=sheet_name, start_row=start_row, end_row=end_row
|
|
132
|
+
)
|
|
133
|
+
logger.info("done calculating the schema")
|
|
90
134
|
except Exception as e:
|
|
91
135
|
logger.error(e)
|
|
92
136
|
return []
|
|
93
137
|
|
|
94
138
|
|
|
95
|
-
def skip_node_message(flow_logger: FlowLogger, nodes:
|
|
139
|
+
def skip_node_message(flow_logger: FlowLogger, nodes: list[FlowNode]) -> None:
|
|
96
140
|
"""Logs a warning message listing all nodes that will be skipped during execution.
|
|
97
141
|
|
|
98
142
|
Args:
|
|
@@ -101,10 +145,10 @@ def skip_node_message(flow_logger: FlowLogger, nodes: List[FlowNode]) -> None:
|
|
|
101
145
|
"""
|
|
102
146
|
if len(nodes) > 0:
|
|
103
147
|
msg = "\n".join(str(node) for node in nodes)
|
|
104
|
-
flow_logger.warning(f
|
|
148
|
+
flow_logger.warning(f"skipping nodes:\n{msg}")
|
|
105
149
|
|
|
106
150
|
|
|
107
|
-
def execution_order_message(flow_logger: FlowLogger, nodes:
|
|
151
|
+
def execution_order_message(flow_logger: FlowLogger, nodes: list[FlowNode]) -> None:
|
|
108
152
|
"""Logs an informational message showing the determined execution order of nodes.
|
|
109
153
|
|
|
110
154
|
Args:
|
|
@@ -112,11 +156,19 @@ def execution_order_message(flow_logger: FlowLogger, nodes: List[FlowNode]) -> N
|
|
|
112
156
|
nodes: A list of FlowNode objects in the order they will be executed.
|
|
113
157
|
"""
|
|
114
158
|
msg = "\n".join(str(node) for node in nodes)
|
|
115
|
-
flow_logger.info(f
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
def get_xlsx_schema_callback(
|
|
119
|
-
|
|
159
|
+
flow_logger.info(f"execution order:\n{msg}")
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def get_xlsx_schema_callback(
|
|
163
|
+
engine: str,
|
|
164
|
+
file_path: str,
|
|
165
|
+
sheet_name: str,
|
|
166
|
+
start_row: int,
|
|
167
|
+
start_column: int,
|
|
168
|
+
end_row: int,
|
|
169
|
+
end_column: int,
|
|
170
|
+
has_headers: bool,
|
|
171
|
+
):
|
|
120
172
|
"""Creates a partially applied function for lazy calculation of an XLSX schema.
|
|
121
173
|
|
|
122
174
|
Args:
|
|
@@ -132,12 +184,22 @@ def get_xlsx_schema_callback(engine: str, file_path: str, sheet_name: str, start
|
|
|
132
184
|
Returns:
|
|
133
185
|
A callable function that, when called, will execute `get_xlsx_schema`.
|
|
134
186
|
"""
|
|
135
|
-
return partial(
|
|
136
|
-
|
|
187
|
+
return partial(
|
|
188
|
+
get_xlsx_schema,
|
|
189
|
+
engine=engine,
|
|
190
|
+
file_path=file_path,
|
|
191
|
+
sheet_name=sheet_name,
|
|
192
|
+
start_row=start_row,
|
|
193
|
+
start_column=start_column,
|
|
194
|
+
end_row=end_row,
|
|
195
|
+
end_column=end_column,
|
|
196
|
+
has_headers=has_headers,
|
|
197
|
+
)
|
|
137
198
|
|
|
138
199
|
|
|
139
|
-
def get_cloud_connection_settings(
|
|
140
|
-
|
|
200
|
+
def get_cloud_connection_settings(
|
|
201
|
+
connection_name: str, user_id: int, auth_mode: AuthMethod
|
|
202
|
+
) -> FullCloudStorageConnection:
|
|
141
203
|
"""Retrieves cloud storage connection settings, falling back to environment variables if needed.
|
|
142
204
|
|
|
143
205
|
Args:
|
|
@@ -152,7 +214,7 @@ def get_cloud_connection_settings(connection_name: str,
|
|
|
152
214
|
HTTPException: If the connection settings cannot be found.
|
|
153
215
|
"""
|
|
154
216
|
cloud_connection_settings = get_local_cloud_connection(connection_name, user_id)
|
|
155
|
-
if cloud_connection_settings is None and auth_mode in ("env_vars",
|
|
217
|
+
if cloud_connection_settings is None and auth_mode in ("env_vars", transform_schema.AUTO_DATA_TYPE):
|
|
156
218
|
# If the auth mode is aws-cli, we do not need connection settings
|
|
157
219
|
cloud_connection_settings = FullCloudStorageConnection(storage_type="s3", auth_method="env_vars")
|
|
158
220
|
elif cloud_connection_settings is None and auth_mode == "aws-cli":
|
|
@@ -167,32 +229,44 @@ class FlowGraph:
|
|
|
167
229
|
|
|
168
230
|
It manages nodes, connections, and the execution of the entire flow.
|
|
169
231
|
"""
|
|
232
|
+
|
|
170
233
|
uuid: str
|
|
171
|
-
depends_on:
|
|
234
|
+
depends_on: dict[
|
|
235
|
+
int,
|
|
236
|
+
Union[
|
|
237
|
+
ParquetFile,
|
|
238
|
+
FlowDataEngine,
|
|
239
|
+
"FlowGraph",
|
|
240
|
+
pl.DataFrame,
|
|
241
|
+
],
|
|
242
|
+
]
|
|
172
243
|
_flow_id: int
|
|
173
244
|
_input_data: Union[ParquetFile, FlowDataEngine, "FlowGraph"]
|
|
174
|
-
_input_cols:
|
|
175
|
-
_output_cols:
|
|
176
|
-
_node_db:
|
|
177
|
-
_node_ids:
|
|
178
|
-
_results:
|
|
245
|
+
_input_cols: list[str]
|
|
246
|
+
_output_cols: list[str]
|
|
247
|
+
_node_db: dict[str | int, FlowNode]
|
|
248
|
+
_node_ids: list[str | int]
|
|
249
|
+
_results: FlowDataEngine | None = None
|
|
179
250
|
cache_results: bool = False
|
|
180
|
-
schema:
|
|
251
|
+
schema: list[FlowfileColumn] | None = None
|
|
181
252
|
has_over_row_function: bool = False
|
|
182
|
-
_flow_starts:
|
|
183
|
-
latest_run_info:
|
|
253
|
+
_flow_starts: list[int | str] = None
|
|
254
|
+
latest_run_info: RunInformation | None = None
|
|
184
255
|
start_datetime: datetime = None
|
|
185
256
|
end_datetime: datetime = None
|
|
186
257
|
_flow_settings: schemas.FlowSettings = None
|
|
187
258
|
flow_logger: FlowLogger
|
|
188
259
|
|
|
189
|
-
def __init__(
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
260
|
+
def __init__(
|
|
261
|
+
self,
|
|
262
|
+
flow_settings: schemas.FlowSettings | schemas.FlowGraphConfig,
|
|
263
|
+
name: str = None,
|
|
264
|
+
input_cols: list[str] = None,
|
|
265
|
+
output_cols: list[str] = None,
|
|
266
|
+
path_ref: str = None,
|
|
267
|
+
input_flow: Union[ParquetFile, FlowDataEngine, "FlowGraph"] = None,
|
|
268
|
+
cache_results: bool = False,
|
|
269
|
+
):
|
|
196
270
|
"""Initializes a new FlowGraph instance.
|
|
197
271
|
|
|
198
272
|
Args:
|
|
@@ -214,7 +288,7 @@ class FlowGraph:
|
|
|
214
288
|
self.latest_run_info = None
|
|
215
289
|
self._flow_id = flow_settings.flow_id
|
|
216
290
|
self.flow_logger = FlowLogger(flow_settings.flow_id)
|
|
217
|
-
self._flow_starts:
|
|
291
|
+
self._flow_starts: list[FlowNode] = []
|
|
218
292
|
self._results = None
|
|
219
293
|
self.schema = None
|
|
220
294
|
self.has_over_row_function = False
|
|
@@ -223,7 +297,7 @@ class FlowGraph:
|
|
|
223
297
|
self._node_ids = []
|
|
224
298
|
self._node_db = {}
|
|
225
299
|
self.cache_results = cache_results
|
|
226
|
-
self.__name__ = name if name else id(self)
|
|
300
|
+
self.__name__ = name if name else "flow_" + str(id(self))
|
|
227
301
|
self.depends_on = {}
|
|
228
302
|
if path_ref is not None:
|
|
229
303
|
self.add_datasource(input_schema.NodeDatasource(file_path=path_ref))
|
|
@@ -236,13 +310,21 @@ class FlowGraph:
|
|
|
236
310
|
|
|
237
311
|
@flow_settings.setter
|
|
238
312
|
def flow_settings(self, flow_settings: schemas.FlowSettings):
|
|
239
|
-
if (
|
|
240
|
-
|
|
241
|
-
(self._flow_settings.execution_mode != flow_settings.execution_mode)
|
|
313
|
+
if (self._flow_settings.execution_location != flow_settings.execution_location) or (
|
|
314
|
+
self._flow_settings.execution_mode != flow_settings.execution_mode
|
|
242
315
|
):
|
|
243
316
|
self.reset()
|
|
244
317
|
self._flow_settings = flow_settings
|
|
245
318
|
|
|
319
|
+
def add_node_to_starting_list(self, node: FlowNode) -> None:
|
|
320
|
+
"""Adds a node to the list of starting nodes for the flow if not already present.
|
|
321
|
+
|
|
322
|
+
Args:
|
|
323
|
+
node: The FlowNode to add as a starting node.
|
|
324
|
+
"""
|
|
325
|
+
if node.node_id not in {self_node.node_id for self_node in self._flow_starts}:
|
|
326
|
+
self._flow_starts.append(node)
|
|
327
|
+
|
|
246
328
|
def add_node_promise(self, node_promise: input_schema.NodePromise):
|
|
247
329
|
"""Adds a placeholder node to the graph that is not yet fully configured.
|
|
248
330
|
|
|
@@ -251,13 +333,31 @@ class FlowGraph:
|
|
|
251
333
|
Args:
|
|
252
334
|
node_promise: A promise object containing basic node information.
|
|
253
335
|
"""
|
|
336
|
+
|
|
254
337
|
def placeholder(n: FlowNode = None):
|
|
255
338
|
if n is None:
|
|
256
339
|
return FlowDataEngine()
|
|
257
340
|
return n
|
|
258
341
|
|
|
259
|
-
self.add_node_step(
|
|
260
|
-
|
|
342
|
+
self.add_node_step(
|
|
343
|
+
node_id=node_promise.node_id,
|
|
344
|
+
node_type=node_promise.node_type,
|
|
345
|
+
function=placeholder,
|
|
346
|
+
setting_input=node_promise,
|
|
347
|
+
)
|
|
348
|
+
if node_promise.is_user_defined:
|
|
349
|
+
node_needs_settings: bool
|
|
350
|
+
custom_node = CUSTOM_NODE_STORE.get(node_promise.node_type)
|
|
351
|
+
if custom_node is None:
|
|
352
|
+
raise Exception(f"Custom node type '{node_promise.node_type}' not found in registry.")
|
|
353
|
+
settings_schema = custom_node.model_fields["settings_schema"].default
|
|
354
|
+
node_needs_settings = settings_schema is not None and not settings_schema.is_empty()
|
|
355
|
+
if not node_needs_settings:
|
|
356
|
+
user_defined_node_settings = input_schema.UserDefinedNode(settings={}, **node_promise.model_dump())
|
|
357
|
+
initialized_model = custom_node()
|
|
358
|
+
self.add_user_defined_node(
|
|
359
|
+
custom_node=initialized_model, user_defined_node_settings=user_defined_node_settings
|
|
360
|
+
)
|
|
261
361
|
|
|
262
362
|
def apply_layout(self, y_spacing: int = 150, x_spacing: int = 200, initial_y: int = 100):
|
|
263
363
|
"""Calculates and applies a layered layout to all nodes in the graph.
|
|
@@ -285,20 +385,24 @@ class FlowGraph:
|
|
|
285
385
|
updated_count = 0
|
|
286
386
|
for node_id, (pos_x, pos_y) in new_positions.items():
|
|
287
387
|
node = self.get_node(node_id)
|
|
288
|
-
if node and hasattr(node,
|
|
388
|
+
if node and hasattr(node, "setting_input"):
|
|
289
389
|
setting = node.setting_input
|
|
290
|
-
if hasattr(setting,
|
|
390
|
+
if hasattr(setting, "pos_x") and hasattr(setting, "pos_y"):
|
|
291
391
|
setting.pos_x = pos_x
|
|
292
392
|
setting.pos_y = pos_y
|
|
293
393
|
updated_count += 1
|
|
294
394
|
else:
|
|
295
|
-
self.flow_logger.warning(
|
|
395
|
+
self.flow_logger.warning(
|
|
396
|
+
f"Node {node_id} setting_input ({type(setting)}) lacks pos_x/pos_y attributes."
|
|
397
|
+
)
|
|
296
398
|
elif node:
|
|
297
399
|
self.flow_logger.warning(f"Node {node_id} lacks setting_input attribute.")
|
|
298
400
|
# else: Node not found, already warned by calculate_layered_layout
|
|
299
401
|
|
|
300
402
|
end_time = time()
|
|
301
|
-
self.flow_logger.info(
|
|
403
|
+
self.flow_logger.info(
|
|
404
|
+
f"Layout applied to {updated_count}/{len(self.nodes)} nodes in {end_time - start_time:.2f} seconds."
|
|
405
|
+
)
|
|
302
406
|
|
|
303
407
|
except Exception as e:
|
|
304
408
|
self.flow_logger.error(f"Error applying layout: {e}")
|
|
@@ -318,13 +422,13 @@ class FlowGraph:
|
|
|
318
422
|
"""
|
|
319
423
|
self._flow_id = new_id
|
|
320
424
|
for node in self.nodes:
|
|
321
|
-
if hasattr(node.setting_input,
|
|
425
|
+
if hasattr(node.setting_input, "flow_id"):
|
|
322
426
|
node.setting_input.flow_id = new_id
|
|
323
427
|
self.flow_settings.flow_id = new_id
|
|
324
428
|
|
|
325
429
|
def __repr__(self):
|
|
326
430
|
"""Provides the official string representation of the FlowGraph instance."""
|
|
327
|
-
settings_str = " -" +
|
|
431
|
+
settings_str = " -" + "\n -".join(f"{k}: {v}" for k, v in self.flow_settings)
|
|
328
432
|
return f"FlowGraph(\nNodes: {self._node_db}\n\nSettings:\n{settings_str}"
|
|
329
433
|
|
|
330
434
|
def print_tree(self):
|
|
@@ -342,7 +446,7 @@ class FlowGraph:
|
|
|
342
446
|
|
|
343
447
|
# Group nodes by depth
|
|
344
448
|
depth_groups, max_depth = group_nodes_by_depth(node_info)
|
|
345
|
-
|
|
449
|
+
|
|
346
450
|
# Sort nodes within each depth group
|
|
347
451
|
for depth in depth_groups:
|
|
348
452
|
depth_groups[depth].sort()
|
|
@@ -352,7 +456,7 @@ class FlowGraph:
|
|
|
352
456
|
|
|
353
457
|
# Track which nodes connect to what
|
|
354
458
|
merge_points = define_node_connections(node_info)
|
|
355
|
-
|
|
459
|
+
|
|
356
460
|
# Build the flow paths
|
|
357
461
|
|
|
358
462
|
# Find the maximum label length for each depth level
|
|
@@ -361,15 +465,15 @@ class FlowGraph:
|
|
|
361
465
|
if depth in depth_groups:
|
|
362
466
|
max_len = max(len(node_info[nid].label) for nid in depth_groups[depth])
|
|
363
467
|
max_label_length[depth] = max_len
|
|
364
|
-
|
|
468
|
+
|
|
365
469
|
# Draw the paths
|
|
366
470
|
drawn_nodes = set()
|
|
367
471
|
merge_drawn = set()
|
|
368
|
-
|
|
472
|
+
|
|
369
473
|
# Group paths by their merge points
|
|
370
474
|
paths_by_merge = {}
|
|
371
475
|
standalone_paths = []
|
|
372
|
-
|
|
476
|
+
|
|
373
477
|
# Build flow paths
|
|
374
478
|
paths = build_flow_paths(node_info, self._flow_starts, merge_points)
|
|
375
479
|
|
|
@@ -391,22 +495,22 @@ class FlowGraph:
|
|
|
391
495
|
|
|
392
496
|
# Add undrawn nodes
|
|
393
497
|
add_un_drawn_nodes(drawn_nodes, node_info, lines)
|
|
394
|
-
|
|
498
|
+
|
|
395
499
|
try:
|
|
396
500
|
skip_nodes, ordered_nodes = compute_execution_plan(
|
|
397
|
-
nodes=self.nodes,
|
|
398
|
-
|
|
501
|
+
nodes=self.nodes, flow_starts=self._flow_starts + self.get_implicit_starter_nodes()
|
|
502
|
+
)
|
|
399
503
|
if ordered_nodes:
|
|
400
504
|
for i, node in enumerate(ordered_nodes, 1):
|
|
401
505
|
lines.append(f" {i:3d}. {node_info[node.node_id].label}")
|
|
402
506
|
except Exception as e:
|
|
403
507
|
lines.append(f" Could not determine execution order: {e}")
|
|
404
|
-
|
|
508
|
+
|
|
405
509
|
# Print everything
|
|
406
510
|
output = "\n".join(lines)
|
|
407
|
-
|
|
511
|
+
|
|
408
512
|
print(output)
|
|
409
|
-
|
|
513
|
+
|
|
410
514
|
def get_nodes_overview(self):
|
|
411
515
|
"""Gets a list of dictionary representations for all nodes in the graph."""
|
|
412
516
|
output = []
|
|
@@ -414,7 +518,7 @@ class FlowGraph:
|
|
|
414
518
|
output.append(v.get_repr())
|
|
415
519
|
return output
|
|
416
520
|
|
|
417
|
-
def remove_from_output_cols(self, columns:
|
|
521
|
+
def remove_from_output_cols(self, columns: list[str]):
|
|
418
522
|
"""Removes specified columns from the list of expected output columns.
|
|
419
523
|
|
|
420
524
|
Args:
|
|
@@ -423,7 +527,7 @@ class FlowGraph:
|
|
|
423
527
|
cols = set(columns)
|
|
424
528
|
self._output_cols = [c for c in self._output_cols if c not in cols]
|
|
425
529
|
|
|
426
|
-
def get_node(self, node_id:
|
|
530
|
+
def get_node(self, node_id: int | str = None) -> FlowNode | None:
|
|
427
531
|
"""Retrieves a node from the graph by its ID.
|
|
428
532
|
|
|
429
533
|
Args:
|
|
@@ -437,24 +541,43 @@ class FlowGraph:
|
|
|
437
541
|
node = self._node_db.get(node_id)
|
|
438
542
|
if node is not None:
|
|
439
543
|
return node
|
|
440
|
-
|
|
441
|
-
def add_user_defined_node(
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
544
|
+
|
|
545
|
+
def add_user_defined_node(
|
|
546
|
+
self, *, custom_node: CustomNodeBase, user_defined_node_settings: input_schema.UserDefinedNode
|
|
547
|
+
):
|
|
548
|
+
"""Adds a user-defined custom node to the graph.
|
|
549
|
+
|
|
550
|
+
Args:
|
|
551
|
+
custom_node: The custom node instance to add.
|
|
552
|
+
user_defined_node_settings: The settings for the user-defined node.
|
|
553
|
+
"""
|
|
554
|
+
|
|
555
|
+
def _func(*flow_data_engine: FlowDataEngine) -> FlowDataEngine | None:
|
|
556
|
+
user_id = user_defined_node_settings.user_id
|
|
557
|
+
if user_id is not None:
|
|
558
|
+
custom_node.set_execution_context(user_id)
|
|
559
|
+
if custom_node.settings_schema:
|
|
560
|
+
custom_node.settings_schema.set_secret_context(user_id, custom_node.accessed_secrets)
|
|
561
|
+
|
|
562
|
+
output = custom_node.process(*(fde.data_frame for fde in flow_data_engine))
|
|
563
|
+
|
|
564
|
+
accessed_secrets = custom_node.get_accessed_secrets()
|
|
565
|
+
if accessed_secrets:
|
|
566
|
+
logger.info(f"Node '{user_defined_node_settings.node_id}' accessed secrets: {accessed_secrets}")
|
|
567
|
+
if isinstance(output, (pl.LazyFrame, pl.DataFrame)):
|
|
449
568
|
return FlowDataEngine(output)
|
|
450
569
|
return None
|
|
451
|
-
|
|
452
|
-
self.add_node_step(
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
570
|
+
|
|
571
|
+
self.add_node_step(
|
|
572
|
+
node_id=user_defined_node_settings.node_id,
|
|
573
|
+
function=_func,
|
|
574
|
+
setting_input=user_defined_node_settings,
|
|
575
|
+
input_node_ids=user_defined_node_settings.depending_on_ids,
|
|
576
|
+
node_type=custom_node.item,
|
|
577
|
+
)
|
|
578
|
+
if custom_node.number_of_inputs == 0:
|
|
579
|
+
node = self.get_node(user_defined_node_settings.node_id)
|
|
580
|
+
self.add_node_to_starting_list(node)
|
|
458
581
|
|
|
459
582
|
def add_pivot(self, pivot_settings: input_schema.NodePivot):
|
|
460
583
|
"""Adds a pivot node to the graph.
|
|
@@ -466,11 +589,13 @@ class FlowGraph:
|
|
|
466
589
|
def _func(fl: FlowDataEngine):
|
|
467
590
|
return fl.do_pivot(pivot_settings.pivot_input, self.flow_logger.get_node_logger(pivot_settings.node_id))
|
|
468
591
|
|
|
469
|
-
self.add_node_step(
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
592
|
+
self.add_node_step(
|
|
593
|
+
node_id=pivot_settings.node_id,
|
|
594
|
+
function=_func,
|
|
595
|
+
node_type="pivot",
|
|
596
|
+
setting_input=pivot_settings,
|
|
597
|
+
input_node_ids=[pivot_settings.depending_on_id],
|
|
598
|
+
)
|
|
474
599
|
|
|
475
600
|
node = self.get_node(pivot_settings.node_id)
|
|
476
601
|
|
|
@@ -479,6 +604,7 @@ class FlowGraph:
|
|
|
479
604
|
input_data.lazy = True # ensure the dataset is lazy
|
|
480
605
|
input_lf = input_data.data_frame # get the lazy frame
|
|
481
606
|
return pre_calculate_pivot_schema(input_data.schema, pivot_settings.pivot_input, input_lf=input_lf)
|
|
607
|
+
|
|
482
608
|
node.schema_callback = schema_callback
|
|
483
609
|
|
|
484
610
|
def add_unpivot(self, unpivot_settings: input_schema.NodeUnpivot):
|
|
@@ -491,11 +617,13 @@ class FlowGraph:
|
|
|
491
617
|
def _func(fl: FlowDataEngine) -> FlowDataEngine:
|
|
492
618
|
return fl.unpivot(unpivot_settings.unpivot_input)
|
|
493
619
|
|
|
494
|
-
self.add_node_step(
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
620
|
+
self.add_node_step(
|
|
621
|
+
node_id=unpivot_settings.node_id,
|
|
622
|
+
function=_func,
|
|
623
|
+
node_type="unpivot",
|
|
624
|
+
setting_input=unpivot_settings,
|
|
625
|
+
input_node_ids=[unpivot_settings.depending_on_id],
|
|
626
|
+
)
|
|
499
627
|
|
|
500
628
|
def add_union(self, union_settings: input_schema.NodeUnion):
|
|
501
629
|
"""Adds a union node to combine multiple data streams.
|
|
@@ -505,14 +633,16 @@ class FlowGraph:
|
|
|
505
633
|
"""
|
|
506
634
|
|
|
507
635
|
def _func(*flowfile_tables: FlowDataEngine):
|
|
508
|
-
dfs:
|
|
509
|
-
return FlowDataEngine(pl.concat(dfs, how=
|
|
636
|
+
dfs: list[pl.LazyFrame] | list[pl.DataFrame] = [flt.data_frame for flt in flowfile_tables]
|
|
637
|
+
return FlowDataEngine(pl.concat(dfs, how="diagonal_relaxed"))
|
|
510
638
|
|
|
511
|
-
self.add_node_step(
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
639
|
+
self.add_node_step(
|
|
640
|
+
node_id=union_settings.node_id,
|
|
641
|
+
function=_func,
|
|
642
|
+
node_type="union",
|
|
643
|
+
setting_input=union_settings,
|
|
644
|
+
input_node_ids=union_settings.depending_on_ids,
|
|
645
|
+
)
|
|
516
646
|
|
|
517
647
|
def add_initial_node_analysis(self, node_promise: input_schema.NodePromise):
|
|
518
648
|
"""Adds a data exploration/analysis node based on a node promise.
|
|
@@ -540,13 +670,14 @@ class FlowGraph:
|
|
|
540
670
|
flowfile_table = flowfile_table.get_sample(sample_size, random=True)
|
|
541
671
|
external_sampler = ExternalDfFetcher(
|
|
542
672
|
lf=flowfile_table.data_frame,
|
|
543
|
-
file_ref="__gf_walker"+node.hash,
|
|
673
|
+
file_ref="__gf_walker" + node.hash,
|
|
544
674
|
wait_on_completion=True,
|
|
545
675
|
node_id=node.node_id,
|
|
546
676
|
flow_id=self.flow_id,
|
|
547
677
|
)
|
|
548
|
-
node.results.analysis_data_generator = get_read_top_n(
|
|
549
|
-
|
|
678
|
+
node.results.analysis_data_generator = get_read_top_n(
|
|
679
|
+
external_sampler.status.file_ref, n=min(sample_size, number_of_records)
|
|
680
|
+
)
|
|
550
681
|
return flowfile_table
|
|
551
682
|
|
|
552
683
|
def schema_callback():
|
|
@@ -555,11 +686,15 @@ class FlowGraph:
|
|
|
555
686
|
input_node = node.all_inputs[0]
|
|
556
687
|
return input_node.schema
|
|
557
688
|
else:
|
|
558
|
-
return [FlowfileColumn.from_input(
|
|
689
|
+
return [FlowfileColumn.from_input("col_1", "na")]
|
|
559
690
|
|
|
560
|
-
self.add_node_step(
|
|
561
|
-
|
|
562
|
-
|
|
691
|
+
self.add_node_step(
|
|
692
|
+
node_id=node_analysis.node_id,
|
|
693
|
+
node_type="explore_data",
|
|
694
|
+
function=analysis_preparation,
|
|
695
|
+
setting_input=node_analysis,
|
|
696
|
+
schema_callback=schema_callback,
|
|
697
|
+
)
|
|
563
698
|
node = self.get_node(node_analysis.node_id)
|
|
564
699
|
|
|
565
700
|
def add_group_by(self, group_by_settings: input_schema.NodeGroupBy):
|
|
@@ -572,19 +707,20 @@ class FlowGraph:
|
|
|
572
707
|
def _func(fl: FlowDataEngine) -> FlowDataEngine:
|
|
573
708
|
return fl.do_group_by(group_by_settings.groupby_input, False)
|
|
574
709
|
|
|
575
|
-
self.add_node_step(
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
710
|
+
self.add_node_step(
|
|
711
|
+
node_id=group_by_settings.node_id,
|
|
712
|
+
function=_func,
|
|
713
|
+
node_type="group_by",
|
|
714
|
+
setting_input=group_by_settings,
|
|
715
|
+
input_node_ids=[group_by_settings.depending_on_id],
|
|
716
|
+
)
|
|
580
717
|
|
|
581
718
|
node = self.get_node(group_by_settings.node_id)
|
|
582
719
|
|
|
583
720
|
def schema_callback():
|
|
584
|
-
|
|
585
721
|
output_columns = [(c.old_name, c.new_name, c.output_type) for c in group_by_settings.groupby_input.agg_cols]
|
|
586
722
|
depends_on = node.node_inputs.main_inputs[0]
|
|
587
|
-
input_schema_dict:
|
|
723
|
+
input_schema_dict: dict[str, str] = {s.name: s.data_type for s in depends_on.schema}
|
|
588
724
|
output_schema = []
|
|
589
725
|
for old_name, new_name, data_type in output_columns:
|
|
590
726
|
data_type = input_schema_dict[old_name] if data_type is None else data_type
|
|
@@ -599,38 +735,148 @@ class FlowGraph:
|
|
|
599
735
|
Args:
|
|
600
736
|
filter_settings: The settings for the filter operation.
|
|
601
737
|
"""
|
|
738
|
+
from flowfile_core.schemas.transform_schema import FilterOperator
|
|
739
|
+
|
|
740
|
+
def _build_basic_filter_expression(
|
|
741
|
+
basic_filter: transform_schema.BasicFilter, field_data_type: str | None = None
|
|
742
|
+
) -> str:
|
|
743
|
+
"""Build a filter expression string from a BasicFilter object.
|
|
744
|
+
|
|
745
|
+
Uses the Flowfile expression language that is compatible with polars_expr_transformer.
|
|
746
|
+
|
|
747
|
+
Args:
|
|
748
|
+
basic_filter: The basic filter configuration.
|
|
749
|
+
field_data_type: The data type of the field (optional, for smart quoting).
|
|
750
|
+
|
|
751
|
+
Returns:
|
|
752
|
+
A filter expression string compatible with polars_expr_transformer.
|
|
753
|
+
"""
|
|
754
|
+
field = f"[{basic_filter.field}]"
|
|
755
|
+
value = basic_filter.value
|
|
756
|
+
value2 = basic_filter.value2
|
|
757
|
+
|
|
758
|
+
is_numeric_value = value.replace(".", "", 1).replace("-", "", 1).isnumeric() if value else False
|
|
759
|
+
should_quote = field_data_type == "str" or not is_numeric_value
|
|
760
|
+
|
|
761
|
+
try:
|
|
762
|
+
operator = basic_filter.get_operator()
|
|
763
|
+
except (ValueError, AttributeError):
|
|
764
|
+
operator = FilterOperator.from_symbol(str(basic_filter.operator))
|
|
765
|
+
|
|
766
|
+
if operator == FilterOperator.EQUALS:
|
|
767
|
+
if should_quote:
|
|
768
|
+
return f'{field}="{value}"'
|
|
769
|
+
return f"{field}={value}"
|
|
770
|
+
|
|
771
|
+
elif operator == FilterOperator.NOT_EQUALS:
|
|
772
|
+
if should_quote:
|
|
773
|
+
return f'{field}!="{value}"'
|
|
774
|
+
return f"{field}!={value}"
|
|
775
|
+
|
|
776
|
+
elif operator == FilterOperator.GREATER_THAN:
|
|
777
|
+
if should_quote:
|
|
778
|
+
return f'{field}>"{value}"'
|
|
779
|
+
return f"{field}>{value}"
|
|
780
|
+
|
|
781
|
+
elif operator == FilterOperator.GREATER_THAN_OR_EQUALS:
|
|
782
|
+
if should_quote:
|
|
783
|
+
return f'{field}>="{value}"'
|
|
784
|
+
return f"{field}>={value}"
|
|
785
|
+
|
|
786
|
+
elif operator == FilterOperator.LESS_THAN:
|
|
787
|
+
if should_quote:
|
|
788
|
+
return f'{field}<"{value}"'
|
|
789
|
+
return f"{field}<{value}"
|
|
790
|
+
|
|
791
|
+
elif operator == FilterOperator.LESS_THAN_OR_EQUALS:
|
|
792
|
+
if should_quote:
|
|
793
|
+
return f'{field}<="{value}"'
|
|
794
|
+
return f"{field}<={value}"
|
|
795
|
+
|
|
796
|
+
elif operator == FilterOperator.CONTAINS:
|
|
797
|
+
return f'contains({field}, "{value}")'
|
|
798
|
+
|
|
799
|
+
elif operator == FilterOperator.NOT_CONTAINS:
|
|
800
|
+
return f'contains({field}, "{value}") = false'
|
|
801
|
+
|
|
802
|
+
elif operator == FilterOperator.STARTS_WITH:
|
|
803
|
+
return f'left({field}, {len(value)}) = "{value}"'
|
|
804
|
+
|
|
805
|
+
elif operator == FilterOperator.ENDS_WITH:
|
|
806
|
+
return f'right({field}, {len(value)}) = "{value}"'
|
|
807
|
+
|
|
808
|
+
elif operator == FilterOperator.IS_NULL:
|
|
809
|
+
return f"is_empty({field})"
|
|
810
|
+
|
|
811
|
+
elif operator == FilterOperator.IS_NOT_NULL:
|
|
812
|
+
return f"is_not_empty({field})"
|
|
813
|
+
|
|
814
|
+
elif operator == FilterOperator.IN:
|
|
815
|
+
values = [v.strip() for v in value.split(",")]
|
|
816
|
+
if len(values) == 1:
|
|
817
|
+
if should_quote:
|
|
818
|
+
return f'{field}="{values[0]}"'
|
|
819
|
+
return f"{field}={values[0]}"
|
|
820
|
+
if should_quote:
|
|
821
|
+
conditions = [f'({field}="{v}")' for v in values]
|
|
822
|
+
else:
|
|
823
|
+
conditions = [f"({field}={v})" for v in values]
|
|
824
|
+
return " | ".join(conditions)
|
|
825
|
+
|
|
826
|
+
elif operator == FilterOperator.NOT_IN:
|
|
827
|
+
values = [v.strip() for v in value.split(",")]
|
|
828
|
+
if len(values) == 1:
|
|
829
|
+
if should_quote:
|
|
830
|
+
return f'{field}!="{values[0]}"'
|
|
831
|
+
return f"{field}!={values[0]}"
|
|
832
|
+
if should_quote:
|
|
833
|
+
conditions = [f'({field}!="{v}")' for v in values]
|
|
834
|
+
else:
|
|
835
|
+
conditions = [f"({field}!={v})" for v in values]
|
|
836
|
+
return " & ".join(conditions)
|
|
602
837
|
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
838
|
+
elif operator == FilterOperator.BETWEEN:
|
|
839
|
+
if value2 is None:
|
|
840
|
+
raise ValueError("BETWEEN operator requires value2")
|
|
841
|
+
if should_quote:
|
|
842
|
+
return f'({field}>="{value}") & ({field}<="{value2}")'
|
|
843
|
+
return f"({field}>={value}) & ({field}<={value2})"
|
|
844
|
+
|
|
845
|
+
else:
|
|
846
|
+
# Fallback for unknown operators - use legacy format
|
|
847
|
+
if should_quote:
|
|
848
|
+
return f'{field}{operator.to_symbol()}"{value}"'
|
|
849
|
+
return f"{field}{operator.to_symbol()}{value}"
|
|
610
850
|
|
|
611
851
|
def _func(fl: FlowDataEngine):
|
|
612
|
-
is_advanced = filter_settings.filter_input.
|
|
852
|
+
is_advanced = filter_settings.filter_input.is_advanced()
|
|
853
|
+
|
|
613
854
|
if is_advanced:
|
|
855
|
+
predicate = filter_settings.filter_input.advanced_filter
|
|
614
856
|
return fl.do_filter(predicate)
|
|
615
857
|
else:
|
|
616
858
|
basic_filter = filter_settings.filter_input.basic_filter
|
|
617
|
-
if basic_filter
|
|
859
|
+
if basic_filter is None:
|
|
860
|
+
logger.warning("Basic filter is None, returning unfiltered data")
|
|
861
|
+
return fl
|
|
862
|
+
|
|
863
|
+
try:
|
|
618
864
|
field_data_type = fl.get_schema_column(basic_filter.field).generic_datatype()
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
filter_settings.filter_input.advanced_filter = _f
|
|
626
|
-
return fl.do_filter(_f)
|
|
865
|
+
except Exception:
|
|
866
|
+
field_data_type = None
|
|
867
|
+
|
|
868
|
+
expression = _build_basic_filter_expression(basic_filter, field_data_type)
|
|
869
|
+
filter_settings.filter_input.advanced_filter = expression
|
|
870
|
+
return fl.do_filter(expression)
|
|
627
871
|
|
|
628
|
-
self.add_node_step(
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
872
|
+
self.add_node_step(
|
|
873
|
+
filter_settings.node_id,
|
|
874
|
+
_func,
|
|
875
|
+
node_type="filter",
|
|
876
|
+
renew_schema=False,
|
|
877
|
+
setting_input=filter_settings,
|
|
878
|
+
input_node_ids=[filter_settings.depending_on_id],
|
|
879
|
+
)
|
|
634
880
|
|
|
635
881
|
def add_record_count(self, node_number_of_records: input_schema.NodeRecordCount):
|
|
636
882
|
"""Adds a filter node to the graph.
|
|
@@ -642,11 +888,13 @@ class FlowGraph:
|
|
|
642
888
|
def _func(fl: FlowDataEngine) -> FlowDataEngine:
|
|
643
889
|
return fl.get_record_count()
|
|
644
890
|
|
|
645
|
-
self.add_node_step(
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
891
|
+
self.add_node_step(
|
|
892
|
+
node_id=node_number_of_records.node_id,
|
|
893
|
+
function=_func,
|
|
894
|
+
node_type="record_count",
|
|
895
|
+
setting_input=node_number_of_records,
|
|
896
|
+
input_node_ids=[node_number_of_records.depending_on_id],
|
|
897
|
+
)
|
|
650
898
|
|
|
651
899
|
def add_polars_code(self, node_polars_code: input_schema.NodePolarsCode):
|
|
652
900
|
"""Adds a node that executes custom Polars code.
|
|
@@ -657,11 +905,14 @@ class FlowGraph:
|
|
|
657
905
|
|
|
658
906
|
def _func(*flowfile_tables: FlowDataEngine) -> FlowDataEngine:
|
|
659
907
|
return execute_polars_code(*flowfile_tables, code=node_polars_code.polars_code_input.polars_code)
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
908
|
+
|
|
909
|
+
self.add_node_step(
|
|
910
|
+
node_id=node_polars_code.node_id,
|
|
911
|
+
function=_func,
|
|
912
|
+
node_type="polars_code",
|
|
913
|
+
setting_input=node_polars_code,
|
|
914
|
+
input_node_ids=node_polars_code.depending_on_ids,
|
|
915
|
+
)
|
|
665
916
|
|
|
666
917
|
try:
|
|
667
918
|
polars_code_parser.validate_code(node_polars_code.polars_code_input.polars_code)
|
|
@@ -669,9 +920,7 @@ class FlowGraph:
|
|
|
669
920
|
node = self.get_node(node_id=node_polars_code.node_id)
|
|
670
921
|
node.results.errors = str(e)
|
|
671
922
|
|
|
672
|
-
def add_dependency_on_polars_lazy_frame(self,
|
|
673
|
-
lazy_frame: pl.LazyFrame,
|
|
674
|
-
node_id: int):
|
|
923
|
+
def add_dependency_on_polars_lazy_frame(self, lazy_frame: pl.LazyFrame, node_id: int):
|
|
675
924
|
"""Adds a special node that directly injects a Polars LazyFrame into the graph.
|
|
676
925
|
|
|
677
926
|
Note: This is intended for backend use and will not work in the UI editor.
|
|
@@ -680,13 +929,16 @@ class FlowGraph:
|
|
|
680
929
|
lazy_frame: The Polars LazyFrame to inject.
|
|
681
930
|
node_id: The ID for the new node.
|
|
682
931
|
"""
|
|
932
|
+
|
|
683
933
|
def _func():
|
|
684
934
|
return FlowDataEngine(lazy_frame)
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
935
|
+
|
|
936
|
+
node_promise = input_schema.NodePromise(
|
|
937
|
+
flow_id=self.flow_id, node_id=node_id, node_type="polars_lazy_frame", is_setup=True
|
|
938
|
+
)
|
|
939
|
+
self.add_node_step(
|
|
940
|
+
node_id=node_promise.node_id, node_type=node_promise.node_type, function=_func, setting_input=node_promise
|
|
941
|
+
)
|
|
690
942
|
|
|
691
943
|
def add_unique(self, unique_settings: input_schema.NodeUnique):
|
|
692
944
|
"""Adds a node to find and remove duplicate rows.
|
|
@@ -698,12 +950,14 @@ class FlowGraph:
|
|
|
698
950
|
def _func(fl: FlowDataEngine) -> FlowDataEngine:
|
|
699
951
|
return fl.make_unique(unique_settings.unique_input)
|
|
700
952
|
|
|
701
|
-
self.add_node_step(
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
953
|
+
self.add_node_step(
|
|
954
|
+
node_id=unique_settings.node_id,
|
|
955
|
+
function=_func,
|
|
956
|
+
input_columns=[],
|
|
957
|
+
node_type="unique",
|
|
958
|
+
setting_input=unique_settings,
|
|
959
|
+
input_node_ids=[unique_settings.depending_on_id],
|
|
960
|
+
)
|
|
707
961
|
|
|
708
962
|
def add_graph_solver(self, graph_solver_settings: input_schema.NodeGraphSolver):
|
|
709
963
|
"""Adds a node that solves graph-like problems within the data.
|
|
@@ -716,14 +970,17 @@ class FlowGraph:
|
|
|
716
970
|
graph_solver_settings: The settings object defining the graph inputs
|
|
717
971
|
and the specific algorithm to apply.
|
|
718
972
|
"""
|
|
973
|
+
|
|
719
974
|
def _func(fl: FlowDataEngine) -> FlowDataEngine:
|
|
720
975
|
return fl.solve_graph(graph_solver_settings.graph_solver_input)
|
|
721
976
|
|
|
722
|
-
self.add_node_step(
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
977
|
+
self.add_node_step(
|
|
978
|
+
node_id=graph_solver_settings.node_id,
|
|
979
|
+
function=_func,
|
|
980
|
+
node_type="graph_solver",
|
|
981
|
+
setting_input=graph_solver_settings,
|
|
982
|
+
input_node_ids=[graph_solver_settings.depending_on_id],
|
|
983
|
+
)
|
|
727
984
|
|
|
728
985
|
def add_formula(self, function_settings: input_schema.NodeFormula):
|
|
729
986
|
"""Adds a node that applies a formula to create or modify a column.
|
|
@@ -733,28 +990,33 @@ class FlowGraph:
|
|
|
733
990
|
"""
|
|
734
991
|
|
|
735
992
|
error = ""
|
|
736
|
-
if function_settings.function.field.data_type not in (None,
|
|
993
|
+
if function_settings.function.field.data_type not in (None, transform_schema.AUTO_DATA_TYPE):
|
|
737
994
|
output_type = cast_str_to_polars_type(function_settings.function.field.data_type)
|
|
738
995
|
else:
|
|
739
996
|
output_type = None
|
|
740
|
-
if output_type not in (None,
|
|
741
|
-
new_col = [
|
|
742
|
-
|
|
997
|
+
if output_type not in (None, transform_schema.AUTO_DATA_TYPE):
|
|
998
|
+
new_col = [
|
|
999
|
+
FlowfileColumn.from_input(column_name=function_settings.function.field.name, data_type=str(output_type))
|
|
1000
|
+
]
|
|
743
1001
|
else:
|
|
744
|
-
new_col = [FlowfileColumn.from_input(function_settings.function.field.name,
|
|
1002
|
+
new_col = [FlowfileColumn.from_input(function_settings.function.field.name, "String")]
|
|
745
1003
|
|
|
746
1004
|
def _func(fl: FlowDataEngine):
|
|
747
|
-
return fl.apply_sql_formula(
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
1005
|
+
return fl.apply_sql_formula(
|
|
1006
|
+
func=function_settings.function.function,
|
|
1007
|
+
col_name=function_settings.function.field.name,
|
|
1008
|
+
output_data_type=output_type,
|
|
1009
|
+
)
|
|
1010
|
+
|
|
1011
|
+
self.add_node_step(
|
|
1012
|
+
function_settings.node_id,
|
|
1013
|
+
_func,
|
|
1014
|
+
output_schema=new_col,
|
|
1015
|
+
node_type="formula",
|
|
1016
|
+
renew_schema=False,
|
|
1017
|
+
setting_input=function_settings,
|
|
1018
|
+
input_node_ids=[function_settings.depending_on_id],
|
|
1019
|
+
)
|
|
758
1020
|
if error != "":
|
|
759
1021
|
node = self.get_node(function_settings.node_id)
|
|
760
1022
|
node.results.errors = error
|
|
@@ -777,18 +1039,21 @@ class FlowGraph:
|
|
|
777
1039
|
left_select.is_available = True if left_select.old_name in main.schema else False
|
|
778
1040
|
for right_select in cross_join_settings.cross_join_input.right_select.renames:
|
|
779
1041
|
right_select.is_available = True if right_select.old_name in right.schema else False
|
|
1042
|
+
return main.do_cross_join(
|
|
1043
|
+
cross_join_input=cross_join_settings.cross_join_input,
|
|
1044
|
+
auto_generate_selection=cross_join_settings.auto_generate_selection,
|
|
1045
|
+
verify_integrity=False,
|
|
1046
|
+
other=right,
|
|
1047
|
+
)
|
|
780
1048
|
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
node_type='cross_join',
|
|
790
|
-
setting_input=cross_join_settings,
|
|
791
|
-
input_node_ids=cross_join_settings.depending_on_ids)
|
|
1049
|
+
self.add_node_step(
|
|
1050
|
+
node_id=cross_join_settings.node_id,
|
|
1051
|
+
function=_func,
|
|
1052
|
+
input_columns=[],
|
|
1053
|
+
node_type="cross_join",
|
|
1054
|
+
setting_input=cross_join_settings,
|
|
1055
|
+
input_node_ids=cross_join_settings.depending_on_ids,
|
|
1056
|
+
)
|
|
792
1057
|
return self
|
|
793
1058
|
|
|
794
1059
|
def add_join(self, join_settings: input_schema.NodeJoin) -> "FlowGraph":
|
|
@@ -806,18 +1071,21 @@ class FlowGraph:
|
|
|
806
1071
|
left_select.is_available = True if left_select.old_name in main.schema else False
|
|
807
1072
|
for right_select in join_settings.join_input.right_select.renames:
|
|
808
1073
|
right_select.is_available = True if right_select.old_name in right.schema else False
|
|
1074
|
+
return main.join(
|
|
1075
|
+
join_input=join_settings.join_input,
|
|
1076
|
+
auto_generate_selection=join_settings.auto_generate_selection,
|
|
1077
|
+
verify_integrity=False,
|
|
1078
|
+
other=right,
|
|
1079
|
+
)
|
|
809
1080
|
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
node_type='join',
|
|
819
|
-
setting_input=join_settings,
|
|
820
|
-
input_node_ids=join_settings.depending_on_ids)
|
|
1081
|
+
self.add_node_step(
|
|
1082
|
+
node_id=join_settings.node_id,
|
|
1083
|
+
function=_func,
|
|
1084
|
+
input_columns=[],
|
|
1085
|
+
node_type="join",
|
|
1086
|
+
setting_input=join_settings,
|
|
1087
|
+
input_node_ids=join_settings.depending_on_ids,
|
|
1088
|
+
)
|
|
821
1089
|
return self
|
|
822
1090
|
|
|
823
1091
|
def add_fuzzy_match(self, fuzzy_settings: input_schema.NodeFuzzyMatch) -> "FlowGraph":
|
|
@@ -833,31 +1101,43 @@ class FlowGraph:
|
|
|
833
1101
|
def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
|
|
834
1102
|
node = self.get_node(node_id=fuzzy_settings.node_id)
|
|
835
1103
|
if self.execution_location == "local":
|
|
836
|
-
return main.fuzzy_join(
|
|
837
|
-
|
|
838
|
-
|
|
1104
|
+
return main.fuzzy_join(
|
|
1105
|
+
fuzzy_match_input=deepcopy(fuzzy_settings.join_input),
|
|
1106
|
+
other=right,
|
|
1107
|
+
node_logger=self.flow_logger.get_node_logger(fuzzy_settings.node_id),
|
|
1108
|
+
)
|
|
839
1109
|
|
|
840
|
-
f = main.start_fuzzy_join(
|
|
841
|
-
|
|
1110
|
+
f = main.start_fuzzy_join(
|
|
1111
|
+
fuzzy_match_input=deepcopy(fuzzy_settings.join_input),
|
|
1112
|
+
other=right,
|
|
1113
|
+
file_ref=node.hash,
|
|
1114
|
+
flow_id=self.flow_id,
|
|
1115
|
+
node_id=fuzzy_settings.node_id,
|
|
1116
|
+
)
|
|
842
1117
|
logger.info("Started the fuzzy match action")
|
|
843
1118
|
node._fetch_cached_df = f # Add to the node so it can be cancelled and fetch later if needed
|
|
844
1119
|
return FlowDataEngine(f.get_result())
|
|
845
1120
|
|
|
846
1121
|
def schema_callback():
|
|
847
|
-
fm_input_copy =
|
|
1122
|
+
fm_input_copy = FuzzyMatchInputManager(
|
|
1123
|
+
fuzzy_settings.join_input
|
|
1124
|
+
) # Deepcopy create an unique object per func
|
|
848
1125
|
node = self.get_node(node_id=fuzzy_settings.node_id)
|
|
849
|
-
return calculate_fuzzy_match_schema(
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
1126
|
+
return calculate_fuzzy_match_schema(
|
|
1127
|
+
fm_input_copy,
|
|
1128
|
+
left_schema=node.node_inputs.main_inputs[0].schema,
|
|
1129
|
+
right_schema=node.node_inputs.right_input.schema,
|
|
1130
|
+
)
|
|
1131
|
+
|
|
1132
|
+
self.add_node_step(
|
|
1133
|
+
node_id=fuzzy_settings.node_id,
|
|
1134
|
+
function=_func,
|
|
1135
|
+
input_columns=[],
|
|
1136
|
+
node_type="fuzzy_match",
|
|
1137
|
+
setting_input=fuzzy_settings,
|
|
1138
|
+
input_node_ids=fuzzy_settings.depending_on_ids,
|
|
1139
|
+
schema_callback=schema_callback,
|
|
1140
|
+
)
|
|
861
1141
|
|
|
862
1142
|
return self
|
|
863
1143
|
|
|
@@ -874,14 +1154,17 @@ class FlowGraph:
|
|
|
874
1154
|
Returns:
|
|
875
1155
|
The `FlowGraph` instance for method chaining.
|
|
876
1156
|
"""
|
|
1157
|
+
|
|
877
1158
|
def _func(table: FlowDataEngine) -> FlowDataEngine:
|
|
878
1159
|
return table.split(node_text_to_rows.text_to_rows_input)
|
|
879
1160
|
|
|
880
|
-
self.add_node_step(
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
1161
|
+
self.add_node_step(
|
|
1162
|
+
node_id=node_text_to_rows.node_id,
|
|
1163
|
+
function=_func,
|
|
1164
|
+
node_type="text_to_rows",
|
|
1165
|
+
setting_input=node_text_to_rows,
|
|
1166
|
+
input_node_ids=[node_text_to_rows.depending_on_id],
|
|
1167
|
+
)
|
|
885
1168
|
return self
|
|
886
1169
|
|
|
887
1170
|
def add_sort(self, sort_settings: input_schema.NodeSort) -> "FlowGraph":
|
|
@@ -897,11 +1180,13 @@ class FlowGraph:
|
|
|
897
1180
|
def _func(table: FlowDataEngine) -> FlowDataEngine:
|
|
898
1181
|
return table.do_sort(sort_settings.sort_input)
|
|
899
1182
|
|
|
900
|
-
self.add_node_step(
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
1183
|
+
self.add_node_step(
|
|
1184
|
+
node_id=sort_settings.node_id,
|
|
1185
|
+
function=_func,
|
|
1186
|
+
node_type="sort",
|
|
1187
|
+
setting_input=sort_settings,
|
|
1188
|
+
input_node_ids=[sort_settings.depending_on_id],
|
|
1189
|
+
)
|
|
905
1190
|
return self
|
|
906
1191
|
|
|
907
1192
|
def add_sample(self, sample_settings: input_schema.NodeSample) -> "FlowGraph":
|
|
@@ -913,15 +1198,17 @@ class FlowGraph:
|
|
|
913
1198
|
Returns:
|
|
914
1199
|
The `FlowGraph` instance for method chaining.
|
|
915
1200
|
"""
|
|
1201
|
+
|
|
916
1202
|
def _func(table: FlowDataEngine) -> FlowDataEngine:
|
|
917
1203
|
return table.get_sample(sample_settings.sample_size)
|
|
918
1204
|
|
|
919
|
-
self.add_node_step(
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
1205
|
+
self.add_node_step(
|
|
1206
|
+
node_id=sample_settings.node_id,
|
|
1207
|
+
function=_func,
|
|
1208
|
+
node_type="sample",
|
|
1209
|
+
setting_input=sample_settings,
|
|
1210
|
+
input_node_ids=[sample_settings.depending_on_id],
|
|
1211
|
+
)
|
|
925
1212
|
return self
|
|
926
1213
|
|
|
927
1214
|
def add_record_id(self, record_id_settings: input_schema.NodeRecordId) -> "FlowGraph":
|
|
@@ -938,12 +1225,13 @@ class FlowGraph:
|
|
|
938
1225
|
def _func(table: FlowDataEngine) -> FlowDataEngine:
|
|
939
1226
|
return table.add_record_id(record_id_settings.record_id_input)
|
|
940
1227
|
|
|
941
|
-
self.add_node_step(
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
1228
|
+
self.add_node_step(
|
|
1229
|
+
node_id=record_id_settings.node_id,
|
|
1230
|
+
function=_func,
|
|
1231
|
+
node_type="record_id",
|
|
1232
|
+
setting_input=record_id_settings,
|
|
1233
|
+
input_node_ids=[record_id_settings.depending_on_id],
|
|
1234
|
+
)
|
|
947
1235
|
return self
|
|
948
1236
|
|
|
949
1237
|
def add_select(self, select_settings: input_schema.NodeSelect) -> "FlowGraph":
|
|
@@ -975,16 +1263,19 @@ class FlowGraph:
|
|
|
975
1263
|
for i in ids_to_remove:
|
|
976
1264
|
v = select_cols.pop(i)
|
|
977
1265
|
del v
|
|
978
|
-
return table.do_select(
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
1266
|
+
return table.do_select(
|
|
1267
|
+
select_inputs=transform_schema.SelectInputs(select_cols), keep_missing=select_settings.keep_missing
|
|
1268
|
+
)
|
|
1269
|
+
|
|
1270
|
+
self.add_node_step(
|
|
1271
|
+
node_id=select_settings.node_id,
|
|
1272
|
+
function=_func,
|
|
1273
|
+
input_columns=[],
|
|
1274
|
+
node_type="select",
|
|
1275
|
+
drop_columns=list(drop_cols),
|
|
1276
|
+
setting_input=select_settings,
|
|
1277
|
+
input_node_ids=[select_settings.depending_on_id],
|
|
1278
|
+
)
|
|
988
1279
|
return self
|
|
989
1280
|
|
|
990
1281
|
@property
|
|
@@ -992,7 +1283,7 @@ class FlowGraph:
|
|
|
992
1283
|
"""Checks if the graph has any nodes."""
|
|
993
1284
|
return len(self._node_ids) > 0
|
|
994
1285
|
|
|
995
|
-
def delete_node(self, node_id:
|
|
1286
|
+
def delete_node(self, node_id: int | str):
|
|
996
1287
|
"""Deletes a node from the graph and updates all its connections.
|
|
997
1288
|
|
|
998
1289
|
Args:
|
|
@@ -1007,7 +1298,7 @@ class FlowGraph:
|
|
|
1007
1298
|
if node:
|
|
1008
1299
|
logger.info(f"Found node: {node_id}, processing deletion")
|
|
1009
1300
|
|
|
1010
|
-
lead_to_steps:
|
|
1301
|
+
lead_to_steps: list[FlowNode] = node.leads_to_nodes
|
|
1011
1302
|
logger.debug(f"Node {node_id} leads to {len(lead_to_steps)} other nodes")
|
|
1012
1303
|
|
|
1013
1304
|
if len(lead_to_steps) > 0:
|
|
@@ -1016,7 +1307,7 @@ class FlowGraph:
|
|
|
1016
1307
|
lead_to_step.delete_input_node(node_id, complete=True)
|
|
1017
1308
|
|
|
1018
1309
|
if not node.is_start:
|
|
1019
|
-
depends_on:
|
|
1310
|
+
depends_on: list[FlowNode] = node.node_inputs.get_all_inputs()
|
|
1020
1311
|
logger.debug(f"Node {node_id} depends on {len(depends_on)} other nodes")
|
|
1021
1312
|
|
|
1022
1313
|
for depend_on in depends_on:
|
|
@@ -1036,18 +1327,20 @@ class FlowGraph:
|
|
|
1036
1327
|
"""Checks if the graph has an initial input data source."""
|
|
1037
1328
|
return self._input_data is not None
|
|
1038
1329
|
|
|
1039
|
-
def add_node_step(
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1330
|
+
def add_node_step(
|
|
1331
|
+
self,
|
|
1332
|
+
node_id: int | str,
|
|
1333
|
+
function: Callable,
|
|
1334
|
+
input_columns: list[str] = None,
|
|
1335
|
+
output_schema: list[FlowfileColumn] = None,
|
|
1336
|
+
node_type: str = None,
|
|
1337
|
+
drop_columns: list[str] = None,
|
|
1338
|
+
renew_schema: bool = True,
|
|
1339
|
+
setting_input: Any = None,
|
|
1340
|
+
cache_results: bool = None,
|
|
1341
|
+
schema_callback: Callable = None,
|
|
1342
|
+
input_node_ids: list[int] = None,
|
|
1343
|
+
) -> FlowNode:
|
|
1051
1344
|
"""The core method for adding or updating a node in the graph.
|
|
1052
1345
|
|
|
1053
1346
|
Args:
|
|
@@ -1080,29 +1373,33 @@ class FlowGraph:
|
|
|
1080
1373
|
if isinstance(input_columns, str):
|
|
1081
1374
|
input_columns = [input_columns]
|
|
1082
1375
|
if (
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1376
|
+
input_nodes is not None
|
|
1377
|
+
or function.__name__ in ("placeholder", "analysis_preparation")
|
|
1378
|
+
or node_type in ("cloud_storage_reader", "polars_lazy_frame", "input_data")
|
|
1086
1379
|
):
|
|
1087
1380
|
if not existing_node:
|
|
1088
|
-
node = FlowNode(
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1381
|
+
node = FlowNode(
|
|
1382
|
+
node_id=node_id,
|
|
1383
|
+
function=function,
|
|
1384
|
+
output_schema=output_schema,
|
|
1385
|
+
input_columns=input_columns,
|
|
1386
|
+
drop_columns=drop_columns,
|
|
1387
|
+
renew_schema=renew_schema,
|
|
1388
|
+
setting_input=setting_input,
|
|
1389
|
+
node_type=node_type,
|
|
1390
|
+
name=function.__name__,
|
|
1391
|
+
schema_callback=schema_callback,
|
|
1392
|
+
parent_uuid=self.uuid,
|
|
1393
|
+
)
|
|
1099
1394
|
else:
|
|
1100
|
-
existing_node.update_node(
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1395
|
+
existing_node.update_node(
|
|
1396
|
+
function=function,
|
|
1397
|
+
output_schema=output_schema,
|
|
1398
|
+
input_columns=input_columns,
|
|
1399
|
+
drop_columns=drop_columns,
|
|
1400
|
+
setting_input=setting_input,
|
|
1401
|
+
schema_callback=schema_callback,
|
|
1402
|
+
)
|
|
1106
1403
|
node = existing_node
|
|
1107
1404
|
else:
|
|
1108
1405
|
raise Exception("No data initialized")
|
|
@@ -1110,7 +1407,7 @@ class FlowGraph:
|
|
|
1110
1407
|
self._node_ids.append(node_id)
|
|
1111
1408
|
return node
|
|
1112
1409
|
|
|
1113
|
-
def add_include_cols(self, include_columns:
|
|
1410
|
+
def add_include_cols(self, include_columns: list[str]):
|
|
1114
1411
|
"""Adds columns to both the input and output column lists.
|
|
1115
1412
|
|
|
1116
1413
|
Args:
|
|
@@ -1131,24 +1428,30 @@ class FlowGraph:
|
|
|
1131
1428
|
"""
|
|
1132
1429
|
|
|
1133
1430
|
def _func(df: FlowDataEngine):
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1431
|
+
execute_remote = self.execution_location != "local"
|
|
1432
|
+
df.output(
|
|
1433
|
+
output_fs=output_file.output_settings,
|
|
1434
|
+
flow_id=self.flow_id,
|
|
1435
|
+
node_id=output_file.node_id,
|
|
1436
|
+
execute_remote=execute_remote,
|
|
1437
|
+
)
|
|
1138
1438
|
return df
|
|
1139
1439
|
|
|
1140
1440
|
def schema_callback():
|
|
1141
1441
|
input_node: FlowNode = self.get_node(output_file.node_id).node_inputs.main_inputs[0]
|
|
1142
1442
|
|
|
1143
1443
|
return input_node.schema
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1444
|
+
|
|
1445
|
+
input_node_id = output_file.depending_on_id if hasattr(output_file, "depending_on_id") else None
|
|
1446
|
+
self.add_node_step(
|
|
1447
|
+
node_id=output_file.node_id,
|
|
1448
|
+
function=_func,
|
|
1449
|
+
input_columns=[],
|
|
1450
|
+
node_type="output",
|
|
1451
|
+
setting_input=output_file,
|
|
1452
|
+
schema_callback=schema_callback,
|
|
1453
|
+
input_node_ids=[input_node_id],
|
|
1454
|
+
)
|
|
1152
1455
|
|
|
1153
1456
|
def add_database_writer(self, node_database_writer: input_schema.NodeDatabaseWriter):
|
|
1154
1457
|
"""Adds a node to write data to a database.
|
|
@@ -1157,18 +1460,20 @@ class FlowGraph:
|
|
|
1157
1460
|
node_database_writer: The settings for the database writer node.
|
|
1158
1461
|
"""
|
|
1159
1462
|
|
|
1160
|
-
node_type =
|
|
1463
|
+
node_type = "database_writer"
|
|
1161
1464
|
database_settings: input_schema.DatabaseWriteSettings = node_database_writer.database_write_settings
|
|
1162
|
-
database_connection:
|
|
1163
|
-
if database_settings.connection_mode ==
|
|
1465
|
+
database_connection: input_schema.DatabaseConnection | input_schema.FullDatabaseConnection | None
|
|
1466
|
+
if database_settings.connection_mode == "inline":
|
|
1164
1467
|
database_connection: input_schema.DatabaseConnection = database_settings.database_connection
|
|
1165
|
-
encrypted_password = get_encrypted_secret(
|
|
1166
|
-
|
|
1468
|
+
encrypted_password = get_encrypted_secret(
|
|
1469
|
+
current_user_id=node_database_writer.user_id, secret_name=database_connection.password_ref
|
|
1470
|
+
)
|
|
1167
1471
|
if encrypted_password is None:
|
|
1168
1472
|
raise HTTPException(status_code=400, detail="Password not found")
|
|
1169
1473
|
else:
|
|
1170
|
-
database_reference_settings = get_local_database_connection(
|
|
1171
|
-
|
|
1474
|
+
database_reference_settings = get_local_database_connection(
|
|
1475
|
+
database_settings.database_connection_name, node_database_writer.user_id
|
|
1476
|
+
)
|
|
1172
1477
|
encrypted_password = database_reference_settings.password.get_secret_value()
|
|
1173
1478
|
|
|
1174
1479
|
def _func(df: FlowDataEngine):
|
|
@@ -1177,14 +1482,20 @@ class FlowGraph:
|
|
|
1177
1482
|
sql_models.DatabaseExternalWriteSettings.create_from_from_node_database_writer(
|
|
1178
1483
|
node_database_writer=node_database_writer,
|
|
1179
1484
|
password=encrypted_password,
|
|
1180
|
-
table_name=(
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
|
-
|
|
1485
|
+
table_name=(
|
|
1486
|
+
database_settings.schema_name + "." + database_settings.table_name
|
|
1487
|
+
if database_settings.schema_name
|
|
1488
|
+
else database_settings.table_name
|
|
1489
|
+
),
|
|
1490
|
+
database_reference_settings=(
|
|
1491
|
+
database_reference_settings if database_settings.connection_mode == "reference" else None
|
|
1492
|
+
),
|
|
1493
|
+
lf=df.data_frame,
|
|
1185
1494
|
)
|
|
1186
1495
|
)
|
|
1187
|
-
external_database_writer = ExternalDatabaseWriter(
|
|
1496
|
+
external_database_writer = ExternalDatabaseWriter(
|
|
1497
|
+
database_external_write_settings, wait_on_completion=False
|
|
1498
|
+
)
|
|
1188
1499
|
node._fetch_cached_df = external_database_writer
|
|
1189
1500
|
external_database_writer.get_result()
|
|
1190
1501
|
return df
|
|
@@ -1211,56 +1522,64 @@ class FlowGraph:
|
|
|
1211
1522
|
"""
|
|
1212
1523
|
|
|
1213
1524
|
logger.info("Adding database reader")
|
|
1214
|
-
node_type =
|
|
1525
|
+
node_type = "database_reader"
|
|
1215
1526
|
database_settings: input_schema.DatabaseSettings = node_database_reader.database_settings
|
|
1216
|
-
database_connection:
|
|
1217
|
-
if database_settings.connection_mode ==
|
|
1527
|
+
database_connection: input_schema.DatabaseConnection | input_schema.FullDatabaseConnection | None
|
|
1528
|
+
if database_settings.connection_mode == "inline":
|
|
1218
1529
|
database_connection: input_schema.DatabaseConnection = database_settings.database_connection
|
|
1219
|
-
encrypted_password = get_encrypted_secret(
|
|
1220
|
-
|
|
1530
|
+
encrypted_password = get_encrypted_secret(
|
|
1531
|
+
current_user_id=node_database_reader.user_id, secret_name=database_connection.password_ref
|
|
1532
|
+
)
|
|
1221
1533
|
if encrypted_password is None:
|
|
1222
1534
|
raise HTTPException(status_code=400, detail="Password not found")
|
|
1223
1535
|
else:
|
|
1224
|
-
database_reference_settings = get_local_database_connection(
|
|
1225
|
-
|
|
1536
|
+
database_reference_settings = get_local_database_connection(
|
|
1537
|
+
database_settings.database_connection_name, node_database_reader.user_id
|
|
1538
|
+
)
|
|
1226
1539
|
database_connection = database_reference_settings
|
|
1227
1540
|
encrypted_password = database_reference_settings.password.get_secret_value()
|
|
1228
1541
|
|
|
1229
1542
|
def _func():
|
|
1230
|
-
sql_source = BaseSqlSource(
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1543
|
+
sql_source = BaseSqlSource(
|
|
1544
|
+
query=None if database_settings.query_mode == "table" else database_settings.query,
|
|
1545
|
+
table_name=database_settings.table_name,
|
|
1546
|
+
schema_name=database_settings.schema_name,
|
|
1547
|
+
fields=node_database_reader.fields,
|
|
1548
|
+
)
|
|
1235
1549
|
database_external_read_settings = (
|
|
1236
1550
|
sql_models.DatabaseExternalReadSettings.create_from_from_node_database_reader(
|
|
1237
1551
|
node_database_reader=node_database_reader,
|
|
1238
1552
|
password=encrypted_password,
|
|
1239
1553
|
query=sql_source.query,
|
|
1240
|
-
database_reference_settings=(
|
|
1241
|
-
|
|
1554
|
+
database_reference_settings=(
|
|
1555
|
+
database_reference_settings if database_settings.connection_mode == "reference" else None
|
|
1556
|
+
),
|
|
1242
1557
|
)
|
|
1243
1558
|
)
|
|
1244
1559
|
|
|
1245
|
-
external_database_fetcher = ExternalDatabaseFetcher(
|
|
1560
|
+
external_database_fetcher = ExternalDatabaseFetcher(
|
|
1561
|
+
database_external_read_settings, wait_on_completion=False
|
|
1562
|
+
)
|
|
1246
1563
|
node._fetch_cached_df = external_database_fetcher
|
|
1247
1564
|
fl = FlowDataEngine(external_database_fetcher.get_result())
|
|
1248
1565
|
node_database_reader.fields = [c.get_minimal_field_info() for c in fl.schema]
|
|
1249
1566
|
return fl
|
|
1250
1567
|
|
|
1251
1568
|
def schema_callback():
|
|
1252
|
-
sql_source = SqlSource(
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1569
|
+
sql_source = SqlSource(
|
|
1570
|
+
connection_string=sql_utils.construct_sql_uri(
|
|
1571
|
+
database_type=database_connection.database_type,
|
|
1572
|
+
host=database_connection.host,
|
|
1573
|
+
port=database_connection.port,
|
|
1574
|
+
database=database_connection.database,
|
|
1575
|
+
username=database_connection.username,
|
|
1576
|
+
password=decrypt_secret(encrypted_password),
|
|
1577
|
+
),
|
|
1578
|
+
query=None if database_settings.query_mode == "table" else database_settings.query,
|
|
1579
|
+
table_name=database_settings.table_name,
|
|
1580
|
+
schema_name=database_settings.schema_name,
|
|
1581
|
+
fields=node_database_reader.fields,
|
|
1582
|
+
)
|
|
1264
1583
|
return sql_source.get_schema()
|
|
1265
1584
|
|
|
1266
1585
|
node = self.get_node(node_database_reader.node_id)
|
|
@@ -1270,16 +1589,20 @@ class FlowGraph:
|
|
|
1270
1589
|
node.function = _func
|
|
1271
1590
|
node.setting_input = node_database_reader
|
|
1272
1591
|
node.node_settings.cache_results = node_database_reader.cache_results
|
|
1273
|
-
|
|
1274
|
-
self._flow_starts.append(node)
|
|
1592
|
+
self.add_node_to_starting_list(node)
|
|
1275
1593
|
node.schema_callback = schema_callback
|
|
1276
1594
|
else:
|
|
1277
|
-
node = FlowNode(
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1595
|
+
node = FlowNode(
|
|
1596
|
+
node_database_reader.node_id,
|
|
1597
|
+
function=_func,
|
|
1598
|
+
setting_input=node_database_reader,
|
|
1599
|
+
name=node_type,
|
|
1600
|
+
node_type=node_type,
|
|
1601
|
+
parent_uuid=self.uuid,
|
|
1602
|
+
schema_callback=schema_callback,
|
|
1603
|
+
)
|
|
1281
1604
|
self._node_db[node_database_reader.node_id] = node
|
|
1282
|
-
self.
|
|
1605
|
+
self.add_node_to_starting_list(node)
|
|
1283
1606
|
self._node_ids.append(node_database_reader.node_id)
|
|
1284
1607
|
|
|
1285
1608
|
def add_sql_source(self, external_source_input: input_schema.NodeExternalSource):
|
|
@@ -1290,7 +1613,7 @@ class FlowGraph:
|
|
|
1290
1613
|
Args:
|
|
1291
1614
|
external_source_input: The settings for the external SQL source node.
|
|
1292
1615
|
"""
|
|
1293
|
-
logger.info(
|
|
1616
|
+
logger.info("Adding sql source")
|
|
1294
1617
|
self.add_external_source(external_source_input)
|
|
1295
1618
|
|
|
1296
1619
|
def add_cloud_storage_writer(self, node_cloud_storage_writer: input_schema.NodeCloudStorageWriter) -> None:
|
|
@@ -1301,19 +1624,20 @@ class FlowGraph:
|
|
|
1301
1624
|
"""
|
|
1302
1625
|
|
|
1303
1626
|
node_type = "cloud_storage_writer"
|
|
1627
|
+
|
|
1304
1628
|
def _func(df: FlowDataEngine):
|
|
1305
1629
|
df.lazy = True
|
|
1306
|
-
execute_remote = self.execution_location !=
|
|
1630
|
+
execute_remote = self.execution_location != "local"
|
|
1307
1631
|
cloud_connection_settings = get_cloud_connection_settings(
|
|
1308
1632
|
connection_name=node_cloud_storage_writer.cloud_storage_settings.connection_name,
|
|
1309
1633
|
user_id=node_cloud_storage_writer.user_id,
|
|
1310
|
-
auth_mode=node_cloud_storage_writer.cloud_storage_settings.auth_mode
|
|
1634
|
+
auth_mode=node_cloud_storage_writer.cloud_storage_settings.auth_mode,
|
|
1311
1635
|
)
|
|
1312
1636
|
full_cloud_storage_connection = FullCloudStorageConnection(
|
|
1313
1637
|
storage_type=cloud_connection_settings.storage_type,
|
|
1314
1638
|
auth_method=cloud_connection_settings.auth_method,
|
|
1315
1639
|
aws_allow_unsafe_html=cloud_connection_settings.aws_allow_unsafe_html,
|
|
1316
|
-
**CloudStorageReader.get_storage_options(cloud_connection_settings)
|
|
1640
|
+
**CloudStorageReader.get_storage_options(cloud_connection_settings),
|
|
1317
1641
|
)
|
|
1318
1642
|
if execute_remote:
|
|
1319
1643
|
settings = get_cloud_storage_write_settings_worker_interface(
|
|
@@ -1321,7 +1645,8 @@ class FlowGraph:
|
|
|
1321
1645
|
connection=full_cloud_storage_connection,
|
|
1322
1646
|
lf=df.data_frame,
|
|
1323
1647
|
flowfile_node_id=node_cloud_storage_writer.node_id,
|
|
1324
|
-
flowfile_flow_id=self.flow_id
|
|
1648
|
+
flowfile_flow_id=self.flow_id,
|
|
1649
|
+
)
|
|
1325
1650
|
external_database_writer = ExternalCloudWriter(settings, wait_on_completion=False)
|
|
1326
1651
|
node._fetch_cached_df = external_database_writer
|
|
1327
1652
|
external_database_writer.get_result()
|
|
@@ -1347,7 +1672,7 @@ class FlowGraph:
|
|
|
1347
1672
|
node_type=node_type,
|
|
1348
1673
|
setting_input=node_cloud_storage_writer,
|
|
1349
1674
|
schema_callback=schema_callback,
|
|
1350
|
-
input_node_ids=[node_cloud_storage_writer.depending_on_id]
|
|
1675
|
+
input_node_ids=[node_cloud_storage_writer.depending_on_id],
|
|
1351
1676
|
)
|
|
1352
1677
|
|
|
1353
1678
|
node = self.get_node(node_cloud_storage_writer.node_id)
|
|
@@ -1365,49 +1690,53 @@ class FlowGraph:
|
|
|
1365
1690
|
def _func():
|
|
1366
1691
|
logger.info("Starting to run the schema callback for cloud storage reader")
|
|
1367
1692
|
self.flow_logger.info("Starting to run the schema callback for cloud storage reader")
|
|
1368
|
-
settings = CloudStorageReadSettingsInternal(
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
1693
|
+
settings = CloudStorageReadSettingsInternal(
|
|
1694
|
+
read_settings=cloud_storage_read_settings,
|
|
1695
|
+
connection=get_cloud_connection_settings(
|
|
1696
|
+
connection_name=cloud_storage_read_settings.connection_name,
|
|
1697
|
+
user_id=node_cloud_storage_reader.user_id,
|
|
1698
|
+
auth_mode=cloud_storage_read_settings.auth_mode,
|
|
1699
|
+
),
|
|
1700
|
+
)
|
|
1374
1701
|
fl = FlowDataEngine.from_cloud_storage_obj(settings)
|
|
1375
1702
|
return fl
|
|
1376
1703
|
|
|
1377
|
-
node = self.add_node_step(
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
|
|
1704
|
+
node = self.add_node_step(
|
|
1705
|
+
node_id=node_cloud_storage_reader.node_id,
|
|
1706
|
+
function=_func,
|
|
1707
|
+
cache_results=node_cloud_storage_reader.cache_results,
|
|
1708
|
+
setting_input=node_cloud_storage_reader,
|
|
1709
|
+
node_type=node_type,
|
|
1710
|
+
)
|
|
1711
|
+
self.add_node_to_starting_list(node)
|
|
1385
1712
|
|
|
1386
|
-
def add_external_source(self,
|
|
1387
|
-
external_source_input: input_schema.NodeExternalSource):
|
|
1713
|
+
def add_external_source(self, external_source_input: input_schema.NodeExternalSource):
|
|
1388
1714
|
"""Adds a node for a custom external data source.
|
|
1389
1715
|
|
|
1390
1716
|
Args:
|
|
1391
1717
|
external_source_input: The settings for the external source node.
|
|
1392
1718
|
"""
|
|
1393
1719
|
|
|
1394
|
-
node_type =
|
|
1720
|
+
node_type = "external_source"
|
|
1395
1721
|
external_source_script = getattr(external_sources.custom_external_sources, external_source_input.identifier)
|
|
1396
|
-
source_settings =
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1722
|
+
source_settings = getattr(
|
|
1723
|
+
input_schema, snake_case_to_camel_case(external_source_input.identifier)
|
|
1724
|
+
).model_validate(external_source_input.source_settings)
|
|
1725
|
+
if hasattr(external_source_script, "initial_getter"):
|
|
1726
|
+
initial_getter = external_source_script.initial_getter(source_settings)
|
|
1400
1727
|
else:
|
|
1401
1728
|
initial_getter = None
|
|
1402
1729
|
data_getter = external_source_script.getter(source_settings)
|
|
1403
|
-
external_source = data_source_factory(
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1730
|
+
external_source = data_source_factory(
|
|
1731
|
+
source_type="custom",
|
|
1732
|
+
data_getter=data_getter,
|
|
1733
|
+
initial_data_getter=initial_getter,
|
|
1734
|
+
orientation=external_source_input.source_settings.orientation,
|
|
1735
|
+
schema=None,
|
|
1736
|
+
)
|
|
1408
1737
|
|
|
1409
1738
|
def _func():
|
|
1410
|
-
logger.info(
|
|
1739
|
+
logger.info("Calling external source")
|
|
1411
1740
|
fl = FlowDataEngine.create_from_external_source(external_source=external_source)
|
|
1412
1741
|
external_source_input.source_settings.fields = [c.get_minimal_field_info() for c in fl.schema]
|
|
1413
1742
|
return fl
|
|
@@ -1419,31 +1748,39 @@ class FlowGraph:
|
|
|
1419
1748
|
node.function = _func
|
|
1420
1749
|
node.setting_input = external_source_input
|
|
1421
1750
|
node.node_settings.cache_results = external_source_input.cache_results
|
|
1422
|
-
|
|
1423
|
-
|
|
1751
|
+
self.add_node_to_starting_list(node)
|
|
1752
|
+
|
|
1424
1753
|
else:
|
|
1425
|
-
node = FlowNode(
|
|
1426
|
-
|
|
1427
|
-
|
|
1754
|
+
node = FlowNode(
|
|
1755
|
+
external_source_input.node_id,
|
|
1756
|
+
function=_func,
|
|
1757
|
+
setting_input=external_source_input,
|
|
1758
|
+
name=node_type,
|
|
1759
|
+
node_type=node_type,
|
|
1760
|
+
parent_uuid=self.uuid,
|
|
1761
|
+
)
|
|
1428
1762
|
self._node_db[external_source_input.node_id] = node
|
|
1429
|
-
self.
|
|
1763
|
+
self.add_node_to_starting_list(node)
|
|
1430
1764
|
self._node_ids.append(external_source_input.node_id)
|
|
1431
1765
|
if external_source_input.source_settings.fields and len(external_source_input.source_settings.fields) > 0:
|
|
1432
|
-
logger.info(
|
|
1766
|
+
logger.info("Using provided schema in the node")
|
|
1433
1767
|
|
|
1434
1768
|
def schema_callback():
|
|
1435
|
-
return [
|
|
1436
|
-
|
|
1769
|
+
return [
|
|
1770
|
+
FlowfileColumn.from_input(f.name, f.data_type) for f in external_source_input.source_settings.fields
|
|
1771
|
+
]
|
|
1437
1772
|
|
|
1438
1773
|
node.schema_callback = schema_callback
|
|
1439
1774
|
else:
|
|
1440
|
-
logger.warning(
|
|
1775
|
+
logger.warning("Removing schema")
|
|
1441
1776
|
node._schema_callback = None
|
|
1442
|
-
self.add_node_step(
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1777
|
+
self.add_node_step(
|
|
1778
|
+
node_id=external_source_input.node_id,
|
|
1779
|
+
function=_func,
|
|
1780
|
+
input_columns=[],
|
|
1781
|
+
node_type=node_type,
|
|
1782
|
+
setting_input=external_source_input,
|
|
1783
|
+
)
|
|
1447
1784
|
|
|
1448
1785
|
def add_read(self, input_file: input_schema.NodeRead):
|
|
1449
1786
|
"""Adds a node to read data from a local file (e.g., CSV, Parquet, Excel).
|
|
@@ -1451,24 +1788,29 @@ class FlowGraph:
|
|
|
1451
1788
|
Args:
|
|
1452
1789
|
input_file: The settings for the read operation.
|
|
1453
1790
|
"""
|
|
1454
|
-
|
|
1455
|
-
|
|
1791
|
+
if (
|
|
1792
|
+
input_file.received_file.file_type in ("xlsx", "excel")
|
|
1793
|
+
and input_file.received_file.table_settings.sheet_name == ""
|
|
1794
|
+
):
|
|
1456
1795
|
sheet_name = fastexcel.read_excel(input_file.received_file.path).sheet_names[0]
|
|
1457
|
-
input_file.received_file.sheet_name = sheet_name
|
|
1796
|
+
input_file.received_file.table_settings.sheet_name = sheet_name
|
|
1458
1797
|
|
|
1459
1798
|
received_file = input_file.received_file
|
|
1460
1799
|
input_file.received_file.set_absolute_filepath()
|
|
1461
1800
|
|
|
1462
1801
|
def _func():
|
|
1463
1802
|
input_file.received_file.set_absolute_filepath()
|
|
1464
|
-
if input_file.received_file.file_type ==
|
|
1803
|
+
if input_file.received_file.file_type == "parquet":
|
|
1465
1804
|
input_data = FlowDataEngine.create_from_path(input_file.received_file)
|
|
1466
|
-
elif
|
|
1805
|
+
elif (
|
|
1806
|
+
input_file.received_file.file_type == "csv"
|
|
1807
|
+
and "utf" in input_file.received_file.table_settings.encoding
|
|
1808
|
+
):
|
|
1467
1809
|
input_data = FlowDataEngine.create_from_path(input_file.received_file)
|
|
1468
1810
|
else:
|
|
1469
|
-
input_data = FlowDataEngine.create_from_path_worker(
|
|
1470
|
-
|
|
1471
|
-
|
|
1811
|
+
input_data = FlowDataEngine.create_from_path_worker(
|
|
1812
|
+
input_file.received_file, node_id=input_file.node_id, flow_id=self.flow_id
|
|
1813
|
+
)
|
|
1472
1814
|
input_data.name = input_file.received_file.name
|
|
1473
1815
|
return input_data
|
|
1474
1816
|
|
|
@@ -1476,51 +1818,57 @@ class FlowGraph:
|
|
|
1476
1818
|
schema_callback = None
|
|
1477
1819
|
if node:
|
|
1478
1820
|
start_hash = node.hash
|
|
1479
|
-
node.node_type =
|
|
1480
|
-
node.name =
|
|
1821
|
+
node.node_type = "read"
|
|
1822
|
+
node.name = "read"
|
|
1481
1823
|
node.function = _func
|
|
1482
1824
|
node.setting_input = input_file
|
|
1483
|
-
|
|
1484
|
-
self._flow_starts.append(node)
|
|
1825
|
+
self.add_node_to_starting_list(node)
|
|
1485
1826
|
|
|
1486
1827
|
if start_hash != node.hash:
|
|
1487
|
-
logger.info(
|
|
1828
|
+
logger.info("Hash changed, updating schema")
|
|
1488
1829
|
if len(received_file.fields) > 0:
|
|
1489
1830
|
# If the file has fields defined, we can use them to create the schema
|
|
1490
1831
|
def schema_callback():
|
|
1491
1832
|
return [FlowfileColumn.from_input(f.name, f.data_type) for f in received_file.fields]
|
|
1492
1833
|
|
|
1493
|
-
elif input_file.received_file.file_type in (
|
|
1834
|
+
elif input_file.received_file.file_type in ("csv", "json", "parquet"):
|
|
1494
1835
|
# everything that can be scanned by polars
|
|
1495
1836
|
def schema_callback():
|
|
1496
1837
|
input_data = FlowDataEngine.create_from_path(input_file.received_file)
|
|
1497
1838
|
return input_data.schema
|
|
1498
1839
|
|
|
1499
|
-
elif input_file.received_file.file_type in (
|
|
1840
|
+
elif input_file.received_file.file_type in ("xlsx", "excel"):
|
|
1500
1841
|
# If the file is an Excel file, we need to use the openpyxl engine to read the schema
|
|
1501
|
-
schema_callback = get_xlsx_schema_callback(
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
|
|
1842
|
+
schema_callback = get_xlsx_schema_callback(
|
|
1843
|
+
engine="openpyxl",
|
|
1844
|
+
file_path=received_file.file_path,
|
|
1845
|
+
sheet_name=received_file.table_settings.sheet_name,
|
|
1846
|
+
start_row=received_file.table_settings.start_row,
|
|
1847
|
+
end_row=received_file.table_settings.end_row,
|
|
1848
|
+
start_column=received_file.table_settings.start_column,
|
|
1849
|
+
end_column=received_file.table_settings.end_column,
|
|
1850
|
+
has_headers=received_file.table_settings.has_headers,
|
|
1851
|
+
)
|
|
1509
1852
|
else:
|
|
1510
1853
|
schema_callback = None
|
|
1511
1854
|
else:
|
|
1512
|
-
node = FlowNode(
|
|
1513
|
-
|
|
1514
|
-
|
|
1855
|
+
node = FlowNode(
|
|
1856
|
+
input_file.node_id,
|
|
1857
|
+
function=_func,
|
|
1858
|
+
setting_input=input_file,
|
|
1859
|
+
name="read",
|
|
1860
|
+
node_type="read",
|
|
1861
|
+
parent_uuid=self.uuid,
|
|
1862
|
+
)
|
|
1515
1863
|
self._node_db[input_file.node_id] = node
|
|
1516
|
-
self.
|
|
1864
|
+
self.add_node_to_starting_list(node)
|
|
1517
1865
|
self._node_ids.append(input_file.node_id)
|
|
1518
1866
|
|
|
1519
1867
|
if schema_callback is not None:
|
|
1520
1868
|
node.schema_callback = schema_callback
|
|
1521
1869
|
return self
|
|
1522
1870
|
|
|
1523
|
-
def add_datasource(self, input_file:
|
|
1871
|
+
def add_datasource(self, input_file: input_schema.NodeDatasource | input_schema.NodeManualInput) -> "FlowGraph":
|
|
1524
1872
|
"""Adds a data source node to the graph.
|
|
1525
1873
|
|
|
1526
1874
|
This method serves as a factory for creating starting nodes, handling both
|
|
@@ -1534,25 +1882,30 @@ class FlowGraph:
|
|
|
1534
1882
|
"""
|
|
1535
1883
|
if isinstance(input_file, input_schema.NodeManualInput):
|
|
1536
1884
|
input_data = FlowDataEngine(input_file.raw_data_format)
|
|
1537
|
-
ref =
|
|
1885
|
+
ref = "manual_input"
|
|
1538
1886
|
else:
|
|
1539
1887
|
input_data = FlowDataEngine(path_ref=input_file.file_ref)
|
|
1540
|
-
ref =
|
|
1888
|
+
ref = "datasource"
|
|
1541
1889
|
node = self.get_node(input_file.node_id)
|
|
1542
1890
|
if node:
|
|
1543
1891
|
node.node_type = ref
|
|
1544
1892
|
node.name = ref
|
|
1545
1893
|
node.function = input_data
|
|
1546
1894
|
node.setting_input = input_file
|
|
1547
|
-
|
|
1548
|
-
|
|
1895
|
+
self.add_node_to_starting_list(node)
|
|
1896
|
+
|
|
1549
1897
|
else:
|
|
1550
1898
|
input_data.collect()
|
|
1551
|
-
node = FlowNode(
|
|
1552
|
-
|
|
1553
|
-
|
|
1899
|
+
node = FlowNode(
|
|
1900
|
+
input_file.node_id,
|
|
1901
|
+
function=input_data,
|
|
1902
|
+
setting_input=input_file,
|
|
1903
|
+
name=ref,
|
|
1904
|
+
node_type=ref,
|
|
1905
|
+
parent_uuid=self.uuid,
|
|
1906
|
+
)
|
|
1554
1907
|
self._node_db[input_file.node_id] = node
|
|
1555
|
-
self.
|
|
1908
|
+
self.add_node_to_starting_list(node)
|
|
1556
1909
|
self._node_ids.append(input_file.node_id)
|
|
1557
1910
|
return self
|
|
1558
1911
|
|
|
@@ -1567,7 +1920,7 @@ class FlowGraph:
|
|
|
1567
1920
|
self.add_datasource(input_file)
|
|
1568
1921
|
|
|
1569
1922
|
@property
|
|
1570
|
-
def nodes(self) ->
|
|
1923
|
+
def nodes(self) -> list[FlowNode]:
|
|
1571
1924
|
"""Gets a list of all FlowNode objects in the graph."""
|
|
1572
1925
|
|
|
1573
1926
|
return list(self._node_db.values())
|
|
@@ -1577,7 +1930,7 @@ class FlowGraph:
|
|
|
1577
1930
|
"""Gets the current execution mode ('Development' or 'Performance')."""
|
|
1578
1931
|
return self.flow_settings.execution_mode
|
|
1579
1932
|
|
|
1580
|
-
def get_implicit_starter_nodes(self) ->
|
|
1933
|
+
def get_implicit_starter_nodes(self) -> list[FlowNode]:
|
|
1581
1934
|
"""Finds nodes that can act as starting points but are not explicitly defined as such.
|
|
1582
1935
|
|
|
1583
1936
|
Some nodes, like the Polars Code node, can function without an input. This
|
|
@@ -1623,17 +1976,31 @@ class FlowGraph:
|
|
|
1623
1976
|
if not flow_node:
|
|
1624
1977
|
raise Exception("Node not found found")
|
|
1625
1978
|
skip_nodes, execution_order = compute_execution_plan(
|
|
1626
|
-
nodes=self.nodes, flow_starts=self._flow_starts+self.get_implicit_starter_nodes()
|
|
1979
|
+
nodes=self.nodes, flow_starts=self._flow_starts + self.get_implicit_starter_nodes()
|
|
1627
1980
|
)
|
|
1628
1981
|
if flow_node.node_id in [skip_node.node_id for skip_node in skip_nodes]:
|
|
1629
1982
|
raise Exception("Node can not be executed because it does not have it's inputs")
|
|
1630
1983
|
|
|
1631
|
-
def create_initial_run_information(self, number_of_nodes: int,
|
|
1632
|
-
|
|
1984
|
+
def create_initial_run_information(self, number_of_nodes: int, run_type: Literal["fetch_one", "full_run"]):
|
|
1985
|
+
return RunInformation(
|
|
1986
|
+
flow_id=self.flow_id,
|
|
1987
|
+
start_time=datetime.datetime.now(),
|
|
1988
|
+
end_time=None,
|
|
1989
|
+
success=None,
|
|
1990
|
+
number_of_nodes=number_of_nodes,
|
|
1991
|
+
node_step_result=[],
|
|
1992
|
+
run_type=run_type,
|
|
1993
|
+
)
|
|
1994
|
+
|
|
1995
|
+
def create_empty_run_information(self) -> RunInformation:
|
|
1633
1996
|
return RunInformation(
|
|
1634
|
-
flow_id=self.flow_id,
|
|
1635
|
-
|
|
1636
|
-
|
|
1997
|
+
flow_id=self.flow_id,
|
|
1998
|
+
start_time=None,
|
|
1999
|
+
end_time=None,
|
|
2000
|
+
success=None,
|
|
2001
|
+
number_of_nodes=0,
|
|
2002
|
+
node_step_result=[],
|
|
2003
|
+
run_type="init",
|
|
1637
2004
|
)
|
|
1638
2005
|
|
|
1639
2006
|
def trigger_fetch_node(self, node_id: int) -> RunInformation | None:
|
|
@@ -1647,14 +2014,16 @@ class FlowGraph:
|
|
|
1647
2014
|
self.latest_run_info = self.create_initial_run_information(1, "fetch_one")
|
|
1648
2015
|
node_logger = self.flow_logger.get_node_logger(flow_node.node_id)
|
|
1649
2016
|
node_result = NodeResult(node_id=flow_node.node_id, node_name=flow_node.name)
|
|
1650
|
-
logger.info(f
|
|
2017
|
+
logger.info(f"Starting to run: node {flow_node.node_id}, start time: {node_result.start_timestamp}")
|
|
1651
2018
|
try:
|
|
1652
2019
|
self.latest_run_info.node_step_result.append(node_result)
|
|
1653
|
-
flow_node.execute_node(
|
|
1654
|
-
|
|
1655
|
-
|
|
1656
|
-
|
|
1657
|
-
|
|
2020
|
+
flow_node.execute_node(
|
|
2021
|
+
run_location=self.flow_settings.execution_location,
|
|
2022
|
+
performance_mode=False,
|
|
2023
|
+
node_logger=node_logger,
|
|
2024
|
+
optimize_for_downstream=False,
|
|
2025
|
+
reset_cache=True,
|
|
2026
|
+
)
|
|
1658
2027
|
node_result.error = str(flow_node.results.errors)
|
|
1659
2028
|
if self.flow_settings.is_canceled:
|
|
1660
2029
|
node_result.success = None
|
|
@@ -1669,12 +2038,12 @@ class FlowGraph:
|
|
|
1669
2038
|
self.flow_settings.is_running = False
|
|
1670
2039
|
return self.get_run_info()
|
|
1671
2040
|
except Exception as e:
|
|
1672
|
-
node_result.error =
|
|
2041
|
+
node_result.error = "Node did not run"
|
|
1673
2042
|
node_result.success = False
|
|
1674
2043
|
node_result.end_timestamp = time()
|
|
1675
2044
|
node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
|
|
1676
2045
|
node_result.is_running = False
|
|
1677
|
-
node_logger.error(f
|
|
2046
|
+
node_logger.error(f"Error in node {flow_node.node_id}: {e}")
|
|
1678
2047
|
finally:
|
|
1679
2048
|
self.flow_settings.is_running = False
|
|
1680
2049
|
|
|
@@ -1691,39 +2060,38 @@ class FlowGraph:
|
|
|
1691
2060
|
Exception: If the flow is already running.
|
|
1692
2061
|
"""
|
|
1693
2062
|
if self.flow_settings.is_running:
|
|
1694
|
-
raise Exception(
|
|
2063
|
+
raise Exception("Flow is already running")
|
|
1695
2064
|
try:
|
|
1696
|
-
|
|
1697
2065
|
self.flow_settings.is_running = True
|
|
1698
2066
|
self.flow_settings.is_canceled = False
|
|
1699
2067
|
self.flow_logger.clear_log_file()
|
|
1700
|
-
self.flow_logger.info(
|
|
1701
|
-
|
|
2068
|
+
self.flow_logger.info("Starting to run flowfile flow...")
|
|
1702
2069
|
skip_nodes, execution_order = compute_execution_plan(
|
|
1703
|
-
nodes=self.nodes,
|
|
1704
|
-
flow_starts=self._flow_starts+self.get_implicit_starter_nodes()
|
|
2070
|
+
nodes=self.nodes, flow_starts=self._flow_starts + self.get_implicit_starter_nodes()
|
|
1705
2071
|
)
|
|
1706
2072
|
|
|
1707
2073
|
self.latest_run_info = self.create_initial_run_information(len(execution_order), "full_run")
|
|
1708
2074
|
|
|
1709
2075
|
skip_node_message(self.flow_logger, skip_nodes)
|
|
1710
2076
|
execution_order_message(self.flow_logger, execution_order)
|
|
1711
|
-
performance_mode = self.flow_settings.execution_mode ==
|
|
2077
|
+
performance_mode = self.flow_settings.execution_mode == "Performance"
|
|
1712
2078
|
|
|
1713
2079
|
for node in execution_order:
|
|
1714
2080
|
node_logger = self.flow_logger.get_node_logger(node.node_id)
|
|
1715
2081
|
if self.flow_settings.is_canceled:
|
|
1716
|
-
self.flow_logger.info(
|
|
2082
|
+
self.flow_logger.info("Flow canceled")
|
|
1717
2083
|
break
|
|
1718
2084
|
if node in skip_nodes:
|
|
1719
|
-
node_logger.info(f
|
|
2085
|
+
node_logger.info(f"Skipping node {node.node_id}")
|
|
1720
2086
|
continue
|
|
1721
2087
|
node_result = NodeResult(node_id=node.node_id, node_name=node.name)
|
|
1722
2088
|
self.latest_run_info.node_step_result.append(node_result)
|
|
1723
|
-
logger.info(f
|
|
1724
|
-
node.execute_node(
|
|
1725
|
-
|
|
1726
|
-
|
|
2089
|
+
logger.info(f"Starting to run: node {node.node_id}, start time: {node_result.start_timestamp}")
|
|
2090
|
+
node.execute_node(
|
|
2091
|
+
run_location=self.flow_settings.execution_location,
|
|
2092
|
+
performance_mode=performance_mode,
|
|
2093
|
+
node_logger=node_logger,
|
|
2094
|
+
)
|
|
1727
2095
|
try:
|
|
1728
2096
|
node_result.error = str(node.results.errors)
|
|
1729
2097
|
if self.flow_settings.is_canceled:
|
|
@@ -1736,28 +2104,29 @@ class FlowGraph:
|
|
|
1736
2104
|
node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
|
|
1737
2105
|
node_result.is_running = False
|
|
1738
2106
|
except Exception as e:
|
|
1739
|
-
node_result.error =
|
|
2107
|
+
node_result.error = "Node did not run"
|
|
1740
2108
|
node_result.success = False
|
|
1741
2109
|
node_result.end_timestamp = time()
|
|
1742
2110
|
node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
|
|
1743
2111
|
node_result.is_running = False
|
|
1744
|
-
node_logger.error(f
|
|
2112
|
+
node_logger.error(f"Error in node {node.node_id}: {e}")
|
|
1745
2113
|
if not node_result.success:
|
|
1746
2114
|
skip_nodes.extend(list(node.get_all_dependent_nodes()))
|
|
1747
|
-
node_logger.info(f
|
|
2115
|
+
node_logger.info(f"Completed node with success: {node_result.success}")
|
|
1748
2116
|
self.latest_run_info.nodes_completed += 1
|
|
1749
|
-
self.
|
|
2117
|
+
self.latest_run_info.end_time = datetime.datetime.now()
|
|
2118
|
+
self.flow_logger.info("Flow completed!")
|
|
1750
2119
|
self.end_datetime = datetime.datetime.now()
|
|
1751
2120
|
self.flow_settings.is_running = False
|
|
1752
2121
|
if self.flow_settings.is_canceled:
|
|
1753
|
-
self.flow_logger.info(
|
|
2122
|
+
self.flow_logger.info("Flow canceled")
|
|
1754
2123
|
return self.get_run_info()
|
|
1755
2124
|
except Exception as e:
|
|
1756
2125
|
raise e
|
|
1757
2126
|
finally:
|
|
1758
2127
|
self.flow_settings.is_running = False
|
|
1759
2128
|
|
|
1760
|
-
def get_run_info(self) -> RunInformation
|
|
2129
|
+
def get_run_info(self) -> RunInformation:
|
|
1761
2130
|
"""Gets a summary of the most recent graph execution.
|
|
1762
2131
|
|
|
1763
2132
|
Returns:
|
|
@@ -1765,7 +2134,7 @@ class FlowGraph:
|
|
|
1765
2134
|
"""
|
|
1766
2135
|
is_running = self.flow_settings.is_running
|
|
1767
2136
|
if self.latest_run_info is None:
|
|
1768
|
-
return
|
|
2137
|
+
return self.create_empty_run_information()
|
|
1769
2138
|
|
|
1770
2139
|
elif not is_running and self.latest_run_info.success is not None:
|
|
1771
2140
|
return self.latest_run_info
|
|
@@ -1776,7 +2145,7 @@ class FlowGraph:
|
|
|
1776
2145
|
return run_info
|
|
1777
2146
|
|
|
1778
2147
|
@property
|
|
1779
|
-
def node_connections(self) ->
|
|
2148
|
+
def node_connections(self) -> list[tuple[int, int]]:
|
|
1780
2149
|
"""Computes and returns a list of all connections in the graph.
|
|
1781
2150
|
|
|
1782
2151
|
Returns:
|
|
@@ -1786,8 +2155,9 @@ class FlowGraph:
|
|
|
1786
2155
|
for node in self.nodes:
|
|
1787
2156
|
outgoing_connections = [(node.node_id, ltn.node_id) for ltn in node.leads_to_nodes]
|
|
1788
2157
|
incoming_connections = [(don.node_id, node.node_id) for don in node.all_inputs]
|
|
1789
|
-
node_connections = [
|
|
1790
|
-
|
|
2158
|
+
node_connections = [
|
|
2159
|
+
c for c in outgoing_connections + incoming_connections if (c[0] is not None and c[1] is not None)
|
|
2160
|
+
]
|
|
1791
2161
|
for node_connection in node_connections:
|
|
1792
2162
|
if node_connection not in connections:
|
|
1793
2163
|
connections.add(node_connection)
|
|
@@ -1806,22 +2176,60 @@ class FlowGraph:
|
|
|
1806
2176
|
node = self._node_db[node_id]
|
|
1807
2177
|
return node.get_node_data(flow_id=self.flow_id, include_example=include_example)
|
|
1808
2178
|
|
|
2179
|
+
def get_flowfile_data(self) -> schemas.FlowfileData:
|
|
2180
|
+
start_node_ids = {v.node_id for v in self._flow_starts}
|
|
2181
|
+
|
|
2182
|
+
nodes = []
|
|
2183
|
+
for node in self.nodes:
|
|
2184
|
+
node_info = node.get_node_information()
|
|
2185
|
+
flowfile_node = schemas.FlowfileNode(
|
|
2186
|
+
id=node_info.id,
|
|
2187
|
+
type=node_info.type,
|
|
2188
|
+
is_start_node=node.node_id in start_node_ids,
|
|
2189
|
+
description=node_info.description,
|
|
2190
|
+
x_position=int(node_info.x_position),
|
|
2191
|
+
y_position=int(node_info.y_position),
|
|
2192
|
+
left_input_id=node_info.left_input_id,
|
|
2193
|
+
right_input_id=node_info.right_input_id,
|
|
2194
|
+
input_ids=node_info.input_ids,
|
|
2195
|
+
outputs=node_info.outputs,
|
|
2196
|
+
setting_input=node_info.setting_input,
|
|
2197
|
+
)
|
|
2198
|
+
nodes.append(flowfile_node)
|
|
2199
|
+
|
|
2200
|
+
settings = schemas.FlowfileSettings(
|
|
2201
|
+
description=self.flow_settings.description,
|
|
2202
|
+
execution_mode=self.flow_settings.execution_mode,
|
|
2203
|
+
execution_location=self.flow_settings.execution_location,
|
|
2204
|
+
auto_save=self.flow_settings.auto_save,
|
|
2205
|
+
show_detailed_progress=self.flow_settings.show_detailed_progress,
|
|
2206
|
+
)
|
|
2207
|
+
return schemas.FlowfileData(
|
|
2208
|
+
flowfile_version=__version__,
|
|
2209
|
+
flowfile_id=self.flow_id,
|
|
2210
|
+
flowfile_name=self.__name__,
|
|
2211
|
+
flowfile_settings=settings,
|
|
2212
|
+
nodes=nodes,
|
|
2213
|
+
)
|
|
2214
|
+
|
|
1809
2215
|
def get_node_storage(self) -> schemas.FlowInformation:
|
|
1810
2216
|
"""Serializes the entire graph's state into a storable format.
|
|
1811
2217
|
|
|
1812
2218
|
Returns:
|
|
1813
2219
|
A FlowInformation object representing the complete graph.
|
|
1814
2220
|
"""
|
|
1815
|
-
node_information = {
|
|
1816
|
-
|
|
2221
|
+
node_information = {
|
|
2222
|
+
node.node_id: node.get_node_information() for node in self.nodes if node.is_setup and node.is_correct
|
|
2223
|
+
}
|
|
1817
2224
|
|
|
1818
|
-
return schemas.FlowInformation(
|
|
1819
|
-
|
|
1820
|
-
|
|
1821
|
-
|
|
1822
|
-
|
|
1823
|
-
|
|
1824
|
-
|
|
2225
|
+
return schemas.FlowInformation(
|
|
2226
|
+
flow_id=self.flow_id,
|
|
2227
|
+
flow_name=self.__name__,
|
|
2228
|
+
flow_settings=self.flow_settings,
|
|
2229
|
+
data=node_information,
|
|
2230
|
+
node_starts=[v.node_id for v in self._flow_starts],
|
|
2231
|
+
node_connections=self.node_connections,
|
|
2232
|
+
)
|
|
1825
2233
|
|
|
1826
2234
|
def cancel(self):
|
|
1827
2235
|
"""Cancels an ongoing graph execution."""
|
|
@@ -1838,19 +2246,67 @@ class FlowGraph:
|
|
|
1838
2246
|
for node in self.nodes:
|
|
1839
2247
|
node.remove_cache()
|
|
1840
2248
|
|
|
2249
|
+
def _handle_flow_renaming(self, new_name: str, new_path: Path):
|
|
2250
|
+
"""
|
|
2251
|
+
Handle the rename of a flow when it is being saved.
|
|
2252
|
+
"""
|
|
2253
|
+
if (
|
|
2254
|
+
self.flow_settings
|
|
2255
|
+
and self.flow_settings.path
|
|
2256
|
+
and Path(self.flow_settings.path).absolute() != new_path.absolute()
|
|
2257
|
+
):
|
|
2258
|
+
self.__name__ = new_name
|
|
2259
|
+
self.flow_settings.save_location = str(new_path.absolute())
|
|
2260
|
+
self.flow_settings.name = new_name
|
|
2261
|
+
if self.flow_settings and not self.flow_settings.save_location:
|
|
2262
|
+
self.flow_settings.save_location = str(new_path.absolute())
|
|
2263
|
+
self.__name__ = new_name
|
|
2264
|
+
self.flow_settings.name = new_name
|
|
2265
|
+
|
|
1841
2266
|
def save_flow(self, flow_path: str):
|
|
1842
2267
|
"""Saves the current state of the flow graph to a file.
|
|
1843
2268
|
|
|
2269
|
+
Supports multiple formats based on file extension:
|
|
2270
|
+
- .yaml / .yml: New YAML format
|
|
2271
|
+
- .json: JSON format
|
|
2272
|
+
|
|
1844
2273
|
Args:
|
|
1845
2274
|
flow_path: The path where the flow file will be saved.
|
|
1846
2275
|
"""
|
|
1847
2276
|
logger.info("Saving flow to %s", flow_path)
|
|
1848
|
-
|
|
2277
|
+
path = Path(flow_path)
|
|
2278
|
+
os.makedirs(path.parent, exist_ok=True)
|
|
2279
|
+
suffix = path.suffix.lower()
|
|
2280
|
+
new_flow_name = path.name.replace(suffix, "")
|
|
2281
|
+
self._handle_flow_renaming(new_flow_name, path)
|
|
2282
|
+
self.flow_settings.modified_on = datetime.datetime.now().timestamp()
|
|
1849
2283
|
try:
|
|
1850
|
-
|
|
1851
|
-
|
|
2284
|
+
if suffix == ".flowfile":
|
|
2285
|
+
raise DeprecationWarning(
|
|
2286
|
+
"The .flowfile format is deprecated. Please use .yaml or .json formats.\n\n"
|
|
2287
|
+
"Or stay on v0.4.1 if you still need .flowfile support.\n\n"
|
|
2288
|
+
)
|
|
2289
|
+
elif suffix in (".yaml", ".yml"):
|
|
2290
|
+
flowfile_data = self.get_flowfile_data()
|
|
2291
|
+
data = flowfile_data.model_dump(mode="json")
|
|
2292
|
+
with open(flow_path, "w", encoding="utf-8") as f:
|
|
2293
|
+
yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
|
|
2294
|
+
elif suffix == ".json":
|
|
2295
|
+
flowfile_data = self.get_flowfile_data()
|
|
2296
|
+
data = flowfile_data.model_dump(mode="json")
|
|
2297
|
+
with open(flow_path, "w", encoding="utf-8") as f:
|
|
2298
|
+
json.dump(data, f, indent=2, ensure_ascii=False)
|
|
2299
|
+
|
|
2300
|
+
else:
|
|
2301
|
+
flowfile_data = self.get_flowfile_data()
|
|
2302
|
+
logger.warning(f"Unknown file extension {suffix}. Defaulting to YAML format.")
|
|
2303
|
+
data = flowfile_data.model_dump(mode="json")
|
|
2304
|
+
with open(flow_path, "w", encoding="utf-8") as f:
|
|
2305
|
+
yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
|
|
2306
|
+
|
|
1852
2307
|
except Exception as e:
|
|
1853
2308
|
logger.error(f"Error saving flow: {e}")
|
|
2309
|
+
raise
|
|
1854
2310
|
|
|
1855
2311
|
self.flow_settings.path = flow_path
|
|
1856
2312
|
|
|
@@ -1863,11 +2319,7 @@ class FlowGraph:
|
|
|
1863
2319
|
Returns:
|
|
1864
2320
|
A dictionary representing the graph in Drawflow format.
|
|
1865
2321
|
"""
|
|
1866
|
-
result = {
|
|
1867
|
-
'Home': {
|
|
1868
|
-
"data": {}
|
|
1869
|
-
}
|
|
1870
|
-
}
|
|
2322
|
+
result = {"Home": {"data": {}}}
|
|
1871
2323
|
flow_info: schemas.FlowInformation = self.get_node_storage()
|
|
1872
2324
|
|
|
1873
2325
|
for node_id, node_info in flow_info.data.items():
|
|
@@ -1886,7 +2338,7 @@ class FlowGraph:
|
|
|
1886
2338
|
"inputs": {},
|
|
1887
2339
|
"outputs": {},
|
|
1888
2340
|
"pos_x": pos_x,
|
|
1889
|
-
"pos_y": pos_y
|
|
2341
|
+
"pos_y": pos_y,
|
|
1890
2342
|
}
|
|
1891
2343
|
except Exception as e:
|
|
1892
2344
|
logger.error(e)
|
|
@@ -1900,24 +2352,27 @@ class FlowGraph:
|
|
|
1900
2352
|
leading_to_node = self.get_node(output_node_id)
|
|
1901
2353
|
input_types = leading_to_node.get_input_type(node_info.id)
|
|
1902
2354
|
for input_type in input_types:
|
|
1903
|
-
if input_type ==
|
|
1904
|
-
input_frontend_id =
|
|
1905
|
-
elif input_type ==
|
|
1906
|
-
input_frontend_id =
|
|
1907
|
-
elif input_type ==
|
|
1908
|
-
input_frontend_id =
|
|
2355
|
+
if input_type == "main":
|
|
2356
|
+
input_frontend_id = "input_1"
|
|
2357
|
+
elif input_type == "right":
|
|
2358
|
+
input_frontend_id = "input_2"
|
|
2359
|
+
elif input_type == "left":
|
|
2360
|
+
input_frontend_id = "input_3"
|
|
1909
2361
|
else:
|
|
1910
|
-
input_frontend_id =
|
|
2362
|
+
input_frontend_id = "input_1"
|
|
1911
2363
|
connection = {"node": str(output_node_id), "input": input_frontend_id}
|
|
1912
2364
|
connections.append(connection)
|
|
1913
2365
|
|
|
1914
|
-
result["Home"]["data"][str(node_id)]["outputs"]["output_1"] = {
|
|
1915
|
-
"connections": connections}
|
|
2366
|
+
result["Home"]["data"][str(node_id)]["outputs"]["output_1"] = {"connections": connections}
|
|
1916
2367
|
else:
|
|
1917
2368
|
result["Home"]["data"][str(node_id)]["outputs"] = {"output_1": {"connections": []}}
|
|
1918
2369
|
|
|
1919
2370
|
# Add input to the node based on `depending_on_id` in your backend data
|
|
1920
|
-
if
|
|
2371
|
+
if (
|
|
2372
|
+
node_info.left_input_id is not None
|
|
2373
|
+
or node_info.right_input_id is not None
|
|
2374
|
+
or node_info.input_ids is not None
|
|
2375
|
+
):
|
|
1921
2376
|
main_inputs = node_info.main_input_ids
|
|
1922
2377
|
result["Home"]["data"][str(node_id)]["inputs"]["input_1"] = {
|
|
1923
2378
|
"connections": [{"node": str(main_node_id), "input": "output_1"} for main_node_id in main_inputs]
|
|
@@ -1938,8 +2393,8 @@ class FlowGraph:
|
|
|
1938
2393
|
Returns:
|
|
1939
2394
|
A VueFlowInput object.
|
|
1940
2395
|
"""
|
|
1941
|
-
edges:
|
|
1942
|
-
nodes:
|
|
2396
|
+
edges: list[schemas.NodeEdge] = []
|
|
2397
|
+
nodes: list[schemas.NodeInput] = []
|
|
1943
2398
|
for node in self.nodes:
|
|
1944
2399
|
nodes.append(node.get_node_input())
|
|
1945
2400
|
edges.extend(node.get_edge_input())
|
|
@@ -1951,7 +2406,9 @@ class FlowGraph:
|
|
|
1951
2406
|
for node in self.nodes:
|
|
1952
2407
|
node.reset(True)
|
|
1953
2408
|
|
|
1954
|
-
def copy_node(
|
|
2409
|
+
def copy_node(
|
|
2410
|
+
self, new_node_settings: input_schema.NodePromise, existing_setting_input: Any, node_type: str
|
|
2411
|
+
) -> None:
|
|
1955
2412
|
"""Creates a copy of an existing node.
|
|
1956
2413
|
|
|
1957
2414
|
Args:
|
|
@@ -1964,9 +2421,7 @@ class FlowGraph:
|
|
|
1964
2421
|
if isinstance(existing_setting_input, input_schema.NodePromise):
|
|
1965
2422
|
return
|
|
1966
2423
|
|
|
1967
|
-
combined_settings = combine_existing_settings_and_new_settings(
|
|
1968
|
-
existing_setting_input, new_node_settings
|
|
1969
|
-
)
|
|
2424
|
+
combined_settings = combine_existing_settings_and_new_settings(existing_setting_input, new_node_settings)
|
|
1970
2425
|
getattr(self, f"add_{node_type}")(combined_settings)
|
|
1971
2426
|
|
|
1972
2427
|
def generate_code(self):
|
|
@@ -1974,6 +2429,7 @@ class FlowGraph:
|
|
|
1974
2429
|
This method exports the flow graph to a Polars-compatible format.
|
|
1975
2430
|
"""
|
|
1976
2431
|
from flowfile_core.flowfile.code_generator.code_generator import export_flow_to_polars
|
|
2432
|
+
|
|
1977
2433
|
print(export_flow_to_polars(self))
|
|
1978
2434
|
|
|
1979
2435
|
|
|
@@ -1992,13 +2448,7 @@ def combine_existing_settings_and_new_settings(setting_input: Any, new_settings:
|
|
|
1992
2448
|
copied_setting_input = deepcopy(setting_input)
|
|
1993
2449
|
|
|
1994
2450
|
# Update only attributes that exist on new_settings
|
|
1995
|
-
fields_to_update = (
|
|
1996
|
-
"node_id",
|
|
1997
|
-
"pos_x",
|
|
1998
|
-
"pos_y",
|
|
1999
|
-
"description",
|
|
2000
|
-
"flow_id"
|
|
2001
|
-
)
|
|
2451
|
+
fields_to_update = ("node_id", "pos_x", "pos_y", "description", "flow_id")
|
|
2002
2452
|
|
|
2003
2453
|
for field in fields_to_update:
|
|
2004
2454
|
if hasattr(new_settings, field) and getattr(new_settings, field) is not None:
|
|
@@ -2014,12 +2464,12 @@ def add_connection(flow: FlowGraph, node_connection: input_schema.NodeConnection
|
|
|
2014
2464
|
flow: The FlowGraph instance to modify.
|
|
2015
2465
|
node_connection: An object defining the source and target of the connection.
|
|
2016
2466
|
"""
|
|
2017
|
-
logger.info(
|
|
2467
|
+
logger.info("adding a connection")
|
|
2018
2468
|
from_node = flow.get_node(node_connection.output_connection.node_id)
|
|
2019
2469
|
to_node = flow.get_node(node_connection.input_connection.node_id)
|
|
2020
|
-
logger.info(f
|
|
2470
|
+
logger.info(f"from_node={from_node}, to_node={to_node}")
|
|
2021
2471
|
if not (from_node and to_node):
|
|
2022
|
-
raise HTTPException(404,
|
|
2472
|
+
raise HTTPException(404, "Not not available")
|
|
2023
2473
|
else:
|
|
2024
2474
|
to_node.add_node_connection(from_node, node_connection.input_connection.get_node_input_connection_type())
|
|
2025
2475
|
|