Flowfile 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backends/main.py +25 -22
- build_backends/main_prd.py +10 -19
- flowfile/__init__.py +178 -74
- flowfile/__main__.py +10 -7
- flowfile/api.py +51 -57
- flowfile/web/__init__.py +14 -9
- flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
- flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
- flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
- flowfile/web/static/assets/{CloudConnectionManager-0dfba9f2.js → CloudConnectionView-f13f202b.js} +11 -11
- flowfile/web/static/assets/{CloudStorageReader-d5b1b6c9.js → CloudStorageReader-0023d4a5.js} +10 -8
- flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
- flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
- flowfile/web/static/assets/{CloudStorageWriter-00d87aad.js → CloudStorageWriter-8e781e11.js} +10 -8
- flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
- flowfile/web/static/assets/{ColumnSelector-4685e75d.js → ColumnSelector-8ad68ea9.js} +3 -5
- flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-23e909da.js → ContextMenu-31ee57f0.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-70ae0c79.js → ContextMenu-69a74055.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-f149cf7c.js → ContextMenu-8e2051c6.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
- flowfile/web/static/assets/{CrossJoin-702a3edd.js → CrossJoin-03df6938.js} +12 -10
- flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
- flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
- flowfile/web/static/assets/{CustomNode-b1519993.js → CustomNode-8479239b.js} +36 -24
- flowfile/web/static/assets/{DatabaseConnectionSettings-6f3e4ea5.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
- flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
- flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
- flowfile/web/static/assets/{DatabaseReader-d38c7295.js → DatabaseReader-c58b9552.js} +25 -15
- flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
- flowfile/web/static/assets/{DatabaseManager-cf5ef661.js → DatabaseView-d26a9140.js} +11 -11
- flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
- flowfile/web/static/assets/{DatabaseWriter-b04ef46a.js → DatabaseWriter-4d05ddc7.js} +17 -10
- flowfile/web/static/assets/{designer-8da3ba3a.css → DesignerView-a6d0ee84.css} +614 -546
- flowfile/web/static/assets/{designer-9633482a.js → DesignerView-e6f5c0e8.js} +1107 -3170
- flowfile/web/static/assets/{documentation-ca400224.js → DocumentationView-2e78ef1b.js} +5 -5
- flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
- flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
- flowfile/web/static/assets/{ExploreData-5fa10ed8.js → ExploreData-7b54caca.js} +18 -9
- flowfile/web/static/assets/{ExternalSource-d39af878.js → ExternalSource-3fa399b2.js} +9 -7
- flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
- flowfile/web/static/assets/Filter-7494ea97.css +48 -0
- flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
- flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
- flowfile/web/static/assets/{Formula-6b04fb1d.js → Formula-aac42b1e.js} +13 -11
- flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
- flowfile/web/static/assets/{FuzzyMatch-999521f4.js → FuzzyMatch-cd9bbfca.js} +12 -10
- flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
- flowfile/web/static/assets/{GraphSolver-17dd2198.js → GraphSolver-c7e6780e.js} +13 -11
- flowfile/web/static/assets/{GroupBy-6b039e18.js → GroupBy-93c5d22b.js} +9 -7
- flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
- flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
- flowfile/web/static/assets/{Join-24d0f113.js → Join-a19b2de2.js} +13 -11
- flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
- flowfile/web/static/assets/LoginView-d325d632.css +172 -0
- flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
- flowfile/web/static/assets/{ManualInput-34639209.js → ManualInput-8d3374b2.js} +170 -116
- flowfile/web/static/assets/{MultiSelect-0e8724a3.js → MultiSelect-ad1b6243.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
- flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
- flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
- flowfile/web/static/assets/{NumericInput-3d63a470.js → NumericInput-7100234c.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
- flowfile/web/static/assets/{Output-283fe388.css → Output-35e97000.css} +6 -6
- flowfile/web/static/assets/{Output-edea9802.js → Output-f5efd2aa.js} +12 -9
- flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
- flowfile/web/static/assets/{Pivot-61d19301.js → Pivot-d981d23c.js} +11 -9
- flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
- flowfile/web/static/assets/{PivotValidation-f97fec5b.js → PivotValidation-39386e95.js} +3 -3
- flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
- flowfile/web/static/assets/{PivotValidation-de9f43fe.js → PivotValidation-63de1f73.js} +3 -3
- flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
- flowfile/web/static/assets/{PolarsCode-bc3c9984.js → PolarsCode-f9d69217.js} +18 -9
- flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
- flowfile/web/static/assets/PopOver-d96599db.css +33 -0
- flowfile/web/static/assets/{Read-e808b239.css → Read-36e7bd51.css} +12 -12
- flowfile/web/static/assets/{Read-64a3f259.js → Read-aec2e377.js} +14 -11
- flowfile/web/static/assets/{RecordCount-3d5039be.js → RecordCount-78ed6845.js} +6 -4
- flowfile/web/static/assets/{RecordId-597510e0.js → RecordId-2156e890.js} +8 -6
- flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
- flowfile/web/static/assets/{SQLQueryComponent-df51adbe.js → SQLQueryComponent-48c72f5b.js} +3 -3
- flowfile/web/static/assets/{Sample-4be0a507.js → Sample-1352ca74.js} +6 -4
- flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
- flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
- flowfile/web/static/assets/{SecretManager-4839be57.js → SecretsView-17df66ee.js} +35 -36
- flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
- flowfile/web/static/assets/{Select-9b72f201.js → Select-0aee4c54.js} +9 -7
- flowfile/web/static/assets/{SettingsSection-f0f75a42.js → SettingsSection-0784e157.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-e1e9c953.js → SettingsSection-cd341bb6.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-7ded385d.js → SettingsSection-f2002a6d.js} +3 -3
- flowfile/web/static/assets/{SingleSelect-6c777aac.js → SingleSelect-460cc0ea.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
- flowfile/web/static/assets/{SliderInput-7cb93e62.js → SliderInput-5d926864.js} +7 -4
- flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
- flowfile/web/static/assets/{Sort-6cbde21a.js → Sort-3cdc971b.js} +9 -7
- flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
- flowfile/web/static/assets/{TextInput-d9a40c11.js → TextInput-a2d0bfbd.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-5896c375.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
- flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
- flowfile/web/static/assets/{TextToRows-c4fcbf4d.js → TextToRows-918945f7.js} +11 -10
- flowfile/web/static/assets/{ToggleSwitch-4ef91d19.js → ToggleSwitch-f0ef5196.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
- flowfile/web/static/assets/{UnavailableFields-a03f512c.js → UnavailableFields-bdad6144.js} +4 -4
- flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
- flowfile/web/static/assets/{Union-bfe9b996.js → Union-e8ab8c86.js} +8 -6
- flowfile/web/static/assets/{Unique-5d023a27.js → Unique-8cd4f976.js} +13 -10
- flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
- flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
- flowfile/web/static/assets/{Unpivot-91cc5354.js → Unpivot-8da14095.js} +10 -8
- flowfile/web/static/assets/{UnpivotValidation-7ee2de44.js → UnpivotValidation-6f7d89ff.js} +3 -3
- flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
- flowfile/web/static/assets/{VueGraphicWalker-e51b9924.js → VueGraphicWalker-3fb312e1.js} +4 -4
- flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
- flowfile/web/static/assets/{api-cf1221f0.js → api-24483f0d.js} +1 -1
- flowfile/web/static/assets/{api-c1bad5ca.js → api-8b81fa73.js} +1 -1
- flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
- flowfile/web/static/assets/{dropDown-614b998d.js → dropDown-ac0fda9d.js} +3 -3
- flowfile/web/static/assets/{fullEditor-f7971590.js → fullEditor-5497a84a.js} +11 -10
- flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
- flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
- flowfile/web/static/assets/{genericNodeSettings-4fe5f36b.js → genericNodeSettings-99014e1d.js} +5 -5
- flowfile/web/static/assets/index-07dda503.js +38 -0
- flowfile/web/static/assets/index-3ba44389.js +2696 -0
- flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
- flowfile/web/static/assets/{index-5429bbf8.js → index-fb6493ae.js} +41626 -40867
- flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
- flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
- flowfile/web/static/assets/{outputCsv-076b85ab.js → outputCsv-8f8ba42d.js} +3 -3
- flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
- flowfile/web/static/assets/{outputExcel-0fd17dbe.js → outputExcel-393f4fef.js} +3 -3
- flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
- flowfile/web/static/assets/{outputParquet-b61e0847.js → outputParquet-07c81f65.js} +4 -4
- flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
- flowfile/web/static/assets/{readCsv-a8bb8b61.js → readCsv-07f6d9ad.js} +3 -3
- flowfile/web/static/assets/{readCsv-c767cb37.css → readCsv-3bfac4c3.css} +15 -15
- flowfile/web/static/assets/{readExcel-806d2826.css → readExcel-3db6b763.css} +13 -13
- flowfile/web/static/assets/{readExcel-67b4aee0.js → readExcel-ed69bc8f.js} +5 -5
- flowfile/web/static/assets/{readParquet-48c81530.css → readParquet-c5244ad5.css} +4 -4
- flowfile/web/static/assets/{readParquet-92ce1dbc.js → readParquet-e3ed4528.js} +3 -3
- flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
- flowfile/web/static/assets/{selectDynamic-92e25ee3.js → selectDynamic-80b92899.js} +5 -5
- flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
- flowfile/web/static/assets/{vue-codemirror.esm-41b0e0d7.js → vue-codemirror.esm-0965f39f.js} +31 -640
- flowfile/web/static/assets/{vue-content-loader.es-2c8e608f.js → vue-content-loader.es-c506ad97.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +2 -3
- flowfile-0.5.3.dist-info/RECORD +402 -0
- flowfile_core/__init__.py +13 -6
- flowfile_core/auth/jwt.py +51 -16
- flowfile_core/auth/models.py +32 -7
- flowfile_core/auth/password.py +89 -0
- flowfile_core/auth/secrets.py +8 -6
- flowfile_core/configs/__init__.py +9 -7
- flowfile_core/configs/flow_logger.py +15 -14
- flowfile_core/configs/node_store/__init__.py +72 -4
- flowfile_core/configs/node_store/nodes.py +155 -172
- flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
- flowfile_core/configs/settings.py +28 -15
- flowfile_core/database/connection.py +7 -6
- flowfile_core/database/init_db.py +96 -2
- flowfile_core/database/models.py +3 -1
- flowfile_core/fileExplorer/__init__.py +17 -0
- flowfile_core/fileExplorer/funcs.py +123 -57
- flowfile_core/fileExplorer/utils.py +10 -11
- flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
- flowfile_core/flowfile/analytics/analytics_processor.py +26 -24
- flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
- flowfile_core/flowfile/analytics/utils.py +1 -1
- flowfile_core/flowfile/code_generator/code_generator.py +358 -244
- flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
- flowfile_core/flowfile/database_connection_manager/models.py +1 -1
- flowfile_core/flowfile/extensions.py +17 -12
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +115 -83
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +481 -423
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +31 -20
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +14 -15
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
- flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +190 -127
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
- flowfile_core/flowfile/flow_data_engine/utils.py +99 -67
- flowfile_core/flowfile/flow_graph.py +918 -571
- flowfile_core/flowfile/flow_graph_utils.py +31 -49
- flowfile_core/flowfile/flow_node/flow_node.py +330 -233
- flowfile_core/flowfile/flow_node/models.py +53 -41
- flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
- flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
- flowfile_core/flowfile/handler.py +80 -30
- flowfile_core/flowfile/manage/compatibility_enhancements.py +209 -126
- flowfile_core/flowfile/manage/io_flowfile.py +54 -57
- flowfile_core/flowfile/node_designer/__init__.py +15 -13
- flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
- flowfile_core/flowfile/node_designer/custom_node.py +162 -36
- flowfile_core/flowfile/node_designer/ui_components.py +135 -34
- flowfile_core/flowfile/schema_callbacks.py +71 -51
- flowfile_core/flowfile/setting_generator/__init__.py +0 -1
- flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
- flowfile_core/flowfile/setting_generator/settings.py +64 -53
- flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
- flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
- flowfile_core/flowfile/util/calculate_layout.py +9 -13
- flowfile_core/flowfile/util/execution_orderer.py +25 -17
- flowfile_core/flowfile/util/node_skipper.py +4 -4
- flowfile_core/flowfile/utils.py +19 -21
- flowfile_core/main.py +26 -19
- flowfile_core/routes/auth.py +284 -11
- flowfile_core/routes/cloud_connections.py +25 -25
- flowfile_core/routes/logs.py +21 -29
- flowfile_core/routes/public.py +3 -3
- flowfile_core/routes/routes.py +70 -34
- flowfile_core/routes/secrets.py +25 -27
- flowfile_core/routes/user_defined_components.py +483 -4
- flowfile_core/run_lock.py +0 -1
- flowfile_core/schemas/__init__.py +4 -6
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
- flowfile_core/schemas/cloud_storage_schemas.py +59 -53
- flowfile_core/schemas/input_schema.py +231 -144
- flowfile_core/schemas/output_model.py +49 -34
- flowfile_core/schemas/schemas.py +116 -89
- flowfile_core/schemas/transform_schema.py +518 -263
- flowfile_core/schemas/yaml_types.py +21 -7
- flowfile_core/secret_manager/secret_manager.py +17 -13
- flowfile_core/types.py +29 -9
- flowfile_core/utils/arrow_reader.py +7 -6
- flowfile_core/utils/excel_file_manager.py +3 -3
- flowfile_core/utils/fileManager.py +7 -7
- flowfile_core/utils/fl_executor.py +8 -10
- flowfile_core/utils/utils.py +4 -4
- flowfile_core/utils/validate_setup.py +5 -4
- flowfile_frame/__init__.py +106 -51
- flowfile_frame/adapters.py +2 -9
- flowfile_frame/adding_expr.py +73 -32
- flowfile_frame/cloud_storage/frame_helpers.py +27 -23
- flowfile_frame/cloud_storage/secret_manager.py +12 -26
- flowfile_frame/config.py +2 -5
- flowfile_frame/expr.py +311 -218
- flowfile_frame/expr.pyi +160 -159
- flowfile_frame/expr_name.py +23 -23
- flowfile_frame/flow_frame.py +571 -476
- flowfile_frame/flow_frame.pyi +123 -104
- flowfile_frame/flow_frame_methods.py +227 -246
- flowfile_frame/group_frame.py +50 -20
- flowfile_frame/join.py +2 -2
- flowfile_frame/lazy.py +129 -87
- flowfile_frame/lazy_methods.py +83 -30
- flowfile_frame/list_name_space.py +55 -50
- flowfile_frame/selectors.py +148 -68
- flowfile_frame/series.py +9 -7
- flowfile_frame/utils.py +19 -21
- flowfile_worker/__init__.py +12 -7
- flowfile_worker/configs.py +11 -19
- flowfile_worker/create/__init__.py +14 -9
- flowfile_worker/create/funcs.py +114 -77
- flowfile_worker/create/models.py +46 -43
- flowfile_worker/create/pl_types.py +14 -15
- flowfile_worker/create/read_excel_tables.py +34 -41
- flowfile_worker/create/utils.py +22 -19
- flowfile_worker/external_sources/s3_source/main.py +18 -51
- flowfile_worker/external_sources/s3_source/models.py +34 -27
- flowfile_worker/external_sources/sql_source/main.py +8 -5
- flowfile_worker/external_sources/sql_source/models.py +13 -9
- flowfile_worker/flow_logger.py +10 -8
- flowfile_worker/funcs.py +214 -155
- flowfile_worker/main.py +11 -17
- flowfile_worker/models.py +35 -28
- flowfile_worker/process_manager.py +2 -3
- flowfile_worker/routes.py +121 -90
- flowfile_worker/secrets.py +9 -6
- flowfile_worker/spawner.py +80 -49
- flowfile_worker/utils.py +3 -2
- shared/__init__.py +2 -7
- shared/storage_config.py +25 -13
- test_utils/postgres/commands.py +3 -2
- test_utils/postgres/fixtures.py +9 -9
- test_utils/s3/commands.py +1 -1
- test_utils/s3/data_generator.py +3 -4
- test_utils/s3/demo_data_generator.py +4 -7
- test_utils/s3/fixtures.py +7 -5
- tools/migrate/__init__.py +1 -1
- tools/migrate/__main__.py +16 -29
- tools/migrate/legacy_schemas.py +251 -190
- tools/migrate/migrate.py +193 -181
- tools/migrate/tests/conftest.py +1 -3
- tools/migrate/tests/test_migrate.py +36 -41
- tools/migrate/tests/test_migration_e2e.py +28 -29
- tools/migrate/tests/test_node_migrations.py +50 -20
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
- flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
- flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
- flowfile/web/static/assets/Filter-9b6d08db.js +0 -164
- flowfile/web/static/assets/Filter-f62091b3.css +0 -20
- flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
- flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
- flowfile/web/static/assets/nodeInput-5d0d6b79.js +0 -41
- flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
- flowfile/web/static/assets/secretApi-68435402.js +0 -46
- flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
- flowfile-0.5.1.dist-info/RECORD +0 -388
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,80 +1,103 @@
|
|
|
1
1
|
import datetime
|
|
2
|
-
|
|
3
|
-
import os
|
|
4
|
-
import yaml
|
|
5
2
|
import json
|
|
6
|
-
|
|
7
|
-
|
|
3
|
+
import os
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from copy import deepcopy
|
|
6
|
+
from functools import partial
|
|
7
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
8
8
|
from pathlib import Path
|
|
9
|
+
from time import time
|
|
10
|
+
from typing import Any, Literal, Union
|
|
11
|
+
from uuid import uuid1
|
|
9
12
|
|
|
10
13
|
import fastexcel
|
|
14
|
+
import polars as pl
|
|
15
|
+
import yaml
|
|
11
16
|
from fastapi.exceptions import HTTPException
|
|
12
|
-
from time import time
|
|
13
|
-
from functools import partial
|
|
14
|
-
from typing import List, Dict, Union, Callable, Any, Optional, Tuple, Literal
|
|
15
|
-
from uuid import uuid1
|
|
16
|
-
from copy import deepcopy
|
|
17
17
|
from pyarrow.parquet import ParquetFile
|
|
18
|
+
|
|
18
19
|
from flowfile_core.configs import logger
|
|
19
20
|
from flowfile_core.configs.flow_logger import FlowLogger
|
|
20
|
-
from flowfile_core.
|
|
21
|
-
from flowfile_core.flowfile.
|
|
22
|
-
|
|
21
|
+
from flowfile_core.configs.node_store import CUSTOM_NODE_STORE
|
|
22
|
+
from flowfile_core.flowfile.analytics.utils import create_graphic_walker_node_from_node_promise
|
|
23
|
+
from flowfile_core.flowfile.database_connection_manager.db_connections import (
|
|
24
|
+
get_local_cloud_connection,
|
|
25
|
+
get_local_database_connection,
|
|
26
|
+
)
|
|
23
27
|
from flowfile_core.flowfile.flow_data_engine.cloud_storage_reader import CloudStorageReader
|
|
24
|
-
from flowfile_core.schemas.transform_schema import FuzzyMatchInputManager
|
|
25
|
-
from flowfile_core.utils.arrow_reader import get_read_top_n
|
|
26
28
|
from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine, execute_polars_code
|
|
27
|
-
from flowfile_core.flowfile.flow_data_engine.
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
29
|
+
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, cast_str_to_polars_type
|
|
30
|
+
from flowfile_core.flowfile.flow_data_engine.polars_code_parser import polars_code_parser
|
|
31
|
+
from flowfile_core.flowfile.flow_data_engine.read_excel_tables import (
|
|
32
|
+
get_calamine_xlsx_data_types,
|
|
33
|
+
get_open_xlsx_datatypes,
|
|
34
|
+
)
|
|
35
|
+
from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import (
|
|
36
|
+
ExternalCloudWriter,
|
|
37
|
+
ExternalDatabaseFetcher,
|
|
38
|
+
ExternalDatabaseWriter,
|
|
39
|
+
ExternalDfFetcher,
|
|
40
|
+
)
|
|
41
|
+
from flowfile_core.flowfile.flow_node.flow_node import FlowNode
|
|
42
|
+
from flowfile_core.flowfile.graph_tree.graph_tree import (
|
|
43
|
+
add_un_drawn_nodes,
|
|
44
|
+
build_flow_paths,
|
|
45
|
+
build_node_info,
|
|
46
|
+
calculate_depth,
|
|
47
|
+
define_node_connections,
|
|
48
|
+
draw_merged_paths,
|
|
49
|
+
draw_standalone_paths,
|
|
50
|
+
group_nodes_by_depth,
|
|
51
|
+
)
|
|
52
|
+
from flowfile_core.flowfile.node_designer.custom_node import CustomNodeBase
|
|
53
|
+
from flowfile_core.flowfile.schema_callbacks import calculate_fuzzy_match_schema, pre_calculate_pivot_schema
|
|
31
54
|
from flowfile_core.flowfile.sources import external_sources
|
|
55
|
+
from flowfile_core.flowfile.sources.external_sources.factory import data_source_factory
|
|
56
|
+
from flowfile_core.flowfile.sources.external_sources.sql_source import models as sql_models
|
|
57
|
+
from flowfile_core.flowfile.sources.external_sources.sql_source import utils as sql_utils
|
|
58
|
+
from flowfile_core.flowfile.sources.external_sources.sql_source.sql_source import BaseSqlSource, SqlSource
|
|
59
|
+
from flowfile_core.flowfile.util.calculate_layout import calculate_layered_layout
|
|
60
|
+
from flowfile_core.flowfile.util.execution_orderer import compute_execution_plan
|
|
61
|
+
from flowfile_core.flowfile.utils import snake_case_to_camel_case
|
|
32
62
|
from flowfile_core.schemas import input_schema, schemas, transform_schema
|
|
63
|
+
from flowfile_core.schemas.cloud_storage_schemas import (
|
|
64
|
+
AuthMethod,
|
|
65
|
+
CloudStorageReadSettingsInternal,
|
|
66
|
+
CloudStorageWriteSettingsInternal,
|
|
67
|
+
FullCloudStorageConnection,
|
|
68
|
+
get_cloud_storage_write_settings_worker_interface,
|
|
69
|
+
)
|
|
33
70
|
from flowfile_core.schemas.output_model import NodeData, NodeResult, RunInformation
|
|
34
|
-
from flowfile_core.schemas.
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
get_cloud_storage_write_settings_worker_interface, AuthMethod)
|
|
38
|
-
from flowfile_core.flowfile.utils import snake_case_to_camel_case
|
|
39
|
-
from flowfile_core.flowfile.analytics.utils import create_graphic_walker_node_from_node_promise
|
|
40
|
-
from flowfile_core.flowfile.flow_node.flow_node import FlowNode
|
|
41
|
-
from flowfile_core.flowfile.util.execution_orderer import compute_execution_plan
|
|
42
|
-
from flowfile_core.flowfile.graph_tree.graph_tree import (add_un_drawn_nodes, build_flow_paths,
|
|
43
|
-
build_node_info, calculate_depth,
|
|
44
|
-
define_node_connections, draw_merged_paths,
|
|
45
|
-
draw_standalone_paths, group_nodes_by_depth)
|
|
46
|
-
from flowfile_core.flowfile.flow_data_engine.polars_code_parser import polars_code_parser
|
|
47
|
-
from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import (ExternalDatabaseFetcher,
|
|
48
|
-
ExternalDatabaseWriter,
|
|
49
|
-
ExternalDfFetcher,
|
|
50
|
-
ExternalCloudWriter)
|
|
51
|
-
from flowfile_core.secret_manager.secret_manager import get_encrypted_secret, decrypt_secret
|
|
52
|
-
from flowfile_core.flowfile.sources.external_sources.sql_source import utils as sql_utils, models as sql_models
|
|
53
|
-
from flowfile_core.flowfile.sources.external_sources.sql_source.sql_source import SqlSource, BaseSqlSource
|
|
54
|
-
from flowfile_core.flowfile.database_connection_manager.db_connections import (get_local_database_connection,
|
|
55
|
-
get_local_cloud_connection)
|
|
56
|
-
from flowfile_core.flowfile.util.calculate_layout import calculate_layered_layout
|
|
57
|
-
from flowfile_core.flowfile.node_designer.custom_node import CustomNodeBase
|
|
58
|
-
from importlib.metadata import version, PackageNotFoundError
|
|
71
|
+
from flowfile_core.schemas.transform_schema import FuzzyMatchInputManager
|
|
72
|
+
from flowfile_core.secret_manager.secret_manager import decrypt_secret, get_encrypted_secret
|
|
73
|
+
from flowfile_core.utils.arrow_reader import get_read_top_n
|
|
59
74
|
|
|
60
75
|
try:
|
|
61
76
|
__version__ = version("Flowfile")
|
|
62
77
|
except PackageNotFoundError:
|
|
63
|
-
__version__ = "0.
|
|
78
|
+
__version__ = "0.5.0"
|
|
64
79
|
|
|
65
80
|
|
|
66
81
|
def represent_list_json(dumper, data):
|
|
67
82
|
"""Use inline style for short simple lists, block style for complex ones."""
|
|
68
83
|
if len(data) <= 10 and all(isinstance(item, (int, str, float, bool, type(None))) for item in data):
|
|
69
|
-
return dumper.represent_sequence(
|
|
70
|
-
return dumper.represent_sequence(
|
|
84
|
+
return dumper.represent_sequence("tag:yaml.org,2002:seq", data, flow_style=True)
|
|
85
|
+
return dumper.represent_sequence("tag:yaml.org,2002:seq", data, flow_style=False)
|
|
71
86
|
|
|
72
87
|
|
|
73
88
|
yaml.add_representer(list, represent_list_json)
|
|
74
89
|
|
|
75
90
|
|
|
76
|
-
def get_xlsx_schema(
|
|
77
|
-
|
|
91
|
+
def get_xlsx_schema(
|
|
92
|
+
engine: str,
|
|
93
|
+
file_path: str,
|
|
94
|
+
sheet_name: str,
|
|
95
|
+
start_row: int,
|
|
96
|
+
start_column: int,
|
|
97
|
+
end_row: int,
|
|
98
|
+
end_column: int,
|
|
99
|
+
has_headers: bool,
|
|
100
|
+
):
|
|
78
101
|
"""Calculates the schema of an XLSX file by reading a sample of rows.
|
|
79
102
|
|
|
80
103
|
Args:
|
|
@@ -91,27 +114,29 @@ def get_xlsx_schema(engine: str, file_path: str, sheet_name: str, start_row: int
|
|
|
91
114
|
A list of FlowfileColumn objects representing the schema.
|
|
92
115
|
"""
|
|
93
116
|
try:
|
|
94
|
-
logger.info(
|
|
95
|
-
if engine ==
|
|
117
|
+
logger.info("Starting to calculate the schema")
|
|
118
|
+
if engine == "openpyxl":
|
|
96
119
|
max_col = end_column if end_column > 0 else None
|
|
97
|
-
return get_open_xlsx_datatypes(
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
120
|
+
return get_open_xlsx_datatypes(
|
|
121
|
+
file_path=file_path,
|
|
122
|
+
sheet_name=sheet_name,
|
|
123
|
+
min_row=start_row + 1,
|
|
124
|
+
min_col=start_column + 1,
|
|
125
|
+
max_row=100,
|
|
126
|
+
max_col=max_col,
|
|
127
|
+
has_headers=has_headers,
|
|
128
|
+
)
|
|
129
|
+
elif engine == "calamine":
|
|
130
|
+
return get_calamine_xlsx_data_types(
|
|
131
|
+
file_path=file_path, sheet_name=sheet_name, start_row=start_row, end_row=end_row
|
|
132
|
+
)
|
|
133
|
+
logger.info("done calculating the schema")
|
|
109
134
|
except Exception as e:
|
|
110
135
|
logger.error(e)
|
|
111
136
|
return []
|
|
112
137
|
|
|
113
138
|
|
|
114
|
-
def skip_node_message(flow_logger: FlowLogger, nodes:
|
|
139
|
+
def skip_node_message(flow_logger: FlowLogger, nodes: list[FlowNode]) -> None:
|
|
115
140
|
"""Logs a warning message listing all nodes that will be skipped during execution.
|
|
116
141
|
|
|
117
142
|
Args:
|
|
@@ -120,10 +145,10 @@ def skip_node_message(flow_logger: FlowLogger, nodes: List[FlowNode]) -> None:
|
|
|
120
145
|
"""
|
|
121
146
|
if len(nodes) > 0:
|
|
122
147
|
msg = "\n".join(str(node) for node in nodes)
|
|
123
|
-
flow_logger.warning(f
|
|
148
|
+
flow_logger.warning(f"skipping nodes:\n{msg}")
|
|
124
149
|
|
|
125
150
|
|
|
126
|
-
def execution_order_message(flow_logger: FlowLogger, nodes:
|
|
151
|
+
def execution_order_message(flow_logger: FlowLogger, nodes: list[FlowNode]) -> None:
|
|
127
152
|
"""Logs an informational message showing the determined execution order of nodes.
|
|
128
153
|
|
|
129
154
|
Args:
|
|
@@ -131,11 +156,19 @@ def execution_order_message(flow_logger: FlowLogger, nodes: List[FlowNode]) -> N
|
|
|
131
156
|
nodes: A list of FlowNode objects in the order they will be executed.
|
|
132
157
|
"""
|
|
133
158
|
msg = "\n".join(str(node) for node in nodes)
|
|
134
|
-
flow_logger.info(f
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
def get_xlsx_schema_callback(
|
|
138
|
-
|
|
159
|
+
flow_logger.info(f"execution order:\n{msg}")
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def get_xlsx_schema_callback(
|
|
163
|
+
engine: str,
|
|
164
|
+
file_path: str,
|
|
165
|
+
sheet_name: str,
|
|
166
|
+
start_row: int,
|
|
167
|
+
start_column: int,
|
|
168
|
+
end_row: int,
|
|
169
|
+
end_column: int,
|
|
170
|
+
has_headers: bool,
|
|
171
|
+
):
|
|
139
172
|
"""Creates a partially applied function for lazy calculation of an XLSX schema.
|
|
140
173
|
|
|
141
174
|
Args:
|
|
@@ -151,12 +184,22 @@ def get_xlsx_schema_callback(engine: str, file_path: str, sheet_name: str, start
|
|
|
151
184
|
Returns:
|
|
152
185
|
A callable function that, when called, will execute `get_xlsx_schema`.
|
|
153
186
|
"""
|
|
154
|
-
return partial(
|
|
155
|
-
|
|
187
|
+
return partial(
|
|
188
|
+
get_xlsx_schema,
|
|
189
|
+
engine=engine,
|
|
190
|
+
file_path=file_path,
|
|
191
|
+
sheet_name=sheet_name,
|
|
192
|
+
start_row=start_row,
|
|
193
|
+
start_column=start_column,
|
|
194
|
+
end_row=end_row,
|
|
195
|
+
end_column=end_column,
|
|
196
|
+
has_headers=has_headers,
|
|
197
|
+
)
|
|
156
198
|
|
|
157
199
|
|
|
158
|
-
def get_cloud_connection_settings(
|
|
159
|
-
|
|
200
|
+
def get_cloud_connection_settings(
|
|
201
|
+
connection_name: str, user_id: int, auth_mode: AuthMethod
|
|
202
|
+
) -> FullCloudStorageConnection:
|
|
160
203
|
"""Retrieves cloud storage connection settings, falling back to environment variables if needed.
|
|
161
204
|
|
|
162
205
|
Args:
|
|
@@ -186,32 +229,44 @@ class FlowGraph:
|
|
|
186
229
|
|
|
187
230
|
It manages nodes, connections, and the execution of the entire flow.
|
|
188
231
|
"""
|
|
232
|
+
|
|
189
233
|
uuid: str
|
|
190
|
-
depends_on:
|
|
234
|
+
depends_on: dict[
|
|
235
|
+
int,
|
|
236
|
+
Union[
|
|
237
|
+
ParquetFile,
|
|
238
|
+
FlowDataEngine,
|
|
239
|
+
"FlowGraph",
|
|
240
|
+
pl.DataFrame,
|
|
241
|
+
],
|
|
242
|
+
]
|
|
191
243
|
_flow_id: int
|
|
192
244
|
_input_data: Union[ParquetFile, FlowDataEngine, "FlowGraph"]
|
|
193
|
-
_input_cols:
|
|
194
|
-
_output_cols:
|
|
195
|
-
_node_db:
|
|
196
|
-
_node_ids:
|
|
197
|
-
_results:
|
|
245
|
+
_input_cols: list[str]
|
|
246
|
+
_output_cols: list[str]
|
|
247
|
+
_node_db: dict[str | int, FlowNode]
|
|
248
|
+
_node_ids: list[str | int]
|
|
249
|
+
_results: FlowDataEngine | None = None
|
|
198
250
|
cache_results: bool = False
|
|
199
|
-
schema:
|
|
251
|
+
schema: list[FlowfileColumn] | None = None
|
|
200
252
|
has_over_row_function: bool = False
|
|
201
|
-
_flow_starts:
|
|
202
|
-
latest_run_info:
|
|
253
|
+
_flow_starts: list[int | str] = None
|
|
254
|
+
latest_run_info: RunInformation | None = None
|
|
203
255
|
start_datetime: datetime = None
|
|
204
256
|
end_datetime: datetime = None
|
|
205
257
|
_flow_settings: schemas.FlowSettings = None
|
|
206
258
|
flow_logger: FlowLogger
|
|
207
259
|
|
|
208
|
-
def __init__(
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
260
|
+
def __init__(
|
|
261
|
+
self,
|
|
262
|
+
flow_settings: schemas.FlowSettings | schemas.FlowGraphConfig,
|
|
263
|
+
name: str = None,
|
|
264
|
+
input_cols: list[str] = None,
|
|
265
|
+
output_cols: list[str] = None,
|
|
266
|
+
path_ref: str = None,
|
|
267
|
+
input_flow: Union[ParquetFile, FlowDataEngine, "FlowGraph"] = None,
|
|
268
|
+
cache_results: bool = False,
|
|
269
|
+
):
|
|
215
270
|
"""Initializes a new FlowGraph instance.
|
|
216
271
|
|
|
217
272
|
Args:
|
|
@@ -233,7 +288,7 @@ class FlowGraph:
|
|
|
233
288
|
self.latest_run_info = None
|
|
234
289
|
self._flow_id = flow_settings.flow_id
|
|
235
290
|
self.flow_logger = FlowLogger(flow_settings.flow_id)
|
|
236
|
-
self._flow_starts:
|
|
291
|
+
self._flow_starts: list[FlowNode] = []
|
|
237
292
|
self._results = None
|
|
238
293
|
self.schema = None
|
|
239
294
|
self.has_over_row_function = False
|
|
@@ -255,13 +310,21 @@ class FlowGraph:
|
|
|
255
310
|
|
|
256
311
|
@flow_settings.setter
|
|
257
312
|
def flow_settings(self, flow_settings: schemas.FlowSettings):
|
|
258
|
-
if (
|
|
259
|
-
|
|
260
|
-
(self._flow_settings.execution_mode != flow_settings.execution_mode)
|
|
313
|
+
if (self._flow_settings.execution_location != flow_settings.execution_location) or (
|
|
314
|
+
self._flow_settings.execution_mode != flow_settings.execution_mode
|
|
261
315
|
):
|
|
262
316
|
self.reset()
|
|
263
317
|
self._flow_settings = flow_settings
|
|
264
318
|
|
|
319
|
+
def add_node_to_starting_list(self, node: FlowNode) -> None:
|
|
320
|
+
"""Adds a node to the list of starting nodes for the flow if not already present.
|
|
321
|
+
|
|
322
|
+
Args:
|
|
323
|
+
node: The FlowNode to add as a starting node.
|
|
324
|
+
"""
|
|
325
|
+
if node.node_id not in {self_node.node_id for self_node in self._flow_starts}:
|
|
326
|
+
self._flow_starts.append(node)
|
|
327
|
+
|
|
265
328
|
def add_node_promise(self, node_promise: input_schema.NodePromise):
|
|
266
329
|
"""Adds a placeholder node to the graph that is not yet fully configured.
|
|
267
330
|
|
|
@@ -270,13 +333,31 @@ class FlowGraph:
|
|
|
270
333
|
Args:
|
|
271
334
|
node_promise: A promise object containing basic node information.
|
|
272
335
|
"""
|
|
336
|
+
|
|
273
337
|
def placeholder(n: FlowNode = None):
|
|
274
338
|
if n is None:
|
|
275
339
|
return FlowDataEngine()
|
|
276
340
|
return n
|
|
277
341
|
|
|
278
|
-
self.add_node_step(
|
|
279
|
-
|
|
342
|
+
self.add_node_step(
|
|
343
|
+
node_id=node_promise.node_id,
|
|
344
|
+
node_type=node_promise.node_type,
|
|
345
|
+
function=placeholder,
|
|
346
|
+
setting_input=node_promise,
|
|
347
|
+
)
|
|
348
|
+
if node_promise.is_user_defined:
|
|
349
|
+
node_needs_settings: bool
|
|
350
|
+
custom_node = CUSTOM_NODE_STORE.get(node_promise.node_type)
|
|
351
|
+
if custom_node is None:
|
|
352
|
+
raise Exception(f"Custom node type '{node_promise.node_type}' not found in registry.")
|
|
353
|
+
settings_schema = custom_node.model_fields["settings_schema"].default
|
|
354
|
+
node_needs_settings = settings_schema is not None and not settings_schema.is_empty()
|
|
355
|
+
if not node_needs_settings:
|
|
356
|
+
user_defined_node_settings = input_schema.UserDefinedNode(settings={}, **node_promise.model_dump())
|
|
357
|
+
initialized_model = custom_node()
|
|
358
|
+
self.add_user_defined_node(
|
|
359
|
+
custom_node=initialized_model, user_defined_node_settings=user_defined_node_settings
|
|
360
|
+
)
|
|
280
361
|
|
|
281
362
|
def apply_layout(self, y_spacing: int = 150, x_spacing: int = 200, initial_y: int = 100):
|
|
282
363
|
"""Calculates and applies a layered layout to all nodes in the graph.
|
|
@@ -304,20 +385,24 @@ class FlowGraph:
|
|
|
304
385
|
updated_count = 0
|
|
305
386
|
for node_id, (pos_x, pos_y) in new_positions.items():
|
|
306
387
|
node = self.get_node(node_id)
|
|
307
|
-
if node and hasattr(node,
|
|
388
|
+
if node and hasattr(node, "setting_input"):
|
|
308
389
|
setting = node.setting_input
|
|
309
|
-
if hasattr(setting,
|
|
390
|
+
if hasattr(setting, "pos_x") and hasattr(setting, "pos_y"):
|
|
310
391
|
setting.pos_x = pos_x
|
|
311
392
|
setting.pos_y = pos_y
|
|
312
393
|
updated_count += 1
|
|
313
394
|
else:
|
|
314
|
-
self.flow_logger.warning(
|
|
395
|
+
self.flow_logger.warning(
|
|
396
|
+
f"Node {node_id} setting_input ({type(setting)}) lacks pos_x/pos_y attributes."
|
|
397
|
+
)
|
|
315
398
|
elif node:
|
|
316
399
|
self.flow_logger.warning(f"Node {node_id} lacks setting_input attribute.")
|
|
317
400
|
# else: Node not found, already warned by calculate_layered_layout
|
|
318
401
|
|
|
319
402
|
end_time = time()
|
|
320
|
-
self.flow_logger.info(
|
|
403
|
+
self.flow_logger.info(
|
|
404
|
+
f"Layout applied to {updated_count}/{len(self.nodes)} nodes in {end_time - start_time:.2f} seconds."
|
|
405
|
+
)
|
|
321
406
|
|
|
322
407
|
except Exception as e:
|
|
323
408
|
self.flow_logger.error(f"Error applying layout: {e}")
|
|
@@ -337,13 +422,13 @@ class FlowGraph:
|
|
|
337
422
|
"""
|
|
338
423
|
self._flow_id = new_id
|
|
339
424
|
for node in self.nodes:
|
|
340
|
-
if hasattr(node.setting_input,
|
|
425
|
+
if hasattr(node.setting_input, "flow_id"):
|
|
341
426
|
node.setting_input.flow_id = new_id
|
|
342
427
|
self.flow_settings.flow_id = new_id
|
|
343
428
|
|
|
344
429
|
def __repr__(self):
|
|
345
430
|
"""Provides the official string representation of the FlowGraph instance."""
|
|
346
|
-
settings_str = " -" +
|
|
431
|
+
settings_str = " -" + "\n -".join(f"{k}: {v}" for k, v in self.flow_settings)
|
|
347
432
|
return f"FlowGraph(\nNodes: {self._node_db}\n\nSettings:\n{settings_str}"
|
|
348
433
|
|
|
349
434
|
def print_tree(self):
|
|
@@ -361,7 +446,7 @@ class FlowGraph:
|
|
|
361
446
|
|
|
362
447
|
# Group nodes by depth
|
|
363
448
|
depth_groups, max_depth = group_nodes_by_depth(node_info)
|
|
364
|
-
|
|
449
|
+
|
|
365
450
|
# Sort nodes within each depth group
|
|
366
451
|
for depth in depth_groups:
|
|
367
452
|
depth_groups[depth].sort()
|
|
@@ -371,7 +456,7 @@ class FlowGraph:
|
|
|
371
456
|
|
|
372
457
|
# Track which nodes connect to what
|
|
373
458
|
merge_points = define_node_connections(node_info)
|
|
374
|
-
|
|
459
|
+
|
|
375
460
|
# Build the flow paths
|
|
376
461
|
|
|
377
462
|
# Find the maximum label length for each depth level
|
|
@@ -380,15 +465,15 @@ class FlowGraph:
|
|
|
380
465
|
if depth in depth_groups:
|
|
381
466
|
max_len = max(len(node_info[nid].label) for nid in depth_groups[depth])
|
|
382
467
|
max_label_length[depth] = max_len
|
|
383
|
-
|
|
468
|
+
|
|
384
469
|
# Draw the paths
|
|
385
470
|
drawn_nodes = set()
|
|
386
471
|
merge_drawn = set()
|
|
387
|
-
|
|
472
|
+
|
|
388
473
|
# Group paths by their merge points
|
|
389
474
|
paths_by_merge = {}
|
|
390
475
|
standalone_paths = []
|
|
391
|
-
|
|
476
|
+
|
|
392
477
|
# Build flow paths
|
|
393
478
|
paths = build_flow_paths(node_info, self._flow_starts, merge_points)
|
|
394
479
|
|
|
@@ -410,22 +495,22 @@ class FlowGraph:
|
|
|
410
495
|
|
|
411
496
|
# Add undrawn nodes
|
|
412
497
|
add_un_drawn_nodes(drawn_nodes, node_info, lines)
|
|
413
|
-
|
|
498
|
+
|
|
414
499
|
try:
|
|
415
500
|
skip_nodes, ordered_nodes = compute_execution_plan(
|
|
416
|
-
nodes=self.nodes,
|
|
417
|
-
|
|
501
|
+
nodes=self.nodes, flow_starts=self._flow_starts + self.get_implicit_starter_nodes()
|
|
502
|
+
)
|
|
418
503
|
if ordered_nodes:
|
|
419
504
|
for i, node in enumerate(ordered_nodes, 1):
|
|
420
505
|
lines.append(f" {i:3d}. {node_info[node.node_id].label}")
|
|
421
506
|
except Exception as e:
|
|
422
507
|
lines.append(f" Could not determine execution order: {e}")
|
|
423
|
-
|
|
508
|
+
|
|
424
509
|
# Print everything
|
|
425
510
|
output = "\n".join(lines)
|
|
426
|
-
|
|
511
|
+
|
|
427
512
|
print(output)
|
|
428
|
-
|
|
513
|
+
|
|
429
514
|
def get_nodes_overview(self):
|
|
430
515
|
"""Gets a list of dictionary representations for all nodes in the graph."""
|
|
431
516
|
output = []
|
|
@@ -433,7 +518,7 @@ class FlowGraph:
|
|
|
433
518
|
output.append(v.get_repr())
|
|
434
519
|
return output
|
|
435
520
|
|
|
436
|
-
def remove_from_output_cols(self, columns:
|
|
521
|
+
def remove_from_output_cols(self, columns: list[str]):
|
|
437
522
|
"""Removes specified columns from the list of expected output columns.
|
|
438
523
|
|
|
439
524
|
Args:
|
|
@@ -442,7 +527,7 @@ class FlowGraph:
|
|
|
442
527
|
cols = set(columns)
|
|
443
528
|
self._output_cols = [c for c in self._output_cols if c not in cols]
|
|
444
529
|
|
|
445
|
-
def get_node(self, node_id:
|
|
530
|
+
def get_node(self, node_id: int | str = None) -> FlowNode | None:
|
|
446
531
|
"""Retrieves a node from the graph by its ID.
|
|
447
532
|
|
|
448
533
|
Args:
|
|
@@ -456,24 +541,43 @@ class FlowGraph:
|
|
|
456
541
|
node = self._node_db.get(node_id)
|
|
457
542
|
if node is not None:
|
|
458
543
|
return node
|
|
459
|
-
|
|
460
|
-
def add_user_defined_node(
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
544
|
+
|
|
545
|
+
def add_user_defined_node(
|
|
546
|
+
self, *, custom_node: CustomNodeBase, user_defined_node_settings: input_schema.UserDefinedNode
|
|
547
|
+
):
|
|
548
|
+
"""Adds a user-defined custom node to the graph.
|
|
549
|
+
|
|
550
|
+
Args:
|
|
551
|
+
custom_node: The custom node instance to add.
|
|
552
|
+
user_defined_node_settings: The settings for the user-defined node.
|
|
553
|
+
"""
|
|
554
|
+
|
|
555
|
+
def _func(*flow_data_engine: FlowDataEngine) -> FlowDataEngine | None:
|
|
556
|
+
user_id = user_defined_node_settings.user_id
|
|
557
|
+
if user_id is not None:
|
|
558
|
+
custom_node.set_execution_context(user_id)
|
|
559
|
+
if custom_node.settings_schema:
|
|
560
|
+
custom_node.settings_schema.set_secret_context(user_id, custom_node.accessed_secrets)
|
|
561
|
+
|
|
562
|
+
output = custom_node.process(*(fde.data_frame for fde in flow_data_engine))
|
|
563
|
+
|
|
564
|
+
accessed_secrets = custom_node.get_accessed_secrets()
|
|
565
|
+
if accessed_secrets:
|
|
566
|
+
logger.info(f"Node '{user_defined_node_settings.node_id}' accessed secrets: {accessed_secrets}")
|
|
567
|
+
if isinstance(output, (pl.LazyFrame, pl.DataFrame)):
|
|
468
568
|
return FlowDataEngine(output)
|
|
469
569
|
return None
|
|
470
|
-
|
|
471
|
-
self.add_node_step(
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
570
|
+
|
|
571
|
+
self.add_node_step(
|
|
572
|
+
node_id=user_defined_node_settings.node_id,
|
|
573
|
+
function=_func,
|
|
574
|
+
setting_input=user_defined_node_settings,
|
|
575
|
+
input_node_ids=user_defined_node_settings.depending_on_ids,
|
|
576
|
+
node_type=custom_node.item,
|
|
577
|
+
)
|
|
578
|
+
if custom_node.number_of_inputs == 0:
|
|
579
|
+
node = self.get_node(user_defined_node_settings.node_id)
|
|
580
|
+
self.add_node_to_starting_list(node)
|
|
477
581
|
|
|
478
582
|
def add_pivot(self, pivot_settings: input_schema.NodePivot):
|
|
479
583
|
"""Adds a pivot node to the graph.
|
|
@@ -485,11 +589,13 @@ class FlowGraph:
|
|
|
485
589
|
def _func(fl: FlowDataEngine):
|
|
486
590
|
return fl.do_pivot(pivot_settings.pivot_input, self.flow_logger.get_node_logger(pivot_settings.node_id))
|
|
487
591
|
|
|
488
|
-
self.add_node_step(
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
592
|
+
self.add_node_step(
|
|
593
|
+
node_id=pivot_settings.node_id,
|
|
594
|
+
function=_func,
|
|
595
|
+
node_type="pivot",
|
|
596
|
+
setting_input=pivot_settings,
|
|
597
|
+
input_node_ids=[pivot_settings.depending_on_id],
|
|
598
|
+
)
|
|
493
599
|
|
|
494
600
|
node = self.get_node(pivot_settings.node_id)
|
|
495
601
|
|
|
@@ -498,6 +604,7 @@ class FlowGraph:
|
|
|
498
604
|
input_data.lazy = True # ensure the dataset is lazy
|
|
499
605
|
input_lf = input_data.data_frame # get the lazy frame
|
|
500
606
|
return pre_calculate_pivot_schema(input_data.schema, pivot_settings.pivot_input, input_lf=input_lf)
|
|
607
|
+
|
|
501
608
|
node.schema_callback = schema_callback
|
|
502
609
|
|
|
503
610
|
def add_unpivot(self, unpivot_settings: input_schema.NodeUnpivot):
|
|
@@ -510,11 +617,13 @@ class FlowGraph:
|
|
|
510
617
|
def _func(fl: FlowDataEngine) -> FlowDataEngine:
|
|
511
618
|
return fl.unpivot(unpivot_settings.unpivot_input)
|
|
512
619
|
|
|
513
|
-
self.add_node_step(
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
620
|
+
self.add_node_step(
|
|
621
|
+
node_id=unpivot_settings.node_id,
|
|
622
|
+
function=_func,
|
|
623
|
+
node_type="unpivot",
|
|
624
|
+
setting_input=unpivot_settings,
|
|
625
|
+
input_node_ids=[unpivot_settings.depending_on_id],
|
|
626
|
+
)
|
|
518
627
|
|
|
519
628
|
def add_union(self, union_settings: input_schema.NodeUnion):
|
|
520
629
|
"""Adds a union node to combine multiple data streams.
|
|
@@ -524,14 +633,16 @@ class FlowGraph:
|
|
|
524
633
|
"""
|
|
525
634
|
|
|
526
635
|
def _func(*flowfile_tables: FlowDataEngine):
|
|
527
|
-
dfs:
|
|
528
|
-
return FlowDataEngine(pl.concat(dfs, how=
|
|
636
|
+
dfs: list[pl.LazyFrame] | list[pl.DataFrame] = [flt.data_frame for flt in flowfile_tables]
|
|
637
|
+
return FlowDataEngine(pl.concat(dfs, how="diagonal_relaxed"))
|
|
529
638
|
|
|
530
|
-
self.add_node_step(
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
639
|
+
self.add_node_step(
|
|
640
|
+
node_id=union_settings.node_id,
|
|
641
|
+
function=_func,
|
|
642
|
+
node_type="union",
|
|
643
|
+
setting_input=union_settings,
|
|
644
|
+
input_node_ids=union_settings.depending_on_ids,
|
|
645
|
+
)
|
|
535
646
|
|
|
536
647
|
def add_initial_node_analysis(self, node_promise: input_schema.NodePromise):
|
|
537
648
|
"""Adds a data exploration/analysis node based on a node promise.
|
|
@@ -559,13 +670,14 @@ class FlowGraph:
|
|
|
559
670
|
flowfile_table = flowfile_table.get_sample(sample_size, random=True)
|
|
560
671
|
external_sampler = ExternalDfFetcher(
|
|
561
672
|
lf=flowfile_table.data_frame,
|
|
562
|
-
file_ref="__gf_walker"+node.hash,
|
|
673
|
+
file_ref="__gf_walker" + node.hash,
|
|
563
674
|
wait_on_completion=True,
|
|
564
675
|
node_id=node.node_id,
|
|
565
676
|
flow_id=self.flow_id,
|
|
566
677
|
)
|
|
567
|
-
node.results.analysis_data_generator = get_read_top_n(
|
|
568
|
-
|
|
678
|
+
node.results.analysis_data_generator = get_read_top_n(
|
|
679
|
+
external_sampler.status.file_ref, n=min(sample_size, number_of_records)
|
|
680
|
+
)
|
|
569
681
|
return flowfile_table
|
|
570
682
|
|
|
571
683
|
def schema_callback():
|
|
@@ -574,11 +686,15 @@ class FlowGraph:
|
|
|
574
686
|
input_node = node.all_inputs[0]
|
|
575
687
|
return input_node.schema
|
|
576
688
|
else:
|
|
577
|
-
return [FlowfileColumn.from_input(
|
|
689
|
+
return [FlowfileColumn.from_input("col_1", "na")]
|
|
578
690
|
|
|
579
|
-
self.add_node_step(
|
|
580
|
-
|
|
581
|
-
|
|
691
|
+
self.add_node_step(
|
|
692
|
+
node_id=node_analysis.node_id,
|
|
693
|
+
node_type="explore_data",
|
|
694
|
+
function=analysis_preparation,
|
|
695
|
+
setting_input=node_analysis,
|
|
696
|
+
schema_callback=schema_callback,
|
|
697
|
+
)
|
|
582
698
|
node = self.get_node(node_analysis.node_id)
|
|
583
699
|
|
|
584
700
|
def add_group_by(self, group_by_settings: input_schema.NodeGroupBy):
|
|
@@ -591,19 +707,20 @@ class FlowGraph:
|
|
|
591
707
|
def _func(fl: FlowDataEngine) -> FlowDataEngine:
|
|
592
708
|
return fl.do_group_by(group_by_settings.groupby_input, False)
|
|
593
709
|
|
|
594
|
-
self.add_node_step(
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
710
|
+
self.add_node_step(
|
|
711
|
+
node_id=group_by_settings.node_id,
|
|
712
|
+
function=_func,
|
|
713
|
+
node_type="group_by",
|
|
714
|
+
setting_input=group_by_settings,
|
|
715
|
+
input_node_ids=[group_by_settings.depending_on_id],
|
|
716
|
+
)
|
|
599
717
|
|
|
600
718
|
node = self.get_node(group_by_settings.node_id)
|
|
601
719
|
|
|
602
720
|
def schema_callback():
|
|
603
|
-
|
|
604
721
|
output_columns = [(c.old_name, c.new_name, c.output_type) for c in group_by_settings.groupby_input.agg_cols]
|
|
605
722
|
depends_on = node.node_inputs.main_inputs[0]
|
|
606
|
-
input_schema_dict:
|
|
723
|
+
input_schema_dict: dict[str, str] = {s.name: s.data_type for s in depends_on.schema}
|
|
607
724
|
output_schema = []
|
|
608
725
|
for old_name, new_name, data_type in output_columns:
|
|
609
726
|
data_type = input_schema_dict[old_name] if data_type is None else data_type
|
|
@@ -618,38 +735,148 @@ class FlowGraph:
|
|
|
618
735
|
Args:
|
|
619
736
|
filter_settings: The settings for the filter operation.
|
|
620
737
|
"""
|
|
738
|
+
from flowfile_core.schemas.transform_schema import FilterOperator
|
|
739
|
+
|
|
740
|
+
def _build_basic_filter_expression(
|
|
741
|
+
basic_filter: transform_schema.BasicFilter, field_data_type: str | None = None
|
|
742
|
+
) -> str:
|
|
743
|
+
"""Build a filter expression string from a BasicFilter object.
|
|
744
|
+
|
|
745
|
+
Uses the Flowfile expression language that is compatible with polars_expr_transformer.
|
|
746
|
+
|
|
747
|
+
Args:
|
|
748
|
+
basic_filter: The basic filter configuration.
|
|
749
|
+
field_data_type: The data type of the field (optional, for smart quoting).
|
|
750
|
+
|
|
751
|
+
Returns:
|
|
752
|
+
A filter expression string compatible with polars_expr_transformer.
|
|
753
|
+
"""
|
|
754
|
+
field = f"[{basic_filter.field}]"
|
|
755
|
+
value = basic_filter.value
|
|
756
|
+
value2 = basic_filter.value2
|
|
757
|
+
|
|
758
|
+
is_numeric_value = value.replace(".", "", 1).replace("-", "", 1).isnumeric() if value else False
|
|
759
|
+
should_quote = field_data_type == "str" or not is_numeric_value
|
|
760
|
+
|
|
761
|
+
try:
|
|
762
|
+
operator = basic_filter.get_operator()
|
|
763
|
+
except (ValueError, AttributeError):
|
|
764
|
+
operator = FilterOperator.from_symbol(str(basic_filter.operator))
|
|
765
|
+
|
|
766
|
+
if operator == FilterOperator.EQUALS:
|
|
767
|
+
if should_quote:
|
|
768
|
+
return f'{field}="{value}"'
|
|
769
|
+
return f"{field}={value}"
|
|
770
|
+
|
|
771
|
+
elif operator == FilterOperator.NOT_EQUALS:
|
|
772
|
+
if should_quote:
|
|
773
|
+
return f'{field}!="{value}"'
|
|
774
|
+
return f"{field}!={value}"
|
|
775
|
+
|
|
776
|
+
elif operator == FilterOperator.GREATER_THAN:
|
|
777
|
+
if should_quote:
|
|
778
|
+
return f'{field}>"{value}"'
|
|
779
|
+
return f"{field}>{value}"
|
|
780
|
+
|
|
781
|
+
elif operator == FilterOperator.GREATER_THAN_OR_EQUALS:
|
|
782
|
+
if should_quote:
|
|
783
|
+
return f'{field}>="{value}"'
|
|
784
|
+
return f"{field}>={value}"
|
|
785
|
+
|
|
786
|
+
elif operator == FilterOperator.LESS_THAN:
|
|
787
|
+
if should_quote:
|
|
788
|
+
return f'{field}<"{value}"'
|
|
789
|
+
return f"{field}<{value}"
|
|
790
|
+
|
|
791
|
+
elif operator == FilterOperator.LESS_THAN_OR_EQUALS:
|
|
792
|
+
if should_quote:
|
|
793
|
+
return f'{field}<="{value}"'
|
|
794
|
+
return f"{field}<={value}"
|
|
795
|
+
|
|
796
|
+
elif operator == FilterOperator.CONTAINS:
|
|
797
|
+
return f'contains({field}, "{value}")'
|
|
798
|
+
|
|
799
|
+
elif operator == FilterOperator.NOT_CONTAINS:
|
|
800
|
+
return f'contains({field}, "{value}") = false'
|
|
801
|
+
|
|
802
|
+
elif operator == FilterOperator.STARTS_WITH:
|
|
803
|
+
return f'left({field}, {len(value)}) = "{value}"'
|
|
804
|
+
|
|
805
|
+
elif operator == FilterOperator.ENDS_WITH:
|
|
806
|
+
return f'right({field}, {len(value)}) = "{value}"'
|
|
807
|
+
|
|
808
|
+
elif operator == FilterOperator.IS_NULL:
|
|
809
|
+
return f"is_empty({field})"
|
|
810
|
+
|
|
811
|
+
elif operator == FilterOperator.IS_NOT_NULL:
|
|
812
|
+
return f"is_not_empty({field})"
|
|
813
|
+
|
|
814
|
+
elif operator == FilterOperator.IN:
|
|
815
|
+
values = [v.strip() for v in value.split(",")]
|
|
816
|
+
if len(values) == 1:
|
|
817
|
+
if should_quote:
|
|
818
|
+
return f'{field}="{values[0]}"'
|
|
819
|
+
return f"{field}={values[0]}"
|
|
820
|
+
if should_quote:
|
|
821
|
+
conditions = [f'({field}="{v}")' for v in values]
|
|
822
|
+
else:
|
|
823
|
+
conditions = [f"({field}={v})" for v in values]
|
|
824
|
+
return " | ".join(conditions)
|
|
825
|
+
|
|
826
|
+
elif operator == FilterOperator.NOT_IN:
|
|
827
|
+
values = [v.strip() for v in value.split(",")]
|
|
828
|
+
if len(values) == 1:
|
|
829
|
+
if should_quote:
|
|
830
|
+
return f'{field}!="{values[0]}"'
|
|
831
|
+
return f"{field}!={values[0]}"
|
|
832
|
+
if should_quote:
|
|
833
|
+
conditions = [f'({field}!="{v}")' for v in values]
|
|
834
|
+
else:
|
|
835
|
+
conditions = [f"({field}!={v})" for v in values]
|
|
836
|
+
return " & ".join(conditions)
|
|
621
837
|
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
838
|
+
elif operator == FilterOperator.BETWEEN:
|
|
839
|
+
if value2 is None:
|
|
840
|
+
raise ValueError("BETWEEN operator requires value2")
|
|
841
|
+
if should_quote:
|
|
842
|
+
return f'({field}>="{value}") & ({field}<="{value2}")'
|
|
843
|
+
return f"({field}>={value}) & ({field}<={value2})"
|
|
844
|
+
|
|
845
|
+
else:
|
|
846
|
+
# Fallback for unknown operators - use legacy format
|
|
847
|
+
if should_quote:
|
|
848
|
+
return f'{field}{operator.to_symbol()}"{value}"'
|
|
849
|
+
return f"{field}{operator.to_symbol()}{value}"
|
|
629
850
|
|
|
630
851
|
def _func(fl: FlowDataEngine):
|
|
631
|
-
is_advanced = filter_settings.filter_input.
|
|
852
|
+
is_advanced = filter_settings.filter_input.is_advanced()
|
|
853
|
+
|
|
632
854
|
if is_advanced:
|
|
855
|
+
predicate = filter_settings.filter_input.advanced_filter
|
|
633
856
|
return fl.do_filter(predicate)
|
|
634
857
|
else:
|
|
635
858
|
basic_filter = filter_settings.filter_input.basic_filter
|
|
636
|
-
if basic_filter
|
|
859
|
+
if basic_filter is None:
|
|
860
|
+
logger.warning("Basic filter is None, returning unfiltered data")
|
|
861
|
+
return fl
|
|
862
|
+
|
|
863
|
+
try:
|
|
637
864
|
field_data_type = fl.get_schema_column(basic_filter.field).generic_datatype()
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
else:
|
|
641
|
-
_f = f'[{basic_filter.field}]{basic_filter.filter_type}{basic_filter.filter_value}'
|
|
642
|
-
else:
|
|
643
|
-
_f = f'[{basic_filter.field}]{basic_filter.filter_type}"{basic_filter.filter_value}"'
|
|
644
|
-
filter_settings.filter_input.advanced_filter = _f
|
|
645
|
-
return fl.do_filter(_f)
|
|
865
|
+
except Exception:
|
|
866
|
+
field_data_type = None
|
|
646
867
|
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
868
|
+
expression = _build_basic_filter_expression(basic_filter, field_data_type)
|
|
869
|
+
filter_settings.filter_input.advanced_filter = expression
|
|
870
|
+
return fl.do_filter(expression)
|
|
871
|
+
|
|
872
|
+
self.add_node_step(
|
|
873
|
+
filter_settings.node_id,
|
|
874
|
+
_func,
|
|
875
|
+
node_type="filter",
|
|
876
|
+
renew_schema=False,
|
|
877
|
+
setting_input=filter_settings,
|
|
878
|
+
input_node_ids=[filter_settings.depending_on_id],
|
|
879
|
+
)
|
|
653
880
|
|
|
654
881
|
def add_record_count(self, node_number_of_records: input_schema.NodeRecordCount):
|
|
655
882
|
"""Adds a filter node to the graph.
|
|
@@ -661,11 +888,13 @@ class FlowGraph:
|
|
|
661
888
|
def _func(fl: FlowDataEngine) -> FlowDataEngine:
|
|
662
889
|
return fl.get_record_count()
|
|
663
890
|
|
|
664
|
-
self.add_node_step(
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
891
|
+
self.add_node_step(
|
|
892
|
+
node_id=node_number_of_records.node_id,
|
|
893
|
+
function=_func,
|
|
894
|
+
node_type="record_count",
|
|
895
|
+
setting_input=node_number_of_records,
|
|
896
|
+
input_node_ids=[node_number_of_records.depending_on_id],
|
|
897
|
+
)
|
|
669
898
|
|
|
670
899
|
def add_polars_code(self, node_polars_code: input_schema.NodePolarsCode):
|
|
671
900
|
"""Adds a node that executes custom Polars code.
|
|
@@ -676,11 +905,14 @@ class FlowGraph:
|
|
|
676
905
|
|
|
677
906
|
def _func(*flowfile_tables: FlowDataEngine) -> FlowDataEngine:
|
|
678
907
|
return execute_polars_code(*flowfile_tables, code=node_polars_code.polars_code_input.polars_code)
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
908
|
+
|
|
909
|
+
self.add_node_step(
|
|
910
|
+
node_id=node_polars_code.node_id,
|
|
911
|
+
function=_func,
|
|
912
|
+
node_type="polars_code",
|
|
913
|
+
setting_input=node_polars_code,
|
|
914
|
+
input_node_ids=node_polars_code.depending_on_ids,
|
|
915
|
+
)
|
|
684
916
|
|
|
685
917
|
try:
|
|
686
918
|
polars_code_parser.validate_code(node_polars_code.polars_code_input.polars_code)
|
|
@@ -688,9 +920,7 @@ class FlowGraph:
|
|
|
688
920
|
node = self.get_node(node_id=node_polars_code.node_id)
|
|
689
921
|
node.results.errors = str(e)
|
|
690
922
|
|
|
691
|
-
def add_dependency_on_polars_lazy_frame(self,
|
|
692
|
-
lazy_frame: pl.LazyFrame,
|
|
693
|
-
node_id: int):
|
|
923
|
+
def add_dependency_on_polars_lazy_frame(self, lazy_frame: pl.LazyFrame, node_id: int):
|
|
694
924
|
"""Adds a special node that directly injects a Polars LazyFrame into the graph.
|
|
695
925
|
|
|
696
926
|
Note: This is intended for backend use and will not work in the UI editor.
|
|
@@ -699,13 +929,16 @@ class FlowGraph:
|
|
|
699
929
|
lazy_frame: The Polars LazyFrame to inject.
|
|
700
930
|
node_id: The ID for the new node.
|
|
701
931
|
"""
|
|
932
|
+
|
|
702
933
|
def _func():
|
|
703
934
|
return FlowDataEngine(lazy_frame)
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
935
|
+
|
|
936
|
+
node_promise = input_schema.NodePromise(
|
|
937
|
+
flow_id=self.flow_id, node_id=node_id, node_type="polars_lazy_frame", is_setup=True
|
|
938
|
+
)
|
|
939
|
+
self.add_node_step(
|
|
940
|
+
node_id=node_promise.node_id, node_type=node_promise.node_type, function=_func, setting_input=node_promise
|
|
941
|
+
)
|
|
709
942
|
|
|
710
943
|
def add_unique(self, unique_settings: input_schema.NodeUnique):
|
|
711
944
|
"""Adds a node to find and remove duplicate rows.
|
|
@@ -717,12 +950,14 @@ class FlowGraph:
|
|
|
717
950
|
def _func(fl: FlowDataEngine) -> FlowDataEngine:
|
|
718
951
|
return fl.make_unique(unique_settings.unique_input)
|
|
719
952
|
|
|
720
|
-
self.add_node_step(
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
953
|
+
self.add_node_step(
|
|
954
|
+
node_id=unique_settings.node_id,
|
|
955
|
+
function=_func,
|
|
956
|
+
input_columns=[],
|
|
957
|
+
node_type="unique",
|
|
958
|
+
setting_input=unique_settings,
|
|
959
|
+
input_node_ids=[unique_settings.depending_on_id],
|
|
960
|
+
)
|
|
726
961
|
|
|
727
962
|
def add_graph_solver(self, graph_solver_settings: input_schema.NodeGraphSolver):
|
|
728
963
|
"""Adds a node that solves graph-like problems within the data.
|
|
@@ -735,14 +970,17 @@ class FlowGraph:
|
|
|
735
970
|
graph_solver_settings: The settings object defining the graph inputs
|
|
736
971
|
and the specific algorithm to apply.
|
|
737
972
|
"""
|
|
973
|
+
|
|
738
974
|
def _func(fl: FlowDataEngine) -> FlowDataEngine:
|
|
739
975
|
return fl.solve_graph(graph_solver_settings.graph_solver_input)
|
|
740
976
|
|
|
741
|
-
self.add_node_step(
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
977
|
+
self.add_node_step(
|
|
978
|
+
node_id=graph_solver_settings.node_id,
|
|
979
|
+
function=_func,
|
|
980
|
+
node_type="graph_solver",
|
|
981
|
+
setting_input=graph_solver_settings,
|
|
982
|
+
input_node_ids=[graph_solver_settings.depending_on_id],
|
|
983
|
+
)
|
|
746
984
|
|
|
747
985
|
def add_formula(self, function_settings: input_schema.NodeFormula):
|
|
748
986
|
"""Adds a node that applies a formula to create or modify a column.
|
|
@@ -757,24 +995,28 @@ class FlowGraph:
|
|
|
757
995
|
else:
|
|
758
996
|
output_type = None
|
|
759
997
|
if output_type not in (None, transform_schema.AUTO_DATA_TYPE):
|
|
760
|
-
new_col = [
|
|
761
|
-
|
|
998
|
+
new_col = [
|
|
999
|
+
FlowfileColumn.from_input(column_name=function_settings.function.field.name, data_type=str(output_type))
|
|
1000
|
+
]
|
|
762
1001
|
else:
|
|
763
|
-
new_col = [FlowfileColumn.from_input(function_settings.function.field.name,
|
|
1002
|
+
new_col = [FlowfileColumn.from_input(function_settings.function.field.name, "String")]
|
|
764
1003
|
|
|
765
1004
|
def _func(fl: FlowDataEngine):
|
|
766
|
-
return fl.apply_sql_formula(
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
1005
|
+
return fl.apply_sql_formula(
|
|
1006
|
+
func=function_settings.function.function,
|
|
1007
|
+
col_name=function_settings.function.field.name,
|
|
1008
|
+
output_data_type=output_type,
|
|
1009
|
+
)
|
|
1010
|
+
|
|
1011
|
+
self.add_node_step(
|
|
1012
|
+
function_settings.node_id,
|
|
1013
|
+
_func,
|
|
1014
|
+
output_schema=new_col,
|
|
1015
|
+
node_type="formula",
|
|
1016
|
+
renew_schema=False,
|
|
1017
|
+
setting_input=function_settings,
|
|
1018
|
+
input_node_ids=[function_settings.depending_on_id],
|
|
1019
|
+
)
|
|
778
1020
|
if error != "":
|
|
779
1021
|
node = self.get_node(function_settings.node_id)
|
|
780
1022
|
node.results.errors = error
|
|
@@ -791,22 +1033,27 @@ class FlowGraph:
|
|
|
791
1033
|
Returns:
|
|
792
1034
|
The `FlowGraph` instance for method chaining.
|
|
793
1035
|
"""
|
|
1036
|
+
|
|
794
1037
|
def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
|
|
795
1038
|
for left_select in cross_join_settings.cross_join_input.left_select.renames:
|
|
796
1039
|
left_select.is_available = True if left_select.old_name in main.schema else False
|
|
797
1040
|
for right_select in cross_join_settings.cross_join_input.right_select.renames:
|
|
798
1041
|
right_select.is_available = True if right_select.old_name in right.schema else False
|
|
799
|
-
return main.do_cross_join(
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
1042
|
+
return main.do_cross_join(
|
|
1043
|
+
cross_join_input=cross_join_settings.cross_join_input,
|
|
1044
|
+
auto_generate_selection=cross_join_settings.auto_generate_selection,
|
|
1045
|
+
verify_integrity=False,
|
|
1046
|
+
other=right,
|
|
1047
|
+
)
|
|
1048
|
+
|
|
1049
|
+
self.add_node_step(
|
|
1050
|
+
node_id=cross_join_settings.node_id,
|
|
1051
|
+
function=_func,
|
|
1052
|
+
input_columns=[],
|
|
1053
|
+
node_type="cross_join",
|
|
1054
|
+
setting_input=cross_join_settings,
|
|
1055
|
+
input_node_ids=cross_join_settings.depending_on_ids,
|
|
1056
|
+
)
|
|
810
1057
|
return self
|
|
811
1058
|
|
|
812
1059
|
def add_join(self, join_settings: input_schema.NodeJoin) -> "FlowGraph":
|
|
@@ -818,22 +1065,27 @@ class FlowGraph:
|
|
|
818
1065
|
Returns:
|
|
819
1066
|
The `FlowGraph` instance for method chaining.
|
|
820
1067
|
"""
|
|
1068
|
+
|
|
821
1069
|
def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
|
|
822
1070
|
for left_select in join_settings.join_input.left_select.renames:
|
|
823
1071
|
left_select.is_available = True if left_select.old_name in main.schema else False
|
|
824
1072
|
for right_select in join_settings.join_input.right_select.renames:
|
|
825
1073
|
right_select.is_available = True if right_select.old_name in right.schema else False
|
|
826
|
-
return main.join(
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
1074
|
+
return main.join(
|
|
1075
|
+
join_input=join_settings.join_input,
|
|
1076
|
+
auto_generate_selection=join_settings.auto_generate_selection,
|
|
1077
|
+
verify_integrity=False,
|
|
1078
|
+
other=right,
|
|
1079
|
+
)
|
|
1080
|
+
|
|
1081
|
+
self.add_node_step(
|
|
1082
|
+
node_id=join_settings.node_id,
|
|
1083
|
+
function=_func,
|
|
1084
|
+
input_columns=[],
|
|
1085
|
+
node_type="join",
|
|
1086
|
+
setting_input=join_settings,
|
|
1087
|
+
input_node_ids=join_settings.depending_on_ids,
|
|
1088
|
+
)
|
|
837
1089
|
return self
|
|
838
1090
|
|
|
839
1091
|
def add_fuzzy_match(self, fuzzy_settings: input_schema.NodeFuzzyMatch) -> "FlowGraph":
|
|
@@ -849,31 +1101,43 @@ class FlowGraph:
|
|
|
849
1101
|
def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
|
|
850
1102
|
node = self.get_node(node_id=fuzzy_settings.node_id)
|
|
851
1103
|
if self.execution_location == "local":
|
|
852
|
-
return main.fuzzy_join(
|
|
853
|
-
|
|
854
|
-
|
|
1104
|
+
return main.fuzzy_join(
|
|
1105
|
+
fuzzy_match_input=deepcopy(fuzzy_settings.join_input),
|
|
1106
|
+
other=right,
|
|
1107
|
+
node_logger=self.flow_logger.get_node_logger(fuzzy_settings.node_id),
|
|
1108
|
+
)
|
|
855
1109
|
|
|
856
|
-
f = main.start_fuzzy_join(
|
|
857
|
-
|
|
1110
|
+
f = main.start_fuzzy_join(
|
|
1111
|
+
fuzzy_match_input=deepcopy(fuzzy_settings.join_input),
|
|
1112
|
+
other=right,
|
|
1113
|
+
file_ref=node.hash,
|
|
1114
|
+
flow_id=self.flow_id,
|
|
1115
|
+
node_id=fuzzy_settings.node_id,
|
|
1116
|
+
)
|
|
858
1117
|
logger.info("Started the fuzzy match action")
|
|
859
1118
|
node._fetch_cached_df = f # Add to the node so it can be cancelled and fetch later if needed
|
|
860
1119
|
return FlowDataEngine(f.get_result())
|
|
861
1120
|
|
|
862
1121
|
def schema_callback():
|
|
863
|
-
fm_input_copy = FuzzyMatchInputManager(
|
|
1122
|
+
fm_input_copy = FuzzyMatchInputManager(
|
|
1123
|
+
fuzzy_settings.join_input
|
|
1124
|
+
) # Deepcopy create an unique object per func
|
|
864
1125
|
node = self.get_node(node_id=fuzzy_settings.node_id)
|
|
865
|
-
return calculate_fuzzy_match_schema(
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
1126
|
+
return calculate_fuzzy_match_schema(
|
|
1127
|
+
fm_input_copy,
|
|
1128
|
+
left_schema=node.node_inputs.main_inputs[0].schema,
|
|
1129
|
+
right_schema=node.node_inputs.right_input.schema,
|
|
1130
|
+
)
|
|
1131
|
+
|
|
1132
|
+
self.add_node_step(
|
|
1133
|
+
node_id=fuzzy_settings.node_id,
|
|
1134
|
+
function=_func,
|
|
1135
|
+
input_columns=[],
|
|
1136
|
+
node_type="fuzzy_match",
|
|
1137
|
+
setting_input=fuzzy_settings,
|
|
1138
|
+
input_node_ids=fuzzy_settings.depending_on_ids,
|
|
1139
|
+
schema_callback=schema_callback,
|
|
1140
|
+
)
|
|
877
1141
|
|
|
878
1142
|
return self
|
|
879
1143
|
|
|
@@ -890,14 +1154,17 @@ class FlowGraph:
|
|
|
890
1154
|
Returns:
|
|
891
1155
|
The `FlowGraph` instance for method chaining.
|
|
892
1156
|
"""
|
|
1157
|
+
|
|
893
1158
|
def _func(table: FlowDataEngine) -> FlowDataEngine:
|
|
894
1159
|
return table.split(node_text_to_rows.text_to_rows_input)
|
|
895
1160
|
|
|
896
|
-
self.add_node_step(
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
1161
|
+
self.add_node_step(
|
|
1162
|
+
node_id=node_text_to_rows.node_id,
|
|
1163
|
+
function=_func,
|
|
1164
|
+
node_type="text_to_rows",
|
|
1165
|
+
setting_input=node_text_to_rows,
|
|
1166
|
+
input_node_ids=[node_text_to_rows.depending_on_id],
|
|
1167
|
+
)
|
|
901
1168
|
return self
|
|
902
1169
|
|
|
903
1170
|
def add_sort(self, sort_settings: input_schema.NodeSort) -> "FlowGraph":
|
|
@@ -913,11 +1180,13 @@ class FlowGraph:
|
|
|
913
1180
|
def _func(table: FlowDataEngine) -> FlowDataEngine:
|
|
914
1181
|
return table.do_sort(sort_settings.sort_input)
|
|
915
1182
|
|
|
916
|
-
self.add_node_step(
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
1183
|
+
self.add_node_step(
|
|
1184
|
+
node_id=sort_settings.node_id,
|
|
1185
|
+
function=_func,
|
|
1186
|
+
node_type="sort",
|
|
1187
|
+
setting_input=sort_settings,
|
|
1188
|
+
input_node_ids=[sort_settings.depending_on_id],
|
|
1189
|
+
)
|
|
921
1190
|
return self
|
|
922
1191
|
|
|
923
1192
|
def add_sample(self, sample_settings: input_schema.NodeSample) -> "FlowGraph":
|
|
@@ -929,15 +1198,17 @@ class FlowGraph:
|
|
|
929
1198
|
Returns:
|
|
930
1199
|
The `FlowGraph` instance for method chaining.
|
|
931
1200
|
"""
|
|
1201
|
+
|
|
932
1202
|
def _func(table: FlowDataEngine) -> FlowDataEngine:
|
|
933
1203
|
return table.get_sample(sample_settings.sample_size)
|
|
934
1204
|
|
|
935
|
-
self.add_node_step(
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
1205
|
+
self.add_node_step(
|
|
1206
|
+
node_id=sample_settings.node_id,
|
|
1207
|
+
function=_func,
|
|
1208
|
+
node_type="sample",
|
|
1209
|
+
setting_input=sample_settings,
|
|
1210
|
+
input_node_ids=[sample_settings.depending_on_id],
|
|
1211
|
+
)
|
|
941
1212
|
return self
|
|
942
1213
|
|
|
943
1214
|
def add_record_id(self, record_id_settings: input_schema.NodeRecordId) -> "FlowGraph":
|
|
@@ -954,12 +1225,13 @@ class FlowGraph:
|
|
|
954
1225
|
def _func(table: FlowDataEngine) -> FlowDataEngine:
|
|
955
1226
|
return table.add_record_id(record_id_settings.record_id_input)
|
|
956
1227
|
|
|
957
|
-
self.add_node_step(
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
1228
|
+
self.add_node_step(
|
|
1229
|
+
node_id=record_id_settings.node_id,
|
|
1230
|
+
function=_func,
|
|
1231
|
+
node_type="record_id",
|
|
1232
|
+
setting_input=record_id_settings,
|
|
1233
|
+
input_node_ids=[record_id_settings.depending_on_id],
|
|
1234
|
+
)
|
|
963
1235
|
return self
|
|
964
1236
|
|
|
965
1237
|
def add_select(self, select_settings: input_schema.NodeSelect) -> "FlowGraph":
|
|
@@ -991,16 +1263,19 @@ class FlowGraph:
|
|
|
991
1263
|
for i in ids_to_remove:
|
|
992
1264
|
v = select_cols.pop(i)
|
|
993
1265
|
del v
|
|
994
|
-
return table.do_select(
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1266
|
+
return table.do_select(
|
|
1267
|
+
select_inputs=transform_schema.SelectInputs(select_cols), keep_missing=select_settings.keep_missing
|
|
1268
|
+
)
|
|
1269
|
+
|
|
1270
|
+
self.add_node_step(
|
|
1271
|
+
node_id=select_settings.node_id,
|
|
1272
|
+
function=_func,
|
|
1273
|
+
input_columns=[],
|
|
1274
|
+
node_type="select",
|
|
1275
|
+
drop_columns=list(drop_cols),
|
|
1276
|
+
setting_input=select_settings,
|
|
1277
|
+
input_node_ids=[select_settings.depending_on_id],
|
|
1278
|
+
)
|
|
1004
1279
|
return self
|
|
1005
1280
|
|
|
1006
1281
|
@property
|
|
@@ -1008,7 +1283,7 @@ class FlowGraph:
|
|
|
1008
1283
|
"""Checks if the graph has any nodes."""
|
|
1009
1284
|
return len(self._node_ids) > 0
|
|
1010
1285
|
|
|
1011
|
-
def delete_node(self, node_id:
|
|
1286
|
+
def delete_node(self, node_id: int | str):
|
|
1012
1287
|
"""Deletes a node from the graph and updates all its connections.
|
|
1013
1288
|
|
|
1014
1289
|
Args:
|
|
@@ -1023,7 +1298,7 @@ class FlowGraph:
|
|
|
1023
1298
|
if node:
|
|
1024
1299
|
logger.info(f"Found node: {node_id}, processing deletion")
|
|
1025
1300
|
|
|
1026
|
-
lead_to_steps:
|
|
1301
|
+
lead_to_steps: list[FlowNode] = node.leads_to_nodes
|
|
1027
1302
|
logger.debug(f"Node {node_id} leads to {len(lead_to_steps)} other nodes")
|
|
1028
1303
|
|
|
1029
1304
|
if len(lead_to_steps) > 0:
|
|
@@ -1032,7 +1307,7 @@ class FlowGraph:
|
|
|
1032
1307
|
lead_to_step.delete_input_node(node_id, complete=True)
|
|
1033
1308
|
|
|
1034
1309
|
if not node.is_start:
|
|
1035
|
-
depends_on:
|
|
1310
|
+
depends_on: list[FlowNode] = node.node_inputs.get_all_inputs()
|
|
1036
1311
|
logger.debug(f"Node {node_id} depends on {len(depends_on)} other nodes")
|
|
1037
1312
|
|
|
1038
1313
|
for depend_on in depends_on:
|
|
@@ -1052,18 +1327,20 @@ class FlowGraph:
|
|
|
1052
1327
|
"""Checks if the graph has an initial input data source."""
|
|
1053
1328
|
return self._input_data is not None
|
|
1054
1329
|
|
|
1055
|
-
def add_node_step(
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1330
|
+
def add_node_step(
|
|
1331
|
+
self,
|
|
1332
|
+
node_id: int | str,
|
|
1333
|
+
function: Callable,
|
|
1334
|
+
input_columns: list[str] = None,
|
|
1335
|
+
output_schema: list[FlowfileColumn] = None,
|
|
1336
|
+
node_type: str = None,
|
|
1337
|
+
drop_columns: list[str] = None,
|
|
1338
|
+
renew_schema: bool = True,
|
|
1339
|
+
setting_input: Any = None,
|
|
1340
|
+
cache_results: bool = None,
|
|
1341
|
+
schema_callback: Callable = None,
|
|
1342
|
+
input_node_ids: list[int] = None,
|
|
1343
|
+
) -> FlowNode:
|
|
1067
1344
|
"""The core method for adding or updating a node in the graph.
|
|
1068
1345
|
|
|
1069
1346
|
Args:
|
|
@@ -1096,29 +1373,33 @@ class FlowGraph:
|
|
|
1096
1373
|
if isinstance(input_columns, str):
|
|
1097
1374
|
input_columns = [input_columns]
|
|
1098
1375
|
if (
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1376
|
+
input_nodes is not None
|
|
1377
|
+
or function.__name__ in ("placeholder", "analysis_preparation")
|
|
1378
|
+
or node_type in ("cloud_storage_reader", "polars_lazy_frame", "input_data")
|
|
1102
1379
|
):
|
|
1103
1380
|
if not existing_node:
|
|
1104
|
-
node = FlowNode(
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1381
|
+
node = FlowNode(
|
|
1382
|
+
node_id=node_id,
|
|
1383
|
+
function=function,
|
|
1384
|
+
output_schema=output_schema,
|
|
1385
|
+
input_columns=input_columns,
|
|
1386
|
+
drop_columns=drop_columns,
|
|
1387
|
+
renew_schema=renew_schema,
|
|
1388
|
+
setting_input=setting_input,
|
|
1389
|
+
node_type=node_type,
|
|
1390
|
+
name=function.__name__,
|
|
1391
|
+
schema_callback=schema_callback,
|
|
1392
|
+
parent_uuid=self.uuid,
|
|
1393
|
+
)
|
|
1115
1394
|
else:
|
|
1116
|
-
existing_node.update_node(
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1395
|
+
existing_node.update_node(
|
|
1396
|
+
function=function,
|
|
1397
|
+
output_schema=output_schema,
|
|
1398
|
+
input_columns=input_columns,
|
|
1399
|
+
drop_columns=drop_columns,
|
|
1400
|
+
setting_input=setting_input,
|
|
1401
|
+
schema_callback=schema_callback,
|
|
1402
|
+
)
|
|
1122
1403
|
node = existing_node
|
|
1123
1404
|
else:
|
|
1124
1405
|
raise Exception("No data initialized")
|
|
@@ -1126,7 +1407,7 @@ class FlowGraph:
|
|
|
1126
1407
|
self._node_ids.append(node_id)
|
|
1127
1408
|
return node
|
|
1128
1409
|
|
|
1129
|
-
def add_include_cols(self, include_columns:
|
|
1410
|
+
def add_include_cols(self, include_columns: list[str]):
|
|
1130
1411
|
"""Adds columns to both the input and output column lists.
|
|
1131
1412
|
|
|
1132
1413
|
Args:
|
|
@@ -1147,23 +1428,30 @@ class FlowGraph:
|
|
|
1147
1428
|
"""
|
|
1148
1429
|
|
|
1149
1430
|
def _func(df: FlowDataEngine):
|
|
1150
|
-
execute_remote = self.execution_location !=
|
|
1151
|
-
df.output(
|
|
1152
|
-
|
|
1431
|
+
execute_remote = self.execution_location != "local"
|
|
1432
|
+
df.output(
|
|
1433
|
+
output_fs=output_file.output_settings,
|
|
1434
|
+
flow_id=self.flow_id,
|
|
1435
|
+
node_id=output_file.node_id,
|
|
1436
|
+
execute_remote=execute_remote,
|
|
1437
|
+
)
|
|
1153
1438
|
return df
|
|
1154
1439
|
|
|
1155
1440
|
def schema_callback():
|
|
1156
1441
|
input_node: FlowNode = self.get_node(output_file.node_id).node_inputs.main_inputs[0]
|
|
1157
1442
|
|
|
1158
1443
|
return input_node.schema
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1444
|
+
|
|
1445
|
+
input_node_id = output_file.depending_on_id if hasattr(output_file, "depending_on_id") else None
|
|
1446
|
+
self.add_node_step(
|
|
1447
|
+
node_id=output_file.node_id,
|
|
1448
|
+
function=_func,
|
|
1449
|
+
input_columns=[],
|
|
1450
|
+
node_type="output",
|
|
1451
|
+
setting_input=output_file,
|
|
1452
|
+
schema_callback=schema_callback,
|
|
1453
|
+
input_node_ids=[input_node_id],
|
|
1454
|
+
)
|
|
1167
1455
|
|
|
1168
1456
|
def add_database_writer(self, node_database_writer: input_schema.NodeDatabaseWriter):
|
|
1169
1457
|
"""Adds a node to write data to a database.
|
|
@@ -1172,18 +1460,20 @@ class FlowGraph:
|
|
|
1172
1460
|
node_database_writer: The settings for the database writer node.
|
|
1173
1461
|
"""
|
|
1174
1462
|
|
|
1175
|
-
node_type =
|
|
1463
|
+
node_type = "database_writer"
|
|
1176
1464
|
database_settings: input_schema.DatabaseWriteSettings = node_database_writer.database_write_settings
|
|
1177
|
-
database_connection:
|
|
1178
|
-
if database_settings.connection_mode ==
|
|
1465
|
+
database_connection: input_schema.DatabaseConnection | input_schema.FullDatabaseConnection | None
|
|
1466
|
+
if database_settings.connection_mode == "inline":
|
|
1179
1467
|
database_connection: input_schema.DatabaseConnection = database_settings.database_connection
|
|
1180
|
-
encrypted_password = get_encrypted_secret(
|
|
1181
|
-
|
|
1468
|
+
encrypted_password = get_encrypted_secret(
|
|
1469
|
+
current_user_id=node_database_writer.user_id, secret_name=database_connection.password_ref
|
|
1470
|
+
)
|
|
1182
1471
|
if encrypted_password is None:
|
|
1183
1472
|
raise HTTPException(status_code=400, detail="Password not found")
|
|
1184
1473
|
else:
|
|
1185
|
-
database_reference_settings = get_local_database_connection(
|
|
1186
|
-
|
|
1474
|
+
database_reference_settings = get_local_database_connection(
|
|
1475
|
+
database_settings.database_connection_name, node_database_writer.user_id
|
|
1476
|
+
)
|
|
1187
1477
|
encrypted_password = database_reference_settings.password.get_secret_value()
|
|
1188
1478
|
|
|
1189
1479
|
def _func(df: FlowDataEngine):
|
|
@@ -1192,14 +1482,20 @@ class FlowGraph:
|
|
|
1192
1482
|
sql_models.DatabaseExternalWriteSettings.create_from_from_node_database_writer(
|
|
1193
1483
|
node_database_writer=node_database_writer,
|
|
1194
1484
|
password=encrypted_password,
|
|
1195
|
-
table_name=(
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1485
|
+
table_name=(
|
|
1486
|
+
database_settings.schema_name + "." + database_settings.table_name
|
|
1487
|
+
if database_settings.schema_name
|
|
1488
|
+
else database_settings.table_name
|
|
1489
|
+
),
|
|
1490
|
+
database_reference_settings=(
|
|
1491
|
+
database_reference_settings if database_settings.connection_mode == "reference" else None
|
|
1492
|
+
),
|
|
1493
|
+
lf=df.data_frame,
|
|
1200
1494
|
)
|
|
1201
1495
|
)
|
|
1202
|
-
external_database_writer = ExternalDatabaseWriter(
|
|
1496
|
+
external_database_writer = ExternalDatabaseWriter(
|
|
1497
|
+
database_external_write_settings, wait_on_completion=False
|
|
1498
|
+
)
|
|
1203
1499
|
node._fetch_cached_df = external_database_writer
|
|
1204
1500
|
external_database_writer.get_result()
|
|
1205
1501
|
return df
|
|
@@ -1226,56 +1522,64 @@ class FlowGraph:
|
|
|
1226
1522
|
"""
|
|
1227
1523
|
|
|
1228
1524
|
logger.info("Adding database reader")
|
|
1229
|
-
node_type =
|
|
1525
|
+
node_type = "database_reader"
|
|
1230
1526
|
database_settings: input_schema.DatabaseSettings = node_database_reader.database_settings
|
|
1231
|
-
database_connection:
|
|
1232
|
-
if database_settings.connection_mode ==
|
|
1527
|
+
database_connection: input_schema.DatabaseConnection | input_schema.FullDatabaseConnection | None
|
|
1528
|
+
if database_settings.connection_mode == "inline":
|
|
1233
1529
|
database_connection: input_schema.DatabaseConnection = database_settings.database_connection
|
|
1234
|
-
encrypted_password = get_encrypted_secret(
|
|
1235
|
-
|
|
1530
|
+
encrypted_password = get_encrypted_secret(
|
|
1531
|
+
current_user_id=node_database_reader.user_id, secret_name=database_connection.password_ref
|
|
1532
|
+
)
|
|
1236
1533
|
if encrypted_password is None:
|
|
1237
1534
|
raise HTTPException(status_code=400, detail="Password not found")
|
|
1238
1535
|
else:
|
|
1239
|
-
database_reference_settings = get_local_database_connection(
|
|
1240
|
-
|
|
1536
|
+
database_reference_settings = get_local_database_connection(
|
|
1537
|
+
database_settings.database_connection_name, node_database_reader.user_id
|
|
1538
|
+
)
|
|
1241
1539
|
database_connection = database_reference_settings
|
|
1242
1540
|
encrypted_password = database_reference_settings.password.get_secret_value()
|
|
1243
1541
|
|
|
1244
1542
|
def _func():
|
|
1245
|
-
sql_source = BaseSqlSource(
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1543
|
+
sql_source = BaseSqlSource(
|
|
1544
|
+
query=None if database_settings.query_mode == "table" else database_settings.query,
|
|
1545
|
+
table_name=database_settings.table_name,
|
|
1546
|
+
schema_name=database_settings.schema_name,
|
|
1547
|
+
fields=node_database_reader.fields,
|
|
1548
|
+
)
|
|
1250
1549
|
database_external_read_settings = (
|
|
1251
1550
|
sql_models.DatabaseExternalReadSettings.create_from_from_node_database_reader(
|
|
1252
1551
|
node_database_reader=node_database_reader,
|
|
1253
1552
|
password=encrypted_password,
|
|
1254
1553
|
query=sql_source.query,
|
|
1255
|
-
database_reference_settings=(
|
|
1256
|
-
|
|
1554
|
+
database_reference_settings=(
|
|
1555
|
+
database_reference_settings if database_settings.connection_mode == "reference" else None
|
|
1556
|
+
),
|
|
1257
1557
|
)
|
|
1258
1558
|
)
|
|
1259
1559
|
|
|
1260
|
-
external_database_fetcher = ExternalDatabaseFetcher(
|
|
1560
|
+
external_database_fetcher = ExternalDatabaseFetcher(
|
|
1561
|
+
database_external_read_settings, wait_on_completion=False
|
|
1562
|
+
)
|
|
1261
1563
|
node._fetch_cached_df = external_database_fetcher
|
|
1262
1564
|
fl = FlowDataEngine(external_database_fetcher.get_result())
|
|
1263
1565
|
node_database_reader.fields = [c.get_minimal_field_info() for c in fl.schema]
|
|
1264
1566
|
return fl
|
|
1265
1567
|
|
|
1266
1568
|
def schema_callback():
|
|
1267
|
-
sql_source = SqlSource(
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1569
|
+
sql_source = SqlSource(
|
|
1570
|
+
connection_string=sql_utils.construct_sql_uri(
|
|
1571
|
+
database_type=database_connection.database_type,
|
|
1572
|
+
host=database_connection.host,
|
|
1573
|
+
port=database_connection.port,
|
|
1574
|
+
database=database_connection.database,
|
|
1575
|
+
username=database_connection.username,
|
|
1576
|
+
password=decrypt_secret(encrypted_password),
|
|
1577
|
+
),
|
|
1578
|
+
query=None if database_settings.query_mode == "table" else database_settings.query,
|
|
1579
|
+
table_name=database_settings.table_name,
|
|
1580
|
+
schema_name=database_settings.schema_name,
|
|
1581
|
+
fields=node_database_reader.fields,
|
|
1582
|
+
)
|
|
1279
1583
|
return sql_source.get_schema()
|
|
1280
1584
|
|
|
1281
1585
|
node = self.get_node(node_database_reader.node_id)
|
|
@@ -1285,16 +1589,20 @@ class FlowGraph:
|
|
|
1285
1589
|
node.function = _func
|
|
1286
1590
|
node.setting_input = node_database_reader
|
|
1287
1591
|
node.node_settings.cache_results = node_database_reader.cache_results
|
|
1288
|
-
|
|
1289
|
-
self._flow_starts.append(node)
|
|
1592
|
+
self.add_node_to_starting_list(node)
|
|
1290
1593
|
node.schema_callback = schema_callback
|
|
1291
1594
|
else:
|
|
1292
|
-
node = FlowNode(
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1595
|
+
node = FlowNode(
|
|
1596
|
+
node_database_reader.node_id,
|
|
1597
|
+
function=_func,
|
|
1598
|
+
setting_input=node_database_reader,
|
|
1599
|
+
name=node_type,
|
|
1600
|
+
node_type=node_type,
|
|
1601
|
+
parent_uuid=self.uuid,
|
|
1602
|
+
schema_callback=schema_callback,
|
|
1603
|
+
)
|
|
1296
1604
|
self._node_db[node_database_reader.node_id] = node
|
|
1297
|
-
self.
|
|
1605
|
+
self.add_node_to_starting_list(node)
|
|
1298
1606
|
self._node_ids.append(node_database_reader.node_id)
|
|
1299
1607
|
|
|
1300
1608
|
def add_sql_source(self, external_source_input: input_schema.NodeExternalSource):
|
|
@@ -1305,7 +1613,7 @@ class FlowGraph:
|
|
|
1305
1613
|
Args:
|
|
1306
1614
|
external_source_input: The settings for the external SQL source node.
|
|
1307
1615
|
"""
|
|
1308
|
-
logger.info(
|
|
1616
|
+
logger.info("Adding sql source")
|
|
1309
1617
|
self.add_external_source(external_source_input)
|
|
1310
1618
|
|
|
1311
1619
|
def add_cloud_storage_writer(self, node_cloud_storage_writer: input_schema.NodeCloudStorageWriter) -> None:
|
|
@@ -1316,19 +1624,20 @@ class FlowGraph:
|
|
|
1316
1624
|
"""
|
|
1317
1625
|
|
|
1318
1626
|
node_type = "cloud_storage_writer"
|
|
1627
|
+
|
|
1319
1628
|
def _func(df: FlowDataEngine):
|
|
1320
1629
|
df.lazy = True
|
|
1321
|
-
execute_remote = self.execution_location !=
|
|
1630
|
+
execute_remote = self.execution_location != "local"
|
|
1322
1631
|
cloud_connection_settings = get_cloud_connection_settings(
|
|
1323
1632
|
connection_name=node_cloud_storage_writer.cloud_storage_settings.connection_name,
|
|
1324
1633
|
user_id=node_cloud_storage_writer.user_id,
|
|
1325
|
-
auth_mode=node_cloud_storage_writer.cloud_storage_settings.auth_mode
|
|
1634
|
+
auth_mode=node_cloud_storage_writer.cloud_storage_settings.auth_mode,
|
|
1326
1635
|
)
|
|
1327
1636
|
full_cloud_storage_connection = FullCloudStorageConnection(
|
|
1328
1637
|
storage_type=cloud_connection_settings.storage_type,
|
|
1329
1638
|
auth_method=cloud_connection_settings.auth_method,
|
|
1330
1639
|
aws_allow_unsafe_html=cloud_connection_settings.aws_allow_unsafe_html,
|
|
1331
|
-
**CloudStorageReader.get_storage_options(cloud_connection_settings)
|
|
1640
|
+
**CloudStorageReader.get_storage_options(cloud_connection_settings),
|
|
1332
1641
|
)
|
|
1333
1642
|
if execute_remote:
|
|
1334
1643
|
settings = get_cloud_storage_write_settings_worker_interface(
|
|
@@ -1336,7 +1645,8 @@ class FlowGraph:
|
|
|
1336
1645
|
connection=full_cloud_storage_connection,
|
|
1337
1646
|
lf=df.data_frame,
|
|
1338
1647
|
flowfile_node_id=node_cloud_storage_writer.node_id,
|
|
1339
|
-
flowfile_flow_id=self.flow_id
|
|
1648
|
+
flowfile_flow_id=self.flow_id,
|
|
1649
|
+
)
|
|
1340
1650
|
external_database_writer = ExternalCloudWriter(settings, wait_on_completion=False)
|
|
1341
1651
|
node._fetch_cached_df = external_database_writer
|
|
1342
1652
|
external_database_writer.get_result()
|
|
@@ -1362,7 +1672,7 @@ class FlowGraph:
|
|
|
1362
1672
|
node_type=node_type,
|
|
1363
1673
|
setting_input=node_cloud_storage_writer,
|
|
1364
1674
|
schema_callback=schema_callback,
|
|
1365
|
-
input_node_ids=[node_cloud_storage_writer.depending_on_id]
|
|
1675
|
+
input_node_ids=[node_cloud_storage_writer.depending_on_id],
|
|
1366
1676
|
)
|
|
1367
1677
|
|
|
1368
1678
|
node = self.get_node(node_cloud_storage_writer.node_id)
|
|
@@ -1380,49 +1690,53 @@ class FlowGraph:
|
|
|
1380
1690
|
def _func():
|
|
1381
1691
|
logger.info("Starting to run the schema callback for cloud storage reader")
|
|
1382
1692
|
self.flow_logger.info("Starting to run the schema callback for cloud storage reader")
|
|
1383
|
-
settings = CloudStorageReadSettingsInternal(
|
|
1384
|
-
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1693
|
+
settings = CloudStorageReadSettingsInternal(
|
|
1694
|
+
read_settings=cloud_storage_read_settings,
|
|
1695
|
+
connection=get_cloud_connection_settings(
|
|
1696
|
+
connection_name=cloud_storage_read_settings.connection_name,
|
|
1697
|
+
user_id=node_cloud_storage_reader.user_id,
|
|
1698
|
+
auth_mode=cloud_storage_read_settings.auth_mode,
|
|
1699
|
+
),
|
|
1700
|
+
)
|
|
1389
1701
|
fl = FlowDataEngine.from_cloud_storage_obj(settings)
|
|
1390
1702
|
return fl
|
|
1391
1703
|
|
|
1392
|
-
node = self.add_node_step(
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1704
|
+
node = self.add_node_step(
|
|
1705
|
+
node_id=node_cloud_storage_reader.node_id,
|
|
1706
|
+
function=_func,
|
|
1707
|
+
cache_results=node_cloud_storage_reader.cache_results,
|
|
1708
|
+
setting_input=node_cloud_storage_reader,
|
|
1709
|
+
node_type=node_type,
|
|
1710
|
+
)
|
|
1711
|
+
self.add_node_to_starting_list(node)
|
|
1400
1712
|
|
|
1401
|
-
def add_external_source(self,
|
|
1402
|
-
external_source_input: input_schema.NodeExternalSource):
|
|
1713
|
+
def add_external_source(self, external_source_input: input_schema.NodeExternalSource):
|
|
1403
1714
|
"""Adds a node for a custom external data source.
|
|
1404
1715
|
|
|
1405
1716
|
Args:
|
|
1406
1717
|
external_source_input: The settings for the external source node.
|
|
1407
1718
|
"""
|
|
1408
1719
|
|
|
1409
|
-
node_type =
|
|
1720
|
+
node_type = "external_source"
|
|
1410
1721
|
external_source_script = getattr(external_sources.custom_external_sources, external_source_input.identifier)
|
|
1411
|
-
source_settings =
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
|
|
1722
|
+
source_settings = getattr(
|
|
1723
|
+
input_schema, snake_case_to_camel_case(external_source_input.identifier)
|
|
1724
|
+
).model_validate(external_source_input.source_settings)
|
|
1725
|
+
if hasattr(external_source_script, "initial_getter"):
|
|
1726
|
+
initial_getter = external_source_script.initial_getter(source_settings)
|
|
1415
1727
|
else:
|
|
1416
1728
|
initial_getter = None
|
|
1417
1729
|
data_getter = external_source_script.getter(source_settings)
|
|
1418
|
-
external_source = data_source_factory(
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
|
|
1730
|
+
external_source = data_source_factory(
|
|
1731
|
+
source_type="custom",
|
|
1732
|
+
data_getter=data_getter,
|
|
1733
|
+
initial_data_getter=initial_getter,
|
|
1734
|
+
orientation=external_source_input.source_settings.orientation,
|
|
1735
|
+
schema=None,
|
|
1736
|
+
)
|
|
1423
1737
|
|
|
1424
1738
|
def _func():
|
|
1425
|
-
logger.info(
|
|
1739
|
+
logger.info("Calling external source")
|
|
1426
1740
|
fl = FlowDataEngine.create_from_external_source(external_source=external_source)
|
|
1427
1741
|
external_source_input.source_settings.fields = [c.get_minimal_field_info() for c in fl.schema]
|
|
1428
1742
|
return fl
|
|
@@ -1434,31 +1748,39 @@ class FlowGraph:
|
|
|
1434
1748
|
node.function = _func
|
|
1435
1749
|
node.setting_input = external_source_input
|
|
1436
1750
|
node.node_settings.cache_results = external_source_input.cache_results
|
|
1437
|
-
|
|
1438
|
-
|
|
1751
|
+
self.add_node_to_starting_list(node)
|
|
1752
|
+
|
|
1439
1753
|
else:
|
|
1440
|
-
node = FlowNode(
|
|
1441
|
-
|
|
1442
|
-
|
|
1754
|
+
node = FlowNode(
|
|
1755
|
+
external_source_input.node_id,
|
|
1756
|
+
function=_func,
|
|
1757
|
+
setting_input=external_source_input,
|
|
1758
|
+
name=node_type,
|
|
1759
|
+
node_type=node_type,
|
|
1760
|
+
parent_uuid=self.uuid,
|
|
1761
|
+
)
|
|
1443
1762
|
self._node_db[external_source_input.node_id] = node
|
|
1444
|
-
self.
|
|
1763
|
+
self.add_node_to_starting_list(node)
|
|
1445
1764
|
self._node_ids.append(external_source_input.node_id)
|
|
1446
1765
|
if external_source_input.source_settings.fields and len(external_source_input.source_settings.fields) > 0:
|
|
1447
|
-
logger.info(
|
|
1766
|
+
logger.info("Using provided schema in the node")
|
|
1448
1767
|
|
|
1449
1768
|
def schema_callback():
|
|
1450
|
-
return [
|
|
1451
|
-
|
|
1769
|
+
return [
|
|
1770
|
+
FlowfileColumn.from_input(f.name, f.data_type) for f in external_source_input.source_settings.fields
|
|
1771
|
+
]
|
|
1452
1772
|
|
|
1453
1773
|
node.schema_callback = schema_callback
|
|
1454
1774
|
else:
|
|
1455
|
-
logger.warning(
|
|
1775
|
+
logger.warning("Removing schema")
|
|
1456
1776
|
node._schema_callback = None
|
|
1457
|
-
self.add_node_step(
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
|
|
1777
|
+
self.add_node_step(
|
|
1778
|
+
node_id=external_source_input.node_id,
|
|
1779
|
+
function=_func,
|
|
1780
|
+
input_columns=[],
|
|
1781
|
+
node_type=node_type,
|
|
1782
|
+
setting_input=external_source_input,
|
|
1783
|
+
)
|
|
1462
1784
|
|
|
1463
1785
|
def add_read(self, input_file: input_schema.NodeRead):
|
|
1464
1786
|
"""Adds a node to read data from a local file (e.g., CSV, Parquet, Excel).
|
|
@@ -1466,8 +1788,10 @@ class FlowGraph:
|
|
|
1466
1788
|
Args:
|
|
1467
1789
|
input_file: The settings for the read operation.
|
|
1468
1790
|
"""
|
|
1469
|
-
if (
|
|
1470
|
-
|
|
1791
|
+
if (
|
|
1792
|
+
input_file.received_file.file_type in ("xlsx", "excel")
|
|
1793
|
+
and input_file.received_file.table_settings.sheet_name == ""
|
|
1794
|
+
):
|
|
1471
1795
|
sheet_name = fastexcel.read_excel(input_file.received_file.path).sheet_names[0]
|
|
1472
1796
|
input_file.received_file.table_settings.sheet_name = sheet_name
|
|
1473
1797
|
|
|
@@ -1476,14 +1800,17 @@ class FlowGraph:
|
|
|
1476
1800
|
|
|
1477
1801
|
def _func():
|
|
1478
1802
|
input_file.received_file.set_absolute_filepath()
|
|
1479
|
-
if input_file.received_file.file_type ==
|
|
1803
|
+
if input_file.received_file.file_type == "parquet":
|
|
1480
1804
|
input_data = FlowDataEngine.create_from_path(input_file.received_file)
|
|
1481
|
-
elif
|
|
1805
|
+
elif (
|
|
1806
|
+
input_file.received_file.file_type == "csv"
|
|
1807
|
+
and "utf" in input_file.received_file.table_settings.encoding
|
|
1808
|
+
):
|
|
1482
1809
|
input_data = FlowDataEngine.create_from_path(input_file.received_file)
|
|
1483
1810
|
else:
|
|
1484
|
-
input_data = FlowDataEngine.create_from_path_worker(
|
|
1485
|
-
|
|
1486
|
-
|
|
1811
|
+
input_data = FlowDataEngine.create_from_path_worker(
|
|
1812
|
+
input_file.received_file, node_id=input_file.node_id, flow_id=self.flow_id
|
|
1813
|
+
)
|
|
1487
1814
|
input_data.name = input_file.received_file.name
|
|
1488
1815
|
return input_data
|
|
1489
1816
|
|
|
@@ -1491,51 +1818,57 @@ class FlowGraph:
|
|
|
1491
1818
|
schema_callback = None
|
|
1492
1819
|
if node:
|
|
1493
1820
|
start_hash = node.hash
|
|
1494
|
-
node.node_type =
|
|
1495
|
-
node.name =
|
|
1821
|
+
node.node_type = "read"
|
|
1822
|
+
node.name = "read"
|
|
1496
1823
|
node.function = _func
|
|
1497
1824
|
node.setting_input = input_file
|
|
1498
|
-
|
|
1499
|
-
self._flow_starts.append(node)
|
|
1825
|
+
self.add_node_to_starting_list(node)
|
|
1500
1826
|
|
|
1501
1827
|
if start_hash != node.hash:
|
|
1502
|
-
logger.info(
|
|
1828
|
+
logger.info("Hash changed, updating schema")
|
|
1503
1829
|
if len(received_file.fields) > 0:
|
|
1504
1830
|
# If the file has fields defined, we can use them to create the schema
|
|
1505
1831
|
def schema_callback():
|
|
1506
1832
|
return [FlowfileColumn.from_input(f.name, f.data_type) for f in received_file.fields]
|
|
1507
1833
|
|
|
1508
|
-
elif input_file.received_file.file_type in (
|
|
1834
|
+
elif input_file.received_file.file_type in ("csv", "json", "parquet"):
|
|
1509
1835
|
# everything that can be scanned by polars
|
|
1510
1836
|
def schema_callback():
|
|
1511
1837
|
input_data = FlowDataEngine.create_from_path(input_file.received_file)
|
|
1512
1838
|
return input_data.schema
|
|
1513
1839
|
|
|
1514
|
-
elif input_file.received_file.file_type in (
|
|
1840
|
+
elif input_file.received_file.file_type in ("xlsx", "excel"):
|
|
1515
1841
|
# If the file is an Excel file, we need to use the openpyxl engine to read the schema
|
|
1516
|
-
schema_callback = get_xlsx_schema_callback(
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
|
|
1842
|
+
schema_callback = get_xlsx_schema_callback(
|
|
1843
|
+
engine="openpyxl",
|
|
1844
|
+
file_path=received_file.file_path,
|
|
1845
|
+
sheet_name=received_file.table_settings.sheet_name,
|
|
1846
|
+
start_row=received_file.table_settings.start_row,
|
|
1847
|
+
end_row=received_file.table_settings.end_row,
|
|
1848
|
+
start_column=received_file.table_settings.start_column,
|
|
1849
|
+
end_column=received_file.table_settings.end_column,
|
|
1850
|
+
has_headers=received_file.table_settings.has_headers,
|
|
1851
|
+
)
|
|
1524
1852
|
else:
|
|
1525
1853
|
schema_callback = None
|
|
1526
1854
|
else:
|
|
1527
|
-
node = FlowNode(
|
|
1528
|
-
|
|
1529
|
-
|
|
1855
|
+
node = FlowNode(
|
|
1856
|
+
input_file.node_id,
|
|
1857
|
+
function=_func,
|
|
1858
|
+
setting_input=input_file,
|
|
1859
|
+
name="read",
|
|
1860
|
+
node_type="read",
|
|
1861
|
+
parent_uuid=self.uuid,
|
|
1862
|
+
)
|
|
1530
1863
|
self._node_db[input_file.node_id] = node
|
|
1531
|
-
self.
|
|
1864
|
+
self.add_node_to_starting_list(node)
|
|
1532
1865
|
self._node_ids.append(input_file.node_id)
|
|
1533
1866
|
|
|
1534
1867
|
if schema_callback is not None:
|
|
1535
1868
|
node.schema_callback = schema_callback
|
|
1536
1869
|
return self
|
|
1537
1870
|
|
|
1538
|
-
def add_datasource(self, input_file:
|
|
1871
|
+
def add_datasource(self, input_file: input_schema.NodeDatasource | input_schema.NodeManualInput) -> "FlowGraph":
|
|
1539
1872
|
"""Adds a data source node to the graph.
|
|
1540
1873
|
|
|
1541
1874
|
This method serves as a factory for creating starting nodes, handling both
|
|
@@ -1549,25 +1882,30 @@ class FlowGraph:
|
|
|
1549
1882
|
"""
|
|
1550
1883
|
if isinstance(input_file, input_schema.NodeManualInput):
|
|
1551
1884
|
input_data = FlowDataEngine(input_file.raw_data_format)
|
|
1552
|
-
ref =
|
|
1885
|
+
ref = "manual_input"
|
|
1553
1886
|
else:
|
|
1554
1887
|
input_data = FlowDataEngine(path_ref=input_file.file_ref)
|
|
1555
|
-
ref =
|
|
1888
|
+
ref = "datasource"
|
|
1556
1889
|
node = self.get_node(input_file.node_id)
|
|
1557
1890
|
if node:
|
|
1558
1891
|
node.node_type = ref
|
|
1559
1892
|
node.name = ref
|
|
1560
1893
|
node.function = input_data
|
|
1561
1894
|
node.setting_input = input_file
|
|
1562
|
-
|
|
1563
|
-
|
|
1895
|
+
self.add_node_to_starting_list(node)
|
|
1896
|
+
|
|
1564
1897
|
else:
|
|
1565
1898
|
input_data.collect()
|
|
1566
|
-
node = FlowNode(
|
|
1567
|
-
|
|
1568
|
-
|
|
1899
|
+
node = FlowNode(
|
|
1900
|
+
input_file.node_id,
|
|
1901
|
+
function=input_data,
|
|
1902
|
+
setting_input=input_file,
|
|
1903
|
+
name=ref,
|
|
1904
|
+
node_type=ref,
|
|
1905
|
+
parent_uuid=self.uuid,
|
|
1906
|
+
)
|
|
1569
1907
|
self._node_db[input_file.node_id] = node
|
|
1570
|
-
self.
|
|
1908
|
+
self.add_node_to_starting_list(node)
|
|
1571
1909
|
self._node_ids.append(input_file.node_id)
|
|
1572
1910
|
return self
|
|
1573
1911
|
|
|
@@ -1582,7 +1920,7 @@ class FlowGraph:
|
|
|
1582
1920
|
self.add_datasource(input_file)
|
|
1583
1921
|
|
|
1584
1922
|
@property
|
|
1585
|
-
def nodes(self) ->
|
|
1923
|
+
def nodes(self) -> list[FlowNode]:
|
|
1586
1924
|
"""Gets a list of all FlowNode objects in the graph."""
|
|
1587
1925
|
|
|
1588
1926
|
return list(self._node_db.values())
|
|
@@ -1592,7 +1930,7 @@ class FlowGraph:
|
|
|
1592
1930
|
"""Gets the current execution mode ('Development' or 'Performance')."""
|
|
1593
1931
|
return self.flow_settings.execution_mode
|
|
1594
1932
|
|
|
1595
|
-
def get_implicit_starter_nodes(self) ->
|
|
1933
|
+
def get_implicit_starter_nodes(self) -> list[FlowNode]:
|
|
1596
1934
|
"""Finds nodes that can act as starting points but are not explicitly defined as such.
|
|
1597
1935
|
|
|
1598
1936
|
Some nodes, like the Polars Code node, can function without an input. This
|
|
@@ -1638,24 +1976,31 @@ class FlowGraph:
|
|
|
1638
1976
|
if not flow_node:
|
|
1639
1977
|
raise Exception("Node not found found")
|
|
1640
1978
|
skip_nodes, execution_order = compute_execution_plan(
|
|
1641
|
-
nodes=self.nodes, flow_starts=self._flow_starts+self.get_implicit_starter_nodes()
|
|
1979
|
+
nodes=self.nodes, flow_starts=self._flow_starts + self.get_implicit_starter_nodes()
|
|
1642
1980
|
)
|
|
1643
1981
|
if flow_node.node_id in [skip_node.node_id for skip_node in skip_nodes]:
|
|
1644
1982
|
raise Exception("Node can not be executed because it does not have it's inputs")
|
|
1645
1983
|
|
|
1646
|
-
def create_initial_run_information(self, number_of_nodes: int,
|
|
1647
|
-
run_type: Literal["fetch_one", "full_run"]):
|
|
1984
|
+
def create_initial_run_information(self, number_of_nodes: int, run_type: Literal["fetch_one", "full_run"]):
|
|
1648
1985
|
return RunInformation(
|
|
1649
|
-
flow_id=self.flow_id,
|
|
1650
|
-
|
|
1651
|
-
|
|
1986
|
+
flow_id=self.flow_id,
|
|
1987
|
+
start_time=datetime.datetime.now(),
|
|
1988
|
+
end_time=None,
|
|
1989
|
+
success=None,
|
|
1990
|
+
number_of_nodes=number_of_nodes,
|
|
1991
|
+
node_step_result=[],
|
|
1992
|
+
run_type=run_type,
|
|
1652
1993
|
)
|
|
1653
1994
|
|
|
1654
1995
|
def create_empty_run_information(self) -> RunInformation:
|
|
1655
1996
|
return RunInformation(
|
|
1656
|
-
flow_id=self.flow_id,
|
|
1657
|
-
|
|
1658
|
-
|
|
1997
|
+
flow_id=self.flow_id,
|
|
1998
|
+
start_time=None,
|
|
1999
|
+
end_time=None,
|
|
2000
|
+
success=None,
|
|
2001
|
+
number_of_nodes=0,
|
|
2002
|
+
node_step_result=[],
|
|
2003
|
+
run_type="init",
|
|
1659
2004
|
)
|
|
1660
2005
|
|
|
1661
2006
|
def trigger_fetch_node(self, node_id: int) -> RunInformation | None:
|
|
@@ -1669,14 +2014,16 @@ class FlowGraph:
|
|
|
1669
2014
|
self.latest_run_info = self.create_initial_run_information(1, "fetch_one")
|
|
1670
2015
|
node_logger = self.flow_logger.get_node_logger(flow_node.node_id)
|
|
1671
2016
|
node_result = NodeResult(node_id=flow_node.node_id, node_name=flow_node.name)
|
|
1672
|
-
logger.info(f
|
|
2017
|
+
logger.info(f"Starting to run: node {flow_node.node_id}, start time: {node_result.start_timestamp}")
|
|
1673
2018
|
try:
|
|
1674
2019
|
self.latest_run_info.node_step_result.append(node_result)
|
|
1675
|
-
flow_node.execute_node(
|
|
1676
|
-
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
|
|
2020
|
+
flow_node.execute_node(
|
|
2021
|
+
run_location=self.flow_settings.execution_location,
|
|
2022
|
+
performance_mode=False,
|
|
2023
|
+
node_logger=node_logger,
|
|
2024
|
+
optimize_for_downstream=False,
|
|
2025
|
+
reset_cache=True,
|
|
2026
|
+
)
|
|
1680
2027
|
node_result.error = str(flow_node.results.errors)
|
|
1681
2028
|
if self.flow_settings.is_canceled:
|
|
1682
2029
|
node_result.success = None
|
|
@@ -1691,12 +2038,12 @@ class FlowGraph:
|
|
|
1691
2038
|
self.flow_settings.is_running = False
|
|
1692
2039
|
return self.get_run_info()
|
|
1693
2040
|
except Exception as e:
|
|
1694
|
-
node_result.error =
|
|
2041
|
+
node_result.error = "Node did not run"
|
|
1695
2042
|
node_result.success = False
|
|
1696
2043
|
node_result.end_timestamp = time()
|
|
1697
2044
|
node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
|
|
1698
2045
|
node_result.is_running = False
|
|
1699
|
-
node_logger.error(f
|
|
2046
|
+
node_logger.error(f"Error in node {flow_node.node_id}: {e}")
|
|
1700
2047
|
finally:
|
|
1701
2048
|
self.flow_settings.is_running = False
|
|
1702
2049
|
|
|
@@ -1713,39 +2060,38 @@ class FlowGraph:
|
|
|
1713
2060
|
Exception: If the flow is already running.
|
|
1714
2061
|
"""
|
|
1715
2062
|
if self.flow_settings.is_running:
|
|
1716
|
-
raise Exception(
|
|
2063
|
+
raise Exception("Flow is already running")
|
|
1717
2064
|
try:
|
|
1718
|
-
|
|
1719
2065
|
self.flow_settings.is_running = True
|
|
1720
2066
|
self.flow_settings.is_canceled = False
|
|
1721
2067
|
self.flow_logger.clear_log_file()
|
|
1722
|
-
self.flow_logger.info(
|
|
1723
|
-
|
|
2068
|
+
self.flow_logger.info("Starting to run flowfile flow...")
|
|
1724
2069
|
skip_nodes, execution_order = compute_execution_plan(
|
|
1725
|
-
nodes=self.nodes,
|
|
1726
|
-
flow_starts=self._flow_starts+self.get_implicit_starter_nodes()
|
|
2070
|
+
nodes=self.nodes, flow_starts=self._flow_starts + self.get_implicit_starter_nodes()
|
|
1727
2071
|
)
|
|
1728
2072
|
|
|
1729
2073
|
self.latest_run_info = self.create_initial_run_information(len(execution_order), "full_run")
|
|
1730
2074
|
|
|
1731
2075
|
skip_node_message(self.flow_logger, skip_nodes)
|
|
1732
2076
|
execution_order_message(self.flow_logger, execution_order)
|
|
1733
|
-
performance_mode = self.flow_settings.execution_mode ==
|
|
2077
|
+
performance_mode = self.flow_settings.execution_mode == "Performance"
|
|
1734
2078
|
|
|
1735
2079
|
for node in execution_order:
|
|
1736
2080
|
node_logger = self.flow_logger.get_node_logger(node.node_id)
|
|
1737
2081
|
if self.flow_settings.is_canceled:
|
|
1738
|
-
self.flow_logger.info(
|
|
2082
|
+
self.flow_logger.info("Flow canceled")
|
|
1739
2083
|
break
|
|
1740
2084
|
if node in skip_nodes:
|
|
1741
|
-
node_logger.info(f
|
|
2085
|
+
node_logger.info(f"Skipping node {node.node_id}")
|
|
1742
2086
|
continue
|
|
1743
2087
|
node_result = NodeResult(node_id=node.node_id, node_name=node.name)
|
|
1744
2088
|
self.latest_run_info.node_step_result.append(node_result)
|
|
1745
|
-
logger.info(f
|
|
1746
|
-
node.execute_node(
|
|
1747
|
-
|
|
1748
|
-
|
|
2089
|
+
logger.info(f"Starting to run: node {node.node_id}, start time: {node_result.start_timestamp}")
|
|
2090
|
+
node.execute_node(
|
|
2091
|
+
run_location=self.flow_settings.execution_location,
|
|
2092
|
+
performance_mode=performance_mode,
|
|
2093
|
+
node_logger=node_logger,
|
|
2094
|
+
)
|
|
1749
2095
|
try:
|
|
1750
2096
|
node_result.error = str(node.results.errors)
|
|
1751
2097
|
if self.flow_settings.is_canceled:
|
|
@@ -1758,22 +2104,22 @@ class FlowGraph:
|
|
|
1758
2104
|
node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
|
|
1759
2105
|
node_result.is_running = False
|
|
1760
2106
|
except Exception as e:
|
|
1761
|
-
node_result.error =
|
|
2107
|
+
node_result.error = "Node did not run"
|
|
1762
2108
|
node_result.success = False
|
|
1763
2109
|
node_result.end_timestamp = time()
|
|
1764
2110
|
node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
|
|
1765
2111
|
node_result.is_running = False
|
|
1766
|
-
node_logger.error(f
|
|
2112
|
+
node_logger.error(f"Error in node {node.node_id}: {e}")
|
|
1767
2113
|
if not node_result.success:
|
|
1768
2114
|
skip_nodes.extend(list(node.get_all_dependent_nodes()))
|
|
1769
|
-
node_logger.info(f
|
|
2115
|
+
node_logger.info(f"Completed node with success: {node_result.success}")
|
|
1770
2116
|
self.latest_run_info.nodes_completed += 1
|
|
1771
2117
|
self.latest_run_info.end_time = datetime.datetime.now()
|
|
1772
|
-
self.flow_logger.info(
|
|
2118
|
+
self.flow_logger.info("Flow completed!")
|
|
1773
2119
|
self.end_datetime = datetime.datetime.now()
|
|
1774
2120
|
self.flow_settings.is_running = False
|
|
1775
2121
|
if self.flow_settings.is_canceled:
|
|
1776
|
-
self.flow_logger.info(
|
|
2122
|
+
self.flow_logger.info("Flow canceled")
|
|
1777
2123
|
return self.get_run_info()
|
|
1778
2124
|
except Exception as e:
|
|
1779
2125
|
raise e
|
|
@@ -1799,7 +2145,7 @@ class FlowGraph:
|
|
|
1799
2145
|
return run_info
|
|
1800
2146
|
|
|
1801
2147
|
@property
|
|
1802
|
-
def node_connections(self) ->
|
|
2148
|
+
def node_connections(self) -> list[tuple[int, int]]:
|
|
1803
2149
|
"""Computes and returns a list of all connections in the graph.
|
|
1804
2150
|
|
|
1805
2151
|
Returns:
|
|
@@ -1809,8 +2155,9 @@ class FlowGraph:
|
|
|
1809
2155
|
for node in self.nodes:
|
|
1810
2156
|
outgoing_connections = [(node.node_id, ltn.node_id) for ltn in node.leads_to_nodes]
|
|
1811
2157
|
incoming_connections = [(don.node_id, node.node_id) for don in node.all_inputs]
|
|
1812
|
-
node_connections = [
|
|
1813
|
-
|
|
2158
|
+
node_connections = [
|
|
2159
|
+
c for c in outgoing_connections + incoming_connections if (c[0] is not None and c[1] is not None)
|
|
2160
|
+
]
|
|
1814
2161
|
for node_connection in node_connections:
|
|
1815
2162
|
if node_connection not in connections:
|
|
1816
2163
|
connections.add(node_connection)
|
|
@@ -1871,16 +2218,18 @@ class FlowGraph:
|
|
|
1871
2218
|
Returns:
|
|
1872
2219
|
A FlowInformation object representing the complete graph.
|
|
1873
2220
|
"""
|
|
1874
|
-
node_information = {
|
|
1875
|
-
|
|
2221
|
+
node_information = {
|
|
2222
|
+
node.node_id: node.get_node_information() for node in self.nodes if node.is_setup and node.is_correct
|
|
2223
|
+
}
|
|
1876
2224
|
|
|
1877
|
-
return schemas.FlowInformation(
|
|
1878
|
-
|
|
1879
|
-
|
|
1880
|
-
|
|
1881
|
-
|
|
1882
|
-
|
|
1883
|
-
|
|
2225
|
+
return schemas.FlowInformation(
|
|
2226
|
+
flow_id=self.flow_id,
|
|
2227
|
+
flow_name=self.__name__,
|
|
2228
|
+
flow_settings=self.flow_settings,
|
|
2229
|
+
data=node_information,
|
|
2230
|
+
node_starts=[v.node_id for v in self._flow_starts],
|
|
2231
|
+
node_connections=self.node_connections,
|
|
2232
|
+
)
|
|
1884
2233
|
|
|
1885
2234
|
def cancel(self):
|
|
1886
2235
|
"""Cancels an ongoing graph execution."""
|
|
@@ -1901,7 +2250,11 @@ class FlowGraph:
|
|
|
1901
2250
|
"""
|
|
1902
2251
|
Handle the rename of a flow when it is being saved.
|
|
1903
2252
|
"""
|
|
1904
|
-
if
|
|
2253
|
+
if (
|
|
2254
|
+
self.flow_settings
|
|
2255
|
+
and self.flow_settings.path
|
|
2256
|
+
and Path(self.flow_settings.path).absolute() != new_path.absolute()
|
|
2257
|
+
):
|
|
1905
2258
|
self.__name__ = new_name
|
|
1906
2259
|
self.flow_settings.save_location = str(new_path.absolute())
|
|
1907
2260
|
self.flow_settings.name = new_name
|
|
@@ -1928,27 +2281,27 @@ class FlowGraph:
|
|
|
1928
2281
|
self._handle_flow_renaming(new_flow_name, path)
|
|
1929
2282
|
self.flow_settings.modified_on = datetime.datetime.now().timestamp()
|
|
1930
2283
|
try:
|
|
1931
|
-
if suffix ==
|
|
2284
|
+
if suffix == ".flowfile":
|
|
1932
2285
|
raise DeprecationWarning(
|
|
1933
|
-
|
|
2286
|
+
"The .flowfile format is deprecated. Please use .yaml or .json formats.\n\n"
|
|
1934
2287
|
"Or stay on v0.4.1 if you still need .flowfile support.\n\n"
|
|
1935
2288
|
)
|
|
1936
|
-
elif suffix in (
|
|
2289
|
+
elif suffix in (".yaml", ".yml"):
|
|
1937
2290
|
flowfile_data = self.get_flowfile_data()
|
|
1938
|
-
data = flowfile_data.model_dump(mode=
|
|
1939
|
-
with open(flow_path,
|
|
2291
|
+
data = flowfile_data.model_dump(mode="json")
|
|
2292
|
+
with open(flow_path, "w", encoding="utf-8") as f:
|
|
1940
2293
|
yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
|
|
1941
|
-
elif suffix ==
|
|
2294
|
+
elif suffix == ".json":
|
|
1942
2295
|
flowfile_data = self.get_flowfile_data()
|
|
1943
|
-
data = flowfile_data.model_dump(mode=
|
|
1944
|
-
with open(flow_path,
|
|
2296
|
+
data = flowfile_data.model_dump(mode="json")
|
|
2297
|
+
with open(flow_path, "w", encoding="utf-8") as f:
|
|
1945
2298
|
json.dump(data, f, indent=2, ensure_ascii=False)
|
|
1946
2299
|
|
|
1947
2300
|
else:
|
|
1948
2301
|
flowfile_data = self.get_flowfile_data()
|
|
1949
2302
|
logger.warning(f"Unknown file extension {suffix}. Defaulting to YAML format.")
|
|
1950
|
-
data = flowfile_data.model_dump(mode=
|
|
1951
|
-
with open(flow_path,
|
|
2303
|
+
data = flowfile_data.model_dump(mode="json")
|
|
2304
|
+
with open(flow_path, "w", encoding="utf-8") as f:
|
|
1952
2305
|
yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
|
|
1953
2306
|
|
|
1954
2307
|
except Exception as e:
|
|
@@ -1966,11 +2319,7 @@ class FlowGraph:
|
|
|
1966
2319
|
Returns:
|
|
1967
2320
|
A dictionary representing the graph in Drawflow format.
|
|
1968
2321
|
"""
|
|
1969
|
-
result = {
|
|
1970
|
-
'Home': {
|
|
1971
|
-
"data": {}
|
|
1972
|
-
}
|
|
1973
|
-
}
|
|
2322
|
+
result = {"Home": {"data": {}}}
|
|
1974
2323
|
flow_info: schemas.FlowInformation = self.get_node_storage()
|
|
1975
2324
|
|
|
1976
2325
|
for node_id, node_info in flow_info.data.items():
|
|
@@ -1989,7 +2338,7 @@ class FlowGraph:
|
|
|
1989
2338
|
"inputs": {},
|
|
1990
2339
|
"outputs": {},
|
|
1991
2340
|
"pos_x": pos_x,
|
|
1992
|
-
"pos_y": pos_y
|
|
2341
|
+
"pos_y": pos_y,
|
|
1993
2342
|
}
|
|
1994
2343
|
except Exception as e:
|
|
1995
2344
|
logger.error(e)
|
|
@@ -2003,24 +2352,27 @@ class FlowGraph:
|
|
|
2003
2352
|
leading_to_node = self.get_node(output_node_id)
|
|
2004
2353
|
input_types = leading_to_node.get_input_type(node_info.id)
|
|
2005
2354
|
for input_type in input_types:
|
|
2006
|
-
if input_type ==
|
|
2007
|
-
input_frontend_id =
|
|
2008
|
-
elif input_type ==
|
|
2009
|
-
input_frontend_id =
|
|
2010
|
-
elif input_type ==
|
|
2011
|
-
input_frontend_id =
|
|
2355
|
+
if input_type == "main":
|
|
2356
|
+
input_frontend_id = "input_1"
|
|
2357
|
+
elif input_type == "right":
|
|
2358
|
+
input_frontend_id = "input_2"
|
|
2359
|
+
elif input_type == "left":
|
|
2360
|
+
input_frontend_id = "input_3"
|
|
2012
2361
|
else:
|
|
2013
|
-
input_frontend_id =
|
|
2362
|
+
input_frontend_id = "input_1"
|
|
2014
2363
|
connection = {"node": str(output_node_id), "input": input_frontend_id}
|
|
2015
2364
|
connections.append(connection)
|
|
2016
2365
|
|
|
2017
|
-
result["Home"]["data"][str(node_id)]["outputs"]["output_1"] = {
|
|
2018
|
-
"connections": connections}
|
|
2366
|
+
result["Home"]["data"][str(node_id)]["outputs"]["output_1"] = {"connections": connections}
|
|
2019
2367
|
else:
|
|
2020
2368
|
result["Home"]["data"][str(node_id)]["outputs"] = {"output_1": {"connections": []}}
|
|
2021
2369
|
|
|
2022
2370
|
# Add input to the node based on `depending_on_id` in your backend data
|
|
2023
|
-
if
|
|
2371
|
+
if (
|
|
2372
|
+
node_info.left_input_id is not None
|
|
2373
|
+
or node_info.right_input_id is not None
|
|
2374
|
+
or node_info.input_ids is not None
|
|
2375
|
+
):
|
|
2024
2376
|
main_inputs = node_info.main_input_ids
|
|
2025
2377
|
result["Home"]["data"][str(node_id)]["inputs"]["input_1"] = {
|
|
2026
2378
|
"connections": [{"node": str(main_node_id), "input": "output_1"} for main_node_id in main_inputs]
|
|
@@ -2041,8 +2393,8 @@ class FlowGraph:
|
|
|
2041
2393
|
Returns:
|
|
2042
2394
|
A VueFlowInput object.
|
|
2043
2395
|
"""
|
|
2044
|
-
edges:
|
|
2045
|
-
nodes:
|
|
2396
|
+
edges: list[schemas.NodeEdge] = []
|
|
2397
|
+
nodes: list[schemas.NodeInput] = []
|
|
2046
2398
|
for node in self.nodes:
|
|
2047
2399
|
nodes.append(node.get_node_input())
|
|
2048
2400
|
edges.extend(node.get_edge_input())
|
|
@@ -2054,7 +2406,9 @@ class FlowGraph:
|
|
|
2054
2406
|
for node in self.nodes:
|
|
2055
2407
|
node.reset(True)
|
|
2056
2408
|
|
|
2057
|
-
def copy_node(
|
|
2409
|
+
def copy_node(
|
|
2410
|
+
self, new_node_settings: input_schema.NodePromise, existing_setting_input: Any, node_type: str
|
|
2411
|
+
) -> None:
|
|
2058
2412
|
"""Creates a copy of an existing node.
|
|
2059
2413
|
|
|
2060
2414
|
Args:
|
|
@@ -2067,9 +2421,7 @@ class FlowGraph:
|
|
|
2067
2421
|
if isinstance(existing_setting_input, input_schema.NodePromise):
|
|
2068
2422
|
return
|
|
2069
2423
|
|
|
2070
|
-
combined_settings = combine_existing_settings_and_new_settings(
|
|
2071
|
-
existing_setting_input, new_node_settings
|
|
2072
|
-
)
|
|
2424
|
+
combined_settings = combine_existing_settings_and_new_settings(existing_setting_input, new_node_settings)
|
|
2073
2425
|
getattr(self, f"add_{node_type}")(combined_settings)
|
|
2074
2426
|
|
|
2075
2427
|
def generate_code(self):
|
|
@@ -2077,6 +2429,7 @@ class FlowGraph:
|
|
|
2077
2429
|
This method exports the flow graph to a Polars-compatible format.
|
|
2078
2430
|
"""
|
|
2079
2431
|
from flowfile_core.flowfile.code_generator.code_generator import export_flow_to_polars
|
|
2432
|
+
|
|
2080
2433
|
print(export_flow_to_polars(self))
|
|
2081
2434
|
|
|
2082
2435
|
|
|
@@ -2095,13 +2448,7 @@ def combine_existing_settings_and_new_settings(setting_input: Any, new_settings:
|
|
|
2095
2448
|
copied_setting_input = deepcopy(setting_input)
|
|
2096
2449
|
|
|
2097
2450
|
# Update only attributes that exist on new_settings
|
|
2098
|
-
fields_to_update = (
|
|
2099
|
-
"node_id",
|
|
2100
|
-
"pos_x",
|
|
2101
|
-
"pos_y",
|
|
2102
|
-
"description",
|
|
2103
|
-
"flow_id"
|
|
2104
|
-
)
|
|
2451
|
+
fields_to_update = ("node_id", "pos_x", "pos_y", "description", "flow_id")
|
|
2105
2452
|
|
|
2106
2453
|
for field in fields_to_update:
|
|
2107
2454
|
if hasattr(new_settings, field) and getattr(new_settings, field) is not None:
|
|
@@ -2117,12 +2464,12 @@ def add_connection(flow: FlowGraph, node_connection: input_schema.NodeConnection
|
|
|
2117
2464
|
flow: The FlowGraph instance to modify.
|
|
2118
2465
|
node_connection: An object defining the source and target of the connection.
|
|
2119
2466
|
"""
|
|
2120
|
-
logger.info(
|
|
2467
|
+
logger.info("adding a connection")
|
|
2121
2468
|
from_node = flow.get_node(node_connection.output_connection.node_id)
|
|
2122
2469
|
to_node = flow.get_node(node_connection.input_connection.node_id)
|
|
2123
|
-
logger.info(f
|
|
2470
|
+
logger.info(f"from_node={from_node}, to_node={to_node}")
|
|
2124
2471
|
if not (from_node and to_node):
|
|
2125
|
-
raise HTTPException(404,
|
|
2472
|
+
raise HTTPException(404, "Not not available")
|
|
2126
2473
|
else:
|
|
2127
2474
|
to_node.add_node_connection(from_node, node_connection.input_connection.get_node_input_connection_type())
|
|
2128
2475
|
|