Flowfile 0.5.1__py3-none-any.whl → 0.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backends/main.py +25 -22
- build_backends/main_prd.py +10 -19
- flowfile/__init__.py +194 -74
- flowfile/__main__.py +10 -7
- flowfile/api.py +51 -57
- flowfile/web/__init__.py +14 -9
- flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
- flowfile/web/static/assets/AdminView-f9847d67.js +713 -0
- flowfile/web/static/assets/CloudConnectionView-cf85f943.css +72 -0
- flowfile/web/static/assets/{CloudConnectionManager-0dfba9f2.js → CloudConnectionView-faace55b.js} +11 -11
- flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
- flowfile/web/static/assets/{CloudStorageReader-d5b1b6c9.js → CloudStorageReader-d86ecaa7.js} +10 -8
- flowfile/web/static/assets/{CloudStorageWriter-00d87aad.js → CloudStorageWriter-0f4d9a44.js} +10 -8
- flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
- flowfile/web/static/assets/ColumnActionInput-c44b7aee.css +159 -0
- flowfile/web/static/assets/ColumnActionInput-f4189ae0.js +330 -0
- flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
- flowfile/web/static/assets/{ColumnSelector-4685e75d.js → ColumnSelector-e66b33da.js} +3 -5
- flowfile/web/static/assets/ContextMenu-49463352.js +9 -0
- flowfile/web/static/assets/ContextMenu-dd5f3f25.js +9 -0
- flowfile/web/static/assets/ContextMenu-f709b884.js +9 -0
- flowfile/web/static/assets/ContextMenu.vue_vue_type_script_setup_true_lang-a1bd6314.js +59 -0
- flowfile/web/static/assets/{CrossJoin-702a3edd.js → CrossJoin-24694b8f.js} +12 -10
- flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
- flowfile/web/static/assets/{CustomNode-b1519993.js → CustomNode-569d45ff.js} +43 -24
- flowfile/web/static/assets/CustomNode-edb9b939.css +42 -0
- flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-c20a1e16.css} +23 -21
- flowfile/web/static/assets/{DatabaseConnectionSettings-6f3e4ea5.js → DatabaseConnectionSettings-cfc08938.js} +5 -4
- flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-5bf8c75b.css} +41 -46
- flowfile/web/static/assets/{DatabaseReader-d38c7295.js → DatabaseReader-701feabb.js} +25 -15
- flowfile/web/static/assets/{DatabaseManager-cf5ef661.js → DatabaseView-0482e5b5.js} +11 -11
- flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
- flowfile/web/static/assets/{DatabaseWriter-b04ef46a.js → DatabaseWriter-16721989.js} +17 -10
- flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-bdcf2c8b.css} +29 -27
- flowfile/web/static/assets/{designer-8da3ba3a.css → DesignerView-49abb835.css} +783 -663
- flowfile/web/static/assets/{designer-9633482a.js → DesignerView-f64749fb.js} +1292 -3253
- flowfile/web/static/assets/{documentation-ca400224.js → DocumentationView-61bd2990.js} +5 -5
- flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-9ea6e871.css} +9 -9
- flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
- flowfile/web/static/assets/{ExploreData-5fa10ed8.js → ExploreData-e2735b13.js} +18 -9
- flowfile/web/static/assets/{ExternalSource-d39af878.js → ExternalSource-2535c3b2.js} +9 -7
- flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-7ac7373f.css} +20 -20
- flowfile/web/static/assets/Filter-2cdbc93c.js +287 -0
- flowfile/web/static/assets/Filter-7494ea97.css +48 -0
- flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
- flowfile/web/static/assets/{Formula-6b04fb1d.js → Formula-fcda3c2c.js} +13 -11
- flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
- flowfile/web/static/assets/{FuzzyMatch-999521f4.js → FuzzyMatch-f8d3b7d3.js} +12 -10
- flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-4b4d7db9.css} +5 -5
- flowfile/web/static/assets/{GraphSolver-17dd2198.js → GraphSolver-72eaa695.js} +14 -12
- flowfile/web/static/assets/GroupBy-5792782d.css +9 -0
- flowfile/web/static/assets/{GroupBy-6b039e18.js → GroupBy-8aa0598b.js} +9 -7
- flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
- flowfile/web/static/assets/{Join-24d0f113.js → Join-e40f0ffa.js} +13 -11
- flowfile/web/static/assets/LoginView-5111c9ae.js +134 -0
- flowfile/web/static/assets/LoginView-d325d632.css +172 -0
- flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
- flowfile/web/static/assets/{ManualInput-34639209.js → ManualInput-9b6f3224.js} +170 -116
- flowfile/web/static/assets/{MultiSelect-0e8724a3.js → MultiSelect-ef28e19e.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js → MultiSelect.vue_vue_type_script_setup_true_lang-83b3bbfd.js} +1 -1
- flowfile/web/static/assets/NodeDesigner-94cd4dd3.css +1429 -0
- flowfile/web/static/assets/NodeDesigner-d2b7ee2b.js +2712 -0
- flowfile/web/static/assets/{NumericInput-3d63a470.js → NumericInput-1d789794.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js → NumericInput.vue_vue_type_script_setup_true_lang-7775f83e.js} +5 -2
- flowfile/web/static/assets/Output-692dd25d.css +37 -0
- flowfile/web/static/assets/{Output-edea9802.js → Output-cefef801.js} +14 -10
- flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
- flowfile/web/static/assets/{Pivot-61d19301.js → Pivot-bab1b75b.js} +12 -10
- flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
- flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
- flowfile/web/static/assets/{PivotValidation-f97fec5b.js → PivotValidation-e7941f91.js} +3 -3
- flowfile/web/static/assets/{PivotValidation-de9f43fe.js → PivotValidation-fba09336.js} +3 -3
- flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
- flowfile/web/static/assets/{PolarsCode-bc3c9984.js → PolarsCode-740e40fa.js} +18 -9
- flowfile/web/static/assets/PopOver-862d7e28.js +939 -0
- flowfile/web/static/assets/PopOver-d96599db.css +33 -0
- flowfile/web/static/assets/{Read-64a3f259.js → Read-225cc63f.js} +16 -12
- flowfile/web/static/assets/{Read-e808b239.css → Read-90f366bc.css} +15 -15
- flowfile/web/static/assets/{RecordCount-3d5039be.js → RecordCount-ffc71eca.js} +6 -4
- flowfile/web/static/assets/{RecordId-597510e0.js → RecordId-a70bb8df.js} +9 -7
- flowfile/web/static/assets/{SQLQueryComponent-df51adbe.js → SQLQueryComponent-15a421f5.js} +3 -3
- flowfile/web/static/assets/SQLQueryComponent-edb90b98.css +29 -0
- flowfile/web/static/assets/{Sample-4be0a507.js → Sample-6c26afc7.js} +6 -4
- flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
- flowfile/web/static/assets/SecretSelector-ceed9496.js +113 -0
- flowfile/web/static/assets/{SecretManager-4839be57.js → SecretsView-214d255a.js} +35 -36
- flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
- flowfile/web/static/assets/{Select-9b72f201.js → Select-8fc29999.js} +9 -7
- flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-7ded385d.js → SettingsSection-3f70e4c3.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-f0f75a42.js → SettingsSection-83090218.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-e1e9c953.js → SettingsSection-9f0d1725.js} +3 -3
- flowfile/web/static/assets/SetupView-3fa0aa03.js +160 -0
- flowfile/web/static/assets/SetupView-e2da3442.css +230 -0
- flowfile/web/static/assets/{SingleSelect-6c777aac.js → SingleSelect-a4a568cb.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js → SingleSelect.vue_vue_type_script_setup_true_lang-c8ebdd33.js} +1 -1
- flowfile/web/static/assets/{SliderInput-7cb93e62.js → SliderInput-be533e71.js} +7 -4
- flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
- flowfile/web/static/assets/{Sort-6cbde21a.js → Sort-154dad81.js} +9 -7
- flowfile/web/static/assets/Sort-4abb7fae.css +9 -0
- flowfile/web/static/assets/{TextInput-d9a40c11.js → TextInput-454e2bda.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-5896c375.js → TextInput.vue_vue_type_script_setup_true_lang-e86510d0.js} +5 -2
- flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
- flowfile/web/static/assets/{TextToRows-c4fcbf4d.js → TextToRows-ea73433d.js} +11 -10
- flowfile/web/static/assets/{ToggleSwitch-4ef91d19.js → ToggleSwitch-9d7b30f1.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-00f2580e.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-394a1f78.css} +14 -14
- flowfile/web/static/assets/{UnavailableFields-a03f512c.js → UnavailableFields-b72a2c72.js} +4 -4
- flowfile/web/static/assets/{Union-bfe9b996.js → Union-1e44f263.js} +8 -6
- flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
- flowfile/web/static/assets/Unique-2b705521.css +3 -0
- flowfile/web/static/assets/{Unique-5d023a27.js → Unique-a3bc6d0a.js} +13 -10
- flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-b6ad6427.css} +7 -7
- flowfile/web/static/assets/{Unpivot-91cc5354.js → Unpivot-e27935fc.js} +11 -9
- flowfile/web/static/assets/{UnpivotValidation-7ee2de44.js → UnpivotValidation-72497680.js} +3 -3
- flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
- flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
- flowfile/web/static/assets/{VueGraphicWalker-e51b9924.js → VueGraphicWalker-d9ab70a3.js} +4 -4
- flowfile/web/static/assets/{api-cf1221f0.js → api-a2102880.js} +1 -1
- flowfile/web/static/assets/{api-c1bad5ca.js → api-f75042b0.js} +1 -1
- flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-1d6acbd9.css} +41 -41
- flowfile/web/static/assets/{dropDown-614b998d.js → dropDown-2798a109.js} +3 -3
- flowfile/web/static/assets/{fullEditor-f7971590.js → fullEditor-cf7d7d93.js} +11 -10
- flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-fe9f7e18.css} +77 -65
- flowfile/web/static/assets/{genericNodeSettings-4fe5f36b.js → genericNodeSettings-14eac1c3.js} +5 -5
- flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
- flowfile/web/static/assets/{index-5429bbf8.js → index-387a6f18.js} +41806 -40958
- flowfile/web/static/assets/index-6b367bb5.js +38 -0
- flowfile/web/static/assets/{index-50508d4d.css → index-e96ab018.css} +2184 -569
- flowfile/web/static/assets/index-f0a6e5a5.js +2696 -0
- flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
- flowfile/web/static/assets/nodeInput-ed2ae8d7.js +2 -0
- flowfile/web/static/assets/{outputCsv-076b85ab.js → outputCsv-3c1757e8.js} +3 -3
- flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
- flowfile/web/static/assets/{outputExcel-0fd17dbe.js → outputExcel-686e1f48.js} +3 -3
- flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
- flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
- flowfile/web/static/assets/{outputParquet-b61e0847.js → outputParquet-df28faa7.js} +4 -4
- flowfile/web/static/assets/{readCsv-c767cb37.css → readCsv-3bfac4c3.css} +15 -15
- flowfile/web/static/assets/{readCsv-a8bb8b61.js → readCsv-e37eee21.js} +3 -3
- flowfile/web/static/assets/{readExcel-806d2826.css → readExcel-3db6b763.css} +13 -13
- flowfile/web/static/assets/{readExcel-67b4aee0.js → readExcel-a13f14bb.js} +5 -5
- flowfile/web/static/assets/{readParquet-92ce1dbc.js → readParquet-344cf746.js} +3 -3
- flowfile/web/static/assets/{readParquet-48c81530.css → readParquet-c5244ad5.css} +4 -4
- flowfile/web/static/assets/secrets.api-ae198c5c.js +65 -0
- flowfile/web/static/assets/{selectDynamic-92e25ee3.js → selectDynamic-6b4b0767.js} +5 -5
- flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
- flowfile/web/static/assets/{vue-codemirror.esm-41b0e0d7.js → vue-codemirror.esm-31ba0e0b.js} +31 -640
- flowfile/web/static/assets/{vue-content-loader.es-2c8e608f.js → vue-content-loader.es-4469c8ff.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/METADATA +3 -4
- flowfile-0.5.4.dist-info/RECORD +407 -0
- flowfile_core/__init__.py +13 -6
- flowfile_core/auth/jwt.py +51 -16
- flowfile_core/auth/models.py +32 -7
- flowfile_core/auth/password.py +89 -0
- flowfile_core/auth/secrets.py +64 -19
- flowfile_core/configs/__init__.py +9 -7
- flowfile_core/configs/flow_logger.py +15 -14
- flowfile_core/configs/node_store/__init__.py +72 -4
- flowfile_core/configs/node_store/nodes.py +155 -172
- flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
- flowfile_core/configs/settings.py +28 -15
- flowfile_core/database/connection.py +7 -6
- flowfile_core/database/init_db.py +96 -2
- flowfile_core/database/models.py +3 -1
- flowfile_core/fileExplorer/__init__.py +17 -0
- flowfile_core/fileExplorer/funcs.py +145 -57
- flowfile_core/fileExplorer/utils.py +10 -11
- flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
- flowfile_core/flowfile/analytics/analytics_processor.py +26 -24
- flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
- flowfile_core/flowfile/analytics/utils.py +1 -1
- flowfile_core/flowfile/code_generator/__init__.py +11 -0
- flowfile_core/flowfile/code_generator/code_generator.py +706 -247
- flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
- flowfile_core/flowfile/database_connection_manager/models.py +1 -1
- flowfile_core/flowfile/extensions.py +17 -12
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +115 -83
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +493 -423
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +31 -20
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +14 -15
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
- flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +190 -127
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
- flowfile_core/flowfile/flow_data_engine/utils.py +99 -67
- flowfile_core/flowfile/flow_graph.py +920 -571
- flowfile_core/flowfile/flow_graph_utils.py +31 -49
- flowfile_core/flowfile/flow_node/flow_node.py +379 -258
- flowfile_core/flowfile/flow_node/models.py +53 -41
- flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
- flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
- flowfile_core/flowfile/handler.py +80 -30
- flowfile_core/flowfile/manage/compatibility_enhancements.py +209 -126
- flowfile_core/flowfile/manage/io_flowfile.py +54 -57
- flowfile_core/flowfile/node_designer/__init__.py +19 -13
- flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
- flowfile_core/flowfile/node_designer/custom_node.py +162 -36
- flowfile_core/flowfile/node_designer/ui_components.py +278 -34
- flowfile_core/flowfile/schema_callbacks.py +71 -51
- flowfile_core/flowfile/setting_generator/__init__.py +0 -1
- flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
- flowfile_core/flowfile/setting_generator/settings.py +64 -53
- flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
- flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
- flowfile_core/flowfile/util/calculate_layout.py +9 -13
- flowfile_core/flowfile/util/execution_orderer.py +25 -17
- flowfile_core/flowfile/util/node_skipper.py +4 -4
- flowfile_core/flowfile/utils.py +19 -21
- flowfile_core/main.py +26 -19
- flowfile_core/routes/auth.py +284 -11
- flowfile_core/routes/cloud_connections.py +25 -25
- flowfile_core/routes/logs.py +21 -29
- flowfile_core/routes/public.py +46 -4
- flowfile_core/routes/routes.py +70 -34
- flowfile_core/routes/secrets.py +25 -27
- flowfile_core/routes/user_defined_components.py +483 -4
- flowfile_core/run_lock.py +0 -1
- flowfile_core/schemas/__init__.py +4 -6
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
- flowfile_core/schemas/cloud_storage_schemas.py +96 -66
- flowfile_core/schemas/input_schema.py +231 -144
- flowfile_core/schemas/output_model.py +49 -34
- flowfile_core/schemas/schemas.py +116 -89
- flowfile_core/schemas/transform_schema.py +518 -263
- flowfile_core/schemas/yaml_types.py +21 -7
- flowfile_core/secret_manager/secret_manager.py +123 -18
- flowfile_core/types.py +29 -9
- flowfile_core/utils/arrow_reader.py +7 -6
- flowfile_core/utils/excel_file_manager.py +3 -3
- flowfile_core/utils/fileManager.py +7 -7
- flowfile_core/utils/fl_executor.py +8 -10
- flowfile_core/utils/utils.py +4 -4
- flowfile_core/utils/validate_setup.py +5 -4
- flowfile_frame/__init__.py +117 -51
- flowfile_frame/adapters.py +2 -9
- flowfile_frame/adding_expr.py +73 -32
- flowfile_frame/cloud_storage/frame_helpers.py +27 -23
- flowfile_frame/cloud_storage/secret_manager.py +12 -26
- flowfile_frame/config.py +2 -5
- flowfile_frame/database/__init__.py +36 -0
- flowfile_frame/database/connection_manager.py +205 -0
- flowfile_frame/database/frame_helpers.py +249 -0
- flowfile_frame/expr.py +311 -218
- flowfile_frame/expr.pyi +160 -159
- flowfile_frame/expr_name.py +23 -23
- flowfile_frame/flow_frame.py +571 -476
- flowfile_frame/flow_frame.pyi +123 -104
- flowfile_frame/flow_frame_methods.py +227 -246
- flowfile_frame/group_frame.py +50 -20
- flowfile_frame/join.py +2 -2
- flowfile_frame/lazy.py +129 -87
- flowfile_frame/lazy_methods.py +83 -30
- flowfile_frame/list_name_space.py +55 -50
- flowfile_frame/selectors.py +148 -68
- flowfile_frame/series.py +9 -7
- flowfile_frame/utils.py +19 -21
- flowfile_worker/__init__.py +12 -7
- flowfile_worker/configs.py +41 -33
- flowfile_worker/create/__init__.py +14 -9
- flowfile_worker/create/funcs.py +114 -77
- flowfile_worker/create/models.py +46 -43
- flowfile_worker/create/pl_types.py +14 -15
- flowfile_worker/create/read_excel_tables.py +34 -41
- flowfile_worker/create/utils.py +22 -19
- flowfile_worker/external_sources/s3_source/main.py +18 -51
- flowfile_worker/external_sources/s3_source/models.py +34 -27
- flowfile_worker/external_sources/sql_source/main.py +8 -5
- flowfile_worker/external_sources/sql_source/models.py +13 -9
- flowfile_worker/flow_logger.py +10 -8
- flowfile_worker/funcs.py +214 -155
- flowfile_worker/main.py +11 -17
- flowfile_worker/models.py +35 -28
- flowfile_worker/process_manager.py +2 -3
- flowfile_worker/routes.py +121 -90
- flowfile_worker/secrets.py +114 -21
- flowfile_worker/spawner.py +89 -54
- flowfile_worker/utils.py +3 -2
- shared/__init__.py +2 -7
- shared/storage_config.py +25 -13
- test_utils/postgres/commands.py +3 -2
- test_utils/postgres/fixtures.py +9 -9
- test_utils/s3/commands.py +1 -1
- test_utils/s3/data_generator.py +3 -4
- test_utils/s3/demo_data_generator.py +4 -7
- test_utils/s3/fixtures.py +7 -5
- tools/migrate/__init__.py +1 -1
- tools/migrate/__main__.py +16 -29
- tools/migrate/legacy_schemas.py +251 -190
- tools/migrate/migrate.py +193 -181
- tools/migrate/tests/conftest.py +1 -3
- tools/migrate/tests/test_migrate.py +36 -41
- tools/migrate/tests/test_migration_e2e.py +28 -29
- tools/migrate/tests/test_node_migrations.py +50 -20
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
- flowfile/web/static/assets/ContextMenu-23e909da.js +0 -41
- flowfile/web/static/assets/ContextMenu-4c74eef1.css +0 -26
- flowfile/web/static/assets/ContextMenu-63cfa99b.css +0 -26
- flowfile/web/static/assets/ContextMenu-70ae0c79.js +0 -41
- flowfile/web/static/assets/ContextMenu-c13f91d0.css +0 -26
- flowfile/web/static/assets/ContextMenu-f149cf7c.js +0 -41
- flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
- flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
- flowfile/web/static/assets/Filter-9b6d08db.js +0 -164
- flowfile/web/static/assets/Filter-f62091b3.css +0 -20
- flowfile/web/static/assets/GroupBy-b9505323.css +0 -51
- flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
- flowfile/web/static/assets/Output-283fe388.css +0 -37
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
- flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
- flowfile/web/static/assets/SQLQueryComponent-36cef432.css +0 -27
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
- flowfile/web/static/assets/Sort-3643d625.css +0 -51
- flowfile/web/static/assets/Unique-f9fb0809.css +0 -51
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
- flowfile/web/static/assets/nodeInput-5d0d6b79.js +0 -41
- flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
- flowfile/web/static/assets/secretApi-68435402.js +0 -46
- flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
- flowfile-0.5.1.dist-info/RECORD +0 -388
- {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/WHEEL +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/entry_points.txt +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,80 +1,103 @@
|
|
|
1
1
|
import datetime
|
|
2
|
-
|
|
3
|
-
import os
|
|
4
|
-
import yaml
|
|
5
2
|
import json
|
|
6
|
-
|
|
7
|
-
|
|
3
|
+
import os
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from copy import deepcopy
|
|
6
|
+
from functools import partial
|
|
7
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
8
8
|
from pathlib import Path
|
|
9
|
+
from time import time
|
|
10
|
+
from typing import Any, Literal, Union
|
|
11
|
+
from uuid import uuid1
|
|
9
12
|
|
|
10
13
|
import fastexcel
|
|
14
|
+
import polars as pl
|
|
15
|
+
import yaml
|
|
11
16
|
from fastapi.exceptions import HTTPException
|
|
12
|
-
from time import time
|
|
13
|
-
from functools import partial
|
|
14
|
-
from typing import List, Dict, Union, Callable, Any, Optional, Tuple, Literal
|
|
15
|
-
from uuid import uuid1
|
|
16
|
-
from copy import deepcopy
|
|
17
17
|
from pyarrow.parquet import ParquetFile
|
|
18
|
+
|
|
18
19
|
from flowfile_core.configs import logger
|
|
19
20
|
from flowfile_core.configs.flow_logger import FlowLogger
|
|
20
|
-
from flowfile_core.
|
|
21
|
-
from flowfile_core.flowfile.
|
|
22
|
-
|
|
21
|
+
from flowfile_core.configs.node_store import CUSTOM_NODE_STORE
|
|
22
|
+
from flowfile_core.flowfile.analytics.utils import create_graphic_walker_node_from_node_promise
|
|
23
|
+
from flowfile_core.flowfile.database_connection_manager.db_connections import (
|
|
24
|
+
get_local_cloud_connection,
|
|
25
|
+
get_local_database_connection,
|
|
26
|
+
)
|
|
23
27
|
from flowfile_core.flowfile.flow_data_engine.cloud_storage_reader import CloudStorageReader
|
|
24
|
-
from flowfile_core.schemas.transform_schema import FuzzyMatchInputManager
|
|
25
|
-
from flowfile_core.utils.arrow_reader import get_read_top_n
|
|
26
28
|
from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine, execute_polars_code
|
|
27
|
-
from flowfile_core.flowfile.flow_data_engine.
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
29
|
+
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, cast_str_to_polars_type
|
|
30
|
+
from flowfile_core.flowfile.flow_data_engine.polars_code_parser import polars_code_parser
|
|
31
|
+
from flowfile_core.flowfile.flow_data_engine.read_excel_tables import (
|
|
32
|
+
get_calamine_xlsx_data_types,
|
|
33
|
+
get_open_xlsx_datatypes,
|
|
34
|
+
)
|
|
35
|
+
from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import (
|
|
36
|
+
ExternalCloudWriter,
|
|
37
|
+
ExternalDatabaseFetcher,
|
|
38
|
+
ExternalDatabaseWriter,
|
|
39
|
+
ExternalDfFetcher,
|
|
40
|
+
)
|
|
41
|
+
from flowfile_core.flowfile.flow_node.flow_node import FlowNode
|
|
42
|
+
from flowfile_core.flowfile.graph_tree.graph_tree import (
|
|
43
|
+
add_un_drawn_nodes,
|
|
44
|
+
build_flow_paths,
|
|
45
|
+
build_node_info,
|
|
46
|
+
calculate_depth,
|
|
47
|
+
define_node_connections,
|
|
48
|
+
draw_merged_paths,
|
|
49
|
+
draw_standalone_paths,
|
|
50
|
+
group_nodes_by_depth,
|
|
51
|
+
)
|
|
52
|
+
from flowfile_core.flowfile.node_designer.custom_node import CustomNodeBase
|
|
53
|
+
from flowfile_core.flowfile.schema_callbacks import calculate_fuzzy_match_schema, pre_calculate_pivot_schema
|
|
31
54
|
from flowfile_core.flowfile.sources import external_sources
|
|
55
|
+
from flowfile_core.flowfile.sources.external_sources.factory import data_source_factory
|
|
56
|
+
from flowfile_core.flowfile.sources.external_sources.sql_source import models as sql_models
|
|
57
|
+
from flowfile_core.flowfile.sources.external_sources.sql_source import utils as sql_utils
|
|
58
|
+
from flowfile_core.flowfile.sources.external_sources.sql_source.sql_source import BaseSqlSource, SqlSource
|
|
59
|
+
from flowfile_core.flowfile.util.calculate_layout import calculate_layered_layout
|
|
60
|
+
from flowfile_core.flowfile.util.execution_orderer import compute_execution_plan
|
|
61
|
+
from flowfile_core.flowfile.utils import snake_case_to_camel_case
|
|
32
62
|
from flowfile_core.schemas import input_schema, schemas, transform_schema
|
|
63
|
+
from flowfile_core.schemas.cloud_storage_schemas import (
|
|
64
|
+
AuthMethod,
|
|
65
|
+
CloudStorageReadSettingsInternal,
|
|
66
|
+
CloudStorageWriteSettingsInternal,
|
|
67
|
+
FullCloudStorageConnection,
|
|
68
|
+
get_cloud_storage_write_settings_worker_interface,
|
|
69
|
+
)
|
|
33
70
|
from flowfile_core.schemas.output_model import NodeData, NodeResult, RunInformation
|
|
34
|
-
from flowfile_core.schemas.
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
get_cloud_storage_write_settings_worker_interface, AuthMethod)
|
|
38
|
-
from flowfile_core.flowfile.utils import snake_case_to_camel_case
|
|
39
|
-
from flowfile_core.flowfile.analytics.utils import create_graphic_walker_node_from_node_promise
|
|
40
|
-
from flowfile_core.flowfile.flow_node.flow_node import FlowNode
|
|
41
|
-
from flowfile_core.flowfile.util.execution_orderer import compute_execution_plan
|
|
42
|
-
from flowfile_core.flowfile.graph_tree.graph_tree import (add_un_drawn_nodes, build_flow_paths,
|
|
43
|
-
build_node_info, calculate_depth,
|
|
44
|
-
define_node_connections, draw_merged_paths,
|
|
45
|
-
draw_standalone_paths, group_nodes_by_depth)
|
|
46
|
-
from flowfile_core.flowfile.flow_data_engine.polars_code_parser import polars_code_parser
|
|
47
|
-
from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import (ExternalDatabaseFetcher,
|
|
48
|
-
ExternalDatabaseWriter,
|
|
49
|
-
ExternalDfFetcher,
|
|
50
|
-
ExternalCloudWriter)
|
|
51
|
-
from flowfile_core.secret_manager.secret_manager import get_encrypted_secret, decrypt_secret
|
|
52
|
-
from flowfile_core.flowfile.sources.external_sources.sql_source import utils as sql_utils, models as sql_models
|
|
53
|
-
from flowfile_core.flowfile.sources.external_sources.sql_source.sql_source import SqlSource, BaseSqlSource
|
|
54
|
-
from flowfile_core.flowfile.database_connection_manager.db_connections import (get_local_database_connection,
|
|
55
|
-
get_local_cloud_connection)
|
|
56
|
-
from flowfile_core.flowfile.util.calculate_layout import calculate_layered_layout
|
|
57
|
-
from flowfile_core.flowfile.node_designer.custom_node import CustomNodeBase
|
|
58
|
-
from importlib.metadata import version, PackageNotFoundError
|
|
71
|
+
from flowfile_core.schemas.transform_schema import FuzzyMatchInputManager
|
|
72
|
+
from flowfile_core.secret_manager.secret_manager import decrypt_secret, get_encrypted_secret
|
|
73
|
+
from flowfile_core.utils.arrow_reader import get_read_top_n
|
|
59
74
|
|
|
60
75
|
try:
|
|
61
76
|
__version__ = version("Flowfile")
|
|
62
77
|
except PackageNotFoundError:
|
|
63
|
-
__version__ = "0.
|
|
78
|
+
__version__ = "0.5.0"
|
|
64
79
|
|
|
65
80
|
|
|
66
81
|
def represent_list_json(dumper, data):
|
|
67
82
|
"""Use inline style for short simple lists, block style for complex ones."""
|
|
68
83
|
if len(data) <= 10 and all(isinstance(item, (int, str, float, bool, type(None))) for item in data):
|
|
69
|
-
return dumper.represent_sequence(
|
|
70
|
-
return dumper.represent_sequence(
|
|
84
|
+
return dumper.represent_sequence("tag:yaml.org,2002:seq", data, flow_style=True)
|
|
85
|
+
return dumper.represent_sequence("tag:yaml.org,2002:seq", data, flow_style=False)
|
|
71
86
|
|
|
72
87
|
|
|
73
88
|
yaml.add_representer(list, represent_list_json)
|
|
74
89
|
|
|
75
90
|
|
|
76
|
-
def get_xlsx_schema(
|
|
77
|
-
|
|
91
|
+
def get_xlsx_schema(
|
|
92
|
+
engine: str,
|
|
93
|
+
file_path: str,
|
|
94
|
+
sheet_name: str,
|
|
95
|
+
start_row: int,
|
|
96
|
+
start_column: int,
|
|
97
|
+
end_row: int,
|
|
98
|
+
end_column: int,
|
|
99
|
+
has_headers: bool,
|
|
100
|
+
):
|
|
78
101
|
"""Calculates the schema of an XLSX file by reading a sample of rows.
|
|
79
102
|
|
|
80
103
|
Args:
|
|
@@ -91,27 +114,29 @@ def get_xlsx_schema(engine: str, file_path: str, sheet_name: str, start_row: int
|
|
|
91
114
|
A list of FlowfileColumn objects representing the schema.
|
|
92
115
|
"""
|
|
93
116
|
try:
|
|
94
|
-
logger.info(
|
|
95
|
-
if engine ==
|
|
117
|
+
logger.info("Starting to calculate the schema")
|
|
118
|
+
if engine == "openpyxl":
|
|
96
119
|
max_col = end_column if end_column > 0 else None
|
|
97
|
-
return get_open_xlsx_datatypes(
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
120
|
+
return get_open_xlsx_datatypes(
|
|
121
|
+
file_path=file_path,
|
|
122
|
+
sheet_name=sheet_name,
|
|
123
|
+
min_row=start_row + 1,
|
|
124
|
+
min_col=start_column + 1,
|
|
125
|
+
max_row=100,
|
|
126
|
+
max_col=max_col,
|
|
127
|
+
has_headers=has_headers,
|
|
128
|
+
)
|
|
129
|
+
elif engine == "calamine":
|
|
130
|
+
return get_calamine_xlsx_data_types(
|
|
131
|
+
file_path=file_path, sheet_name=sheet_name, start_row=start_row, end_row=end_row
|
|
132
|
+
)
|
|
133
|
+
logger.info("done calculating the schema")
|
|
109
134
|
except Exception as e:
|
|
110
135
|
logger.error(e)
|
|
111
136
|
return []
|
|
112
137
|
|
|
113
138
|
|
|
114
|
-
def skip_node_message(flow_logger: FlowLogger, nodes:
|
|
139
|
+
def skip_node_message(flow_logger: FlowLogger, nodes: list[FlowNode]) -> None:
|
|
115
140
|
"""Logs a warning message listing all nodes that will be skipped during execution.
|
|
116
141
|
|
|
117
142
|
Args:
|
|
@@ -120,10 +145,10 @@ def skip_node_message(flow_logger: FlowLogger, nodes: List[FlowNode]) -> None:
|
|
|
120
145
|
"""
|
|
121
146
|
if len(nodes) > 0:
|
|
122
147
|
msg = "\n".join(str(node) for node in nodes)
|
|
123
|
-
flow_logger.warning(f
|
|
148
|
+
flow_logger.warning(f"skipping nodes:\n{msg}")
|
|
124
149
|
|
|
125
150
|
|
|
126
|
-
def execution_order_message(flow_logger: FlowLogger, nodes:
|
|
151
|
+
def execution_order_message(flow_logger: FlowLogger, nodes: list[FlowNode]) -> None:
|
|
127
152
|
"""Logs an informational message showing the determined execution order of nodes.
|
|
128
153
|
|
|
129
154
|
Args:
|
|
@@ -131,11 +156,19 @@ def execution_order_message(flow_logger: FlowLogger, nodes: List[FlowNode]) -> N
|
|
|
131
156
|
nodes: A list of FlowNode objects in the order they will be executed.
|
|
132
157
|
"""
|
|
133
158
|
msg = "\n".join(str(node) for node in nodes)
|
|
134
|
-
flow_logger.info(f
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
def get_xlsx_schema_callback(
|
|
138
|
-
|
|
159
|
+
flow_logger.info(f"execution order:\n{msg}")
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def get_xlsx_schema_callback(
|
|
163
|
+
engine: str,
|
|
164
|
+
file_path: str,
|
|
165
|
+
sheet_name: str,
|
|
166
|
+
start_row: int,
|
|
167
|
+
start_column: int,
|
|
168
|
+
end_row: int,
|
|
169
|
+
end_column: int,
|
|
170
|
+
has_headers: bool,
|
|
171
|
+
):
|
|
139
172
|
"""Creates a partially applied function for lazy calculation of an XLSX schema.
|
|
140
173
|
|
|
141
174
|
Args:
|
|
@@ -151,12 +184,22 @@ def get_xlsx_schema_callback(engine: str, file_path: str, sheet_name: str, start
|
|
|
151
184
|
Returns:
|
|
152
185
|
A callable function that, when called, will execute `get_xlsx_schema`.
|
|
153
186
|
"""
|
|
154
|
-
return partial(
|
|
155
|
-
|
|
187
|
+
return partial(
|
|
188
|
+
get_xlsx_schema,
|
|
189
|
+
engine=engine,
|
|
190
|
+
file_path=file_path,
|
|
191
|
+
sheet_name=sheet_name,
|
|
192
|
+
start_row=start_row,
|
|
193
|
+
start_column=start_column,
|
|
194
|
+
end_row=end_row,
|
|
195
|
+
end_column=end_column,
|
|
196
|
+
has_headers=has_headers,
|
|
197
|
+
)
|
|
156
198
|
|
|
157
199
|
|
|
158
|
-
def get_cloud_connection_settings(
|
|
159
|
-
|
|
200
|
+
def get_cloud_connection_settings(
|
|
201
|
+
connection_name: str, user_id: int, auth_mode: AuthMethod
|
|
202
|
+
) -> FullCloudStorageConnection:
|
|
160
203
|
"""Retrieves cloud storage connection settings, falling back to environment variables if needed.
|
|
161
204
|
|
|
162
205
|
Args:
|
|
@@ -186,32 +229,44 @@ class FlowGraph:
|
|
|
186
229
|
|
|
187
230
|
It manages nodes, connections, and the execution of the entire flow.
|
|
188
231
|
"""
|
|
232
|
+
|
|
189
233
|
uuid: str
|
|
190
|
-
depends_on:
|
|
234
|
+
depends_on: dict[
|
|
235
|
+
int,
|
|
236
|
+
Union[
|
|
237
|
+
ParquetFile,
|
|
238
|
+
FlowDataEngine,
|
|
239
|
+
"FlowGraph",
|
|
240
|
+
pl.DataFrame,
|
|
241
|
+
],
|
|
242
|
+
]
|
|
191
243
|
_flow_id: int
|
|
192
244
|
_input_data: Union[ParquetFile, FlowDataEngine, "FlowGraph"]
|
|
193
|
-
_input_cols:
|
|
194
|
-
_output_cols:
|
|
195
|
-
_node_db:
|
|
196
|
-
_node_ids:
|
|
197
|
-
_results:
|
|
245
|
+
_input_cols: list[str]
|
|
246
|
+
_output_cols: list[str]
|
|
247
|
+
_node_db: dict[str | int, FlowNode]
|
|
248
|
+
_node_ids: list[str | int]
|
|
249
|
+
_results: FlowDataEngine | None = None
|
|
198
250
|
cache_results: bool = False
|
|
199
|
-
schema:
|
|
251
|
+
schema: list[FlowfileColumn] | None = None
|
|
200
252
|
has_over_row_function: bool = False
|
|
201
|
-
_flow_starts:
|
|
202
|
-
latest_run_info:
|
|
253
|
+
_flow_starts: list[int | str] = None
|
|
254
|
+
latest_run_info: RunInformation | None = None
|
|
203
255
|
start_datetime: datetime = None
|
|
204
256
|
end_datetime: datetime = None
|
|
205
257
|
_flow_settings: schemas.FlowSettings = None
|
|
206
258
|
flow_logger: FlowLogger
|
|
207
259
|
|
|
208
|
-
def __init__(
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
260
|
+
def __init__(
|
|
261
|
+
self,
|
|
262
|
+
flow_settings: schemas.FlowSettings | schemas.FlowGraphConfig,
|
|
263
|
+
name: str = None,
|
|
264
|
+
input_cols: list[str] = None,
|
|
265
|
+
output_cols: list[str] = None,
|
|
266
|
+
path_ref: str = None,
|
|
267
|
+
input_flow: Union[ParquetFile, FlowDataEngine, "FlowGraph"] = None,
|
|
268
|
+
cache_results: bool = False,
|
|
269
|
+
):
|
|
215
270
|
"""Initializes a new FlowGraph instance.
|
|
216
271
|
|
|
217
272
|
Args:
|
|
@@ -233,7 +288,7 @@ class FlowGraph:
|
|
|
233
288
|
self.latest_run_info = None
|
|
234
289
|
self._flow_id = flow_settings.flow_id
|
|
235
290
|
self.flow_logger = FlowLogger(flow_settings.flow_id)
|
|
236
|
-
self._flow_starts:
|
|
291
|
+
self._flow_starts: list[FlowNode] = []
|
|
237
292
|
self._results = None
|
|
238
293
|
self.schema = None
|
|
239
294
|
self.has_over_row_function = False
|
|
@@ -255,13 +310,21 @@ class FlowGraph:
|
|
|
255
310
|
|
|
256
311
|
@flow_settings.setter
|
|
257
312
|
def flow_settings(self, flow_settings: schemas.FlowSettings):
|
|
258
|
-
if (
|
|
259
|
-
|
|
260
|
-
(self._flow_settings.execution_mode != flow_settings.execution_mode)
|
|
313
|
+
if (self._flow_settings.execution_location != flow_settings.execution_location) or (
|
|
314
|
+
self._flow_settings.execution_mode != flow_settings.execution_mode
|
|
261
315
|
):
|
|
262
316
|
self.reset()
|
|
263
317
|
self._flow_settings = flow_settings
|
|
264
318
|
|
|
319
|
+
def add_node_to_starting_list(self, node: FlowNode) -> None:
|
|
320
|
+
"""Adds a node to the list of starting nodes for the flow if not already present.
|
|
321
|
+
|
|
322
|
+
Args:
|
|
323
|
+
node: The FlowNode to add as a starting node.
|
|
324
|
+
"""
|
|
325
|
+
if node.node_id not in {self_node.node_id for self_node in self._flow_starts}:
|
|
326
|
+
self._flow_starts.append(node)
|
|
327
|
+
|
|
265
328
|
def add_node_promise(self, node_promise: input_schema.NodePromise):
|
|
266
329
|
"""Adds a placeholder node to the graph that is not yet fully configured.
|
|
267
330
|
|
|
@@ -270,13 +333,31 @@ class FlowGraph:
|
|
|
270
333
|
Args:
|
|
271
334
|
node_promise: A promise object containing basic node information.
|
|
272
335
|
"""
|
|
336
|
+
|
|
273
337
|
def placeholder(n: FlowNode = None):
|
|
274
338
|
if n is None:
|
|
275
339
|
return FlowDataEngine()
|
|
276
340
|
return n
|
|
277
341
|
|
|
278
|
-
self.add_node_step(
|
|
279
|
-
|
|
342
|
+
self.add_node_step(
|
|
343
|
+
node_id=node_promise.node_id,
|
|
344
|
+
node_type=node_promise.node_type,
|
|
345
|
+
function=placeholder,
|
|
346
|
+
setting_input=node_promise,
|
|
347
|
+
)
|
|
348
|
+
if node_promise.is_user_defined:
|
|
349
|
+
node_needs_settings: bool
|
|
350
|
+
custom_node = CUSTOM_NODE_STORE.get(node_promise.node_type)
|
|
351
|
+
if custom_node is None:
|
|
352
|
+
raise Exception(f"Custom node type '{node_promise.node_type}' not found in registry.")
|
|
353
|
+
settings_schema = custom_node.model_fields["settings_schema"].default
|
|
354
|
+
node_needs_settings = settings_schema is not None and not settings_schema.is_empty()
|
|
355
|
+
if not node_needs_settings:
|
|
356
|
+
user_defined_node_settings = input_schema.UserDefinedNode(settings={}, **node_promise.model_dump())
|
|
357
|
+
initialized_model = custom_node()
|
|
358
|
+
self.add_user_defined_node(
|
|
359
|
+
custom_node=initialized_model, user_defined_node_settings=user_defined_node_settings
|
|
360
|
+
)
|
|
280
361
|
|
|
281
362
|
def apply_layout(self, y_spacing: int = 150, x_spacing: int = 200, initial_y: int = 100):
|
|
282
363
|
"""Calculates and applies a layered layout to all nodes in the graph.
|
|
@@ -304,20 +385,24 @@ class FlowGraph:
|
|
|
304
385
|
updated_count = 0
|
|
305
386
|
for node_id, (pos_x, pos_y) in new_positions.items():
|
|
306
387
|
node = self.get_node(node_id)
|
|
307
|
-
if node and hasattr(node,
|
|
388
|
+
if node and hasattr(node, "setting_input"):
|
|
308
389
|
setting = node.setting_input
|
|
309
|
-
if hasattr(setting,
|
|
390
|
+
if hasattr(setting, "pos_x") and hasattr(setting, "pos_y"):
|
|
310
391
|
setting.pos_x = pos_x
|
|
311
392
|
setting.pos_y = pos_y
|
|
312
393
|
updated_count += 1
|
|
313
394
|
else:
|
|
314
|
-
self.flow_logger.warning(
|
|
395
|
+
self.flow_logger.warning(
|
|
396
|
+
f"Node {node_id} setting_input ({type(setting)}) lacks pos_x/pos_y attributes."
|
|
397
|
+
)
|
|
315
398
|
elif node:
|
|
316
399
|
self.flow_logger.warning(f"Node {node_id} lacks setting_input attribute.")
|
|
317
400
|
# else: Node not found, already warned by calculate_layered_layout
|
|
318
401
|
|
|
319
402
|
end_time = time()
|
|
320
|
-
self.flow_logger.info(
|
|
403
|
+
self.flow_logger.info(
|
|
404
|
+
f"Layout applied to {updated_count}/{len(self.nodes)} nodes in {end_time - start_time:.2f} seconds."
|
|
405
|
+
)
|
|
321
406
|
|
|
322
407
|
except Exception as e:
|
|
323
408
|
self.flow_logger.error(f"Error applying layout: {e}")
|
|
@@ -337,13 +422,13 @@ class FlowGraph:
|
|
|
337
422
|
"""
|
|
338
423
|
self._flow_id = new_id
|
|
339
424
|
for node in self.nodes:
|
|
340
|
-
if hasattr(node.setting_input,
|
|
425
|
+
if hasattr(node.setting_input, "flow_id"):
|
|
341
426
|
node.setting_input.flow_id = new_id
|
|
342
427
|
self.flow_settings.flow_id = new_id
|
|
343
428
|
|
|
344
429
|
def __repr__(self):
|
|
345
430
|
"""Provides the official string representation of the FlowGraph instance."""
|
|
346
|
-
settings_str = " -" +
|
|
431
|
+
settings_str = " -" + "\n -".join(f"{k}: {v}" for k, v in self.flow_settings)
|
|
347
432
|
return f"FlowGraph(\nNodes: {self._node_db}\n\nSettings:\n{settings_str}"
|
|
348
433
|
|
|
349
434
|
def print_tree(self):
|
|
@@ -361,7 +446,7 @@ class FlowGraph:
|
|
|
361
446
|
|
|
362
447
|
# Group nodes by depth
|
|
363
448
|
depth_groups, max_depth = group_nodes_by_depth(node_info)
|
|
364
|
-
|
|
449
|
+
|
|
365
450
|
# Sort nodes within each depth group
|
|
366
451
|
for depth in depth_groups:
|
|
367
452
|
depth_groups[depth].sort()
|
|
@@ -371,7 +456,7 @@ class FlowGraph:
|
|
|
371
456
|
|
|
372
457
|
# Track which nodes connect to what
|
|
373
458
|
merge_points = define_node_connections(node_info)
|
|
374
|
-
|
|
459
|
+
|
|
375
460
|
# Build the flow paths
|
|
376
461
|
|
|
377
462
|
# Find the maximum label length for each depth level
|
|
@@ -380,15 +465,15 @@ class FlowGraph:
|
|
|
380
465
|
if depth in depth_groups:
|
|
381
466
|
max_len = max(len(node_info[nid].label) for nid in depth_groups[depth])
|
|
382
467
|
max_label_length[depth] = max_len
|
|
383
|
-
|
|
468
|
+
|
|
384
469
|
# Draw the paths
|
|
385
470
|
drawn_nodes = set()
|
|
386
471
|
merge_drawn = set()
|
|
387
|
-
|
|
472
|
+
|
|
388
473
|
# Group paths by their merge points
|
|
389
474
|
paths_by_merge = {}
|
|
390
475
|
standalone_paths = []
|
|
391
|
-
|
|
476
|
+
|
|
392
477
|
# Build flow paths
|
|
393
478
|
paths = build_flow_paths(node_info, self._flow_starts, merge_points)
|
|
394
479
|
|
|
@@ -410,22 +495,22 @@ class FlowGraph:
|
|
|
410
495
|
|
|
411
496
|
# Add undrawn nodes
|
|
412
497
|
add_un_drawn_nodes(drawn_nodes, node_info, lines)
|
|
413
|
-
|
|
498
|
+
|
|
414
499
|
try:
|
|
415
500
|
skip_nodes, ordered_nodes = compute_execution_plan(
|
|
416
|
-
nodes=self.nodes,
|
|
417
|
-
|
|
501
|
+
nodes=self.nodes, flow_starts=self._flow_starts + self.get_implicit_starter_nodes()
|
|
502
|
+
)
|
|
418
503
|
if ordered_nodes:
|
|
419
504
|
for i, node in enumerate(ordered_nodes, 1):
|
|
420
505
|
lines.append(f" {i:3d}. {node_info[node.node_id].label}")
|
|
421
506
|
except Exception as e:
|
|
422
507
|
lines.append(f" Could not determine execution order: {e}")
|
|
423
|
-
|
|
508
|
+
|
|
424
509
|
# Print everything
|
|
425
510
|
output = "\n".join(lines)
|
|
426
|
-
|
|
511
|
+
|
|
427
512
|
print(output)
|
|
428
|
-
|
|
513
|
+
|
|
429
514
|
def get_nodes_overview(self):
|
|
430
515
|
"""Gets a list of dictionary representations for all nodes in the graph."""
|
|
431
516
|
output = []
|
|
@@ -433,7 +518,7 @@ class FlowGraph:
|
|
|
433
518
|
output.append(v.get_repr())
|
|
434
519
|
return output
|
|
435
520
|
|
|
436
|
-
def remove_from_output_cols(self, columns:
|
|
521
|
+
def remove_from_output_cols(self, columns: list[str]):
|
|
437
522
|
"""Removes specified columns from the list of expected output columns.
|
|
438
523
|
|
|
439
524
|
Args:
|
|
@@ -442,7 +527,7 @@ class FlowGraph:
|
|
|
442
527
|
cols = set(columns)
|
|
443
528
|
self._output_cols = [c for c in self._output_cols if c not in cols]
|
|
444
529
|
|
|
445
|
-
def get_node(self, node_id:
|
|
530
|
+
def get_node(self, node_id: int | str = None) -> FlowNode | None:
|
|
446
531
|
"""Retrieves a node from the graph by its ID.
|
|
447
532
|
|
|
448
533
|
Args:
|
|
@@ -456,24 +541,43 @@ class FlowGraph:
|
|
|
456
541
|
node = self._node_db.get(node_id)
|
|
457
542
|
if node is not None:
|
|
458
543
|
return node
|
|
459
|
-
|
|
460
|
-
def add_user_defined_node(
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
544
|
+
|
|
545
|
+
def add_user_defined_node(
|
|
546
|
+
self, *, custom_node: CustomNodeBase, user_defined_node_settings: input_schema.UserDefinedNode
|
|
547
|
+
):
|
|
548
|
+
"""Adds a user-defined custom node to the graph.
|
|
549
|
+
|
|
550
|
+
Args:
|
|
551
|
+
custom_node: The custom node instance to add.
|
|
552
|
+
user_defined_node_settings: The settings for the user-defined node.
|
|
553
|
+
"""
|
|
554
|
+
|
|
555
|
+
def _func(*flow_data_engine: FlowDataEngine) -> FlowDataEngine | None:
|
|
556
|
+
user_id = user_defined_node_settings.user_id
|
|
557
|
+
if user_id is not None:
|
|
558
|
+
custom_node.set_execution_context(user_id)
|
|
559
|
+
if custom_node.settings_schema:
|
|
560
|
+
custom_node.settings_schema.set_secret_context(user_id, custom_node.accessed_secrets)
|
|
561
|
+
|
|
562
|
+
output = custom_node.process(*(fde.data_frame for fde in flow_data_engine))
|
|
563
|
+
|
|
564
|
+
accessed_secrets = custom_node.get_accessed_secrets()
|
|
565
|
+
if accessed_secrets:
|
|
566
|
+
logger.info(f"Node '{user_defined_node_settings.node_id}' accessed secrets: {accessed_secrets}")
|
|
567
|
+
if isinstance(output, (pl.LazyFrame, pl.DataFrame)):
|
|
468
568
|
return FlowDataEngine(output)
|
|
469
569
|
return None
|
|
470
|
-
|
|
471
|
-
self.add_node_step(
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
570
|
+
|
|
571
|
+
self.add_node_step(
|
|
572
|
+
node_id=user_defined_node_settings.node_id,
|
|
573
|
+
function=_func,
|
|
574
|
+
setting_input=user_defined_node_settings,
|
|
575
|
+
input_node_ids=user_defined_node_settings.depending_on_ids,
|
|
576
|
+
node_type=custom_node.item,
|
|
577
|
+
)
|
|
578
|
+
if custom_node.number_of_inputs == 0:
|
|
579
|
+
node = self.get_node(user_defined_node_settings.node_id)
|
|
580
|
+
self.add_node_to_starting_list(node)
|
|
477
581
|
|
|
478
582
|
def add_pivot(self, pivot_settings: input_schema.NodePivot):
|
|
479
583
|
"""Adds a pivot node to the graph.
|
|
@@ -485,11 +589,13 @@ class FlowGraph:
|
|
|
485
589
|
def _func(fl: FlowDataEngine):
|
|
486
590
|
return fl.do_pivot(pivot_settings.pivot_input, self.flow_logger.get_node_logger(pivot_settings.node_id))
|
|
487
591
|
|
|
488
|
-
self.add_node_step(
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
592
|
+
self.add_node_step(
|
|
593
|
+
node_id=pivot_settings.node_id,
|
|
594
|
+
function=_func,
|
|
595
|
+
node_type="pivot",
|
|
596
|
+
setting_input=pivot_settings,
|
|
597
|
+
input_node_ids=[pivot_settings.depending_on_id],
|
|
598
|
+
)
|
|
493
599
|
|
|
494
600
|
node = self.get_node(pivot_settings.node_id)
|
|
495
601
|
|
|
@@ -498,6 +604,7 @@ class FlowGraph:
|
|
|
498
604
|
input_data.lazy = True # ensure the dataset is lazy
|
|
499
605
|
input_lf = input_data.data_frame # get the lazy frame
|
|
500
606
|
return pre_calculate_pivot_schema(input_data.schema, pivot_settings.pivot_input, input_lf=input_lf)
|
|
607
|
+
|
|
501
608
|
node.schema_callback = schema_callback
|
|
502
609
|
|
|
503
610
|
def add_unpivot(self, unpivot_settings: input_schema.NodeUnpivot):
|
|
@@ -510,11 +617,13 @@ class FlowGraph:
|
|
|
510
617
|
def _func(fl: FlowDataEngine) -> FlowDataEngine:
|
|
511
618
|
return fl.unpivot(unpivot_settings.unpivot_input)
|
|
512
619
|
|
|
513
|
-
self.add_node_step(
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
620
|
+
self.add_node_step(
|
|
621
|
+
node_id=unpivot_settings.node_id,
|
|
622
|
+
function=_func,
|
|
623
|
+
node_type="unpivot",
|
|
624
|
+
setting_input=unpivot_settings,
|
|
625
|
+
input_node_ids=[unpivot_settings.depending_on_id],
|
|
626
|
+
)
|
|
518
627
|
|
|
519
628
|
def add_union(self, union_settings: input_schema.NodeUnion):
|
|
520
629
|
"""Adds a union node to combine multiple data streams.
|
|
@@ -524,14 +633,16 @@ class FlowGraph:
|
|
|
524
633
|
"""
|
|
525
634
|
|
|
526
635
|
def _func(*flowfile_tables: FlowDataEngine):
|
|
527
|
-
dfs:
|
|
528
|
-
return FlowDataEngine(pl.concat(dfs, how=
|
|
636
|
+
dfs: list[pl.LazyFrame] | list[pl.DataFrame] = [flt.data_frame for flt in flowfile_tables]
|
|
637
|
+
return FlowDataEngine(pl.concat(dfs, how="diagonal_relaxed"))
|
|
529
638
|
|
|
530
|
-
self.add_node_step(
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
639
|
+
self.add_node_step(
|
|
640
|
+
node_id=union_settings.node_id,
|
|
641
|
+
function=_func,
|
|
642
|
+
node_type="union",
|
|
643
|
+
setting_input=union_settings,
|
|
644
|
+
input_node_ids=union_settings.depending_on_ids,
|
|
645
|
+
)
|
|
535
646
|
|
|
536
647
|
def add_initial_node_analysis(self, node_promise: input_schema.NodePromise):
|
|
537
648
|
"""Adds a data exploration/analysis node based on a node promise.
|
|
@@ -559,13 +670,14 @@ class FlowGraph:
|
|
|
559
670
|
flowfile_table = flowfile_table.get_sample(sample_size, random=True)
|
|
560
671
|
external_sampler = ExternalDfFetcher(
|
|
561
672
|
lf=flowfile_table.data_frame,
|
|
562
|
-
file_ref="__gf_walker"+node.hash,
|
|
673
|
+
file_ref="__gf_walker" + node.hash,
|
|
563
674
|
wait_on_completion=True,
|
|
564
675
|
node_id=node.node_id,
|
|
565
676
|
flow_id=self.flow_id,
|
|
566
677
|
)
|
|
567
|
-
node.results.analysis_data_generator = get_read_top_n(
|
|
568
|
-
|
|
678
|
+
node.results.analysis_data_generator = get_read_top_n(
|
|
679
|
+
external_sampler.status.file_ref, n=min(sample_size, number_of_records)
|
|
680
|
+
)
|
|
569
681
|
return flowfile_table
|
|
570
682
|
|
|
571
683
|
def schema_callback():
|
|
@@ -574,11 +686,15 @@ class FlowGraph:
|
|
|
574
686
|
input_node = node.all_inputs[0]
|
|
575
687
|
return input_node.schema
|
|
576
688
|
else:
|
|
577
|
-
return [FlowfileColumn.from_input(
|
|
689
|
+
return [FlowfileColumn.from_input("col_1", "na")]
|
|
578
690
|
|
|
579
|
-
self.add_node_step(
|
|
580
|
-
|
|
581
|
-
|
|
691
|
+
self.add_node_step(
|
|
692
|
+
node_id=node_analysis.node_id,
|
|
693
|
+
node_type="explore_data",
|
|
694
|
+
function=analysis_preparation,
|
|
695
|
+
setting_input=node_analysis,
|
|
696
|
+
schema_callback=schema_callback,
|
|
697
|
+
)
|
|
582
698
|
node = self.get_node(node_analysis.node_id)
|
|
583
699
|
|
|
584
700
|
def add_group_by(self, group_by_settings: input_schema.NodeGroupBy):
|
|
@@ -591,19 +707,20 @@ class FlowGraph:
|
|
|
591
707
|
def _func(fl: FlowDataEngine) -> FlowDataEngine:
|
|
592
708
|
return fl.do_group_by(group_by_settings.groupby_input, False)
|
|
593
709
|
|
|
594
|
-
self.add_node_step(
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
710
|
+
self.add_node_step(
|
|
711
|
+
node_id=group_by_settings.node_id,
|
|
712
|
+
function=_func,
|
|
713
|
+
node_type="group_by",
|
|
714
|
+
setting_input=group_by_settings,
|
|
715
|
+
input_node_ids=[group_by_settings.depending_on_id],
|
|
716
|
+
)
|
|
599
717
|
|
|
600
718
|
node = self.get_node(group_by_settings.node_id)
|
|
601
719
|
|
|
602
720
|
def schema_callback():
|
|
603
|
-
|
|
604
721
|
output_columns = [(c.old_name, c.new_name, c.output_type) for c in group_by_settings.groupby_input.agg_cols]
|
|
605
722
|
depends_on = node.node_inputs.main_inputs[0]
|
|
606
|
-
input_schema_dict:
|
|
723
|
+
input_schema_dict: dict[str, str] = {s.name: s.data_type for s in depends_on.schema}
|
|
607
724
|
output_schema = []
|
|
608
725
|
for old_name, new_name, data_type in output_columns:
|
|
609
726
|
data_type = input_schema_dict[old_name] if data_type is None else data_type
|
|
@@ -618,38 +735,148 @@ class FlowGraph:
|
|
|
618
735
|
Args:
|
|
619
736
|
filter_settings: The settings for the filter operation.
|
|
620
737
|
"""
|
|
738
|
+
from flowfile_core.schemas.transform_schema import FilterOperator
|
|
739
|
+
|
|
740
|
+
def _build_basic_filter_expression(
|
|
741
|
+
basic_filter: transform_schema.BasicFilter, field_data_type: str | None = None
|
|
742
|
+
) -> str:
|
|
743
|
+
"""Build a filter expression string from a BasicFilter object.
|
|
744
|
+
|
|
745
|
+
Uses the Flowfile expression language that is compatible with polars_expr_transformer.
|
|
746
|
+
|
|
747
|
+
Args:
|
|
748
|
+
basic_filter: The basic filter configuration.
|
|
749
|
+
field_data_type: The data type of the field (optional, for smart quoting).
|
|
750
|
+
|
|
751
|
+
Returns:
|
|
752
|
+
A filter expression string compatible with polars_expr_transformer.
|
|
753
|
+
"""
|
|
754
|
+
field = f"[{basic_filter.field}]"
|
|
755
|
+
value = basic_filter.value
|
|
756
|
+
value2 = basic_filter.value2
|
|
757
|
+
|
|
758
|
+
is_numeric_value = value.replace(".", "", 1).replace("-", "", 1).isnumeric() if value else False
|
|
759
|
+
should_quote = field_data_type == "str" or not is_numeric_value
|
|
760
|
+
|
|
761
|
+
try:
|
|
762
|
+
operator = basic_filter.get_operator()
|
|
763
|
+
except (ValueError, AttributeError):
|
|
764
|
+
operator = FilterOperator.from_symbol(str(basic_filter.operator))
|
|
765
|
+
|
|
766
|
+
if operator == FilterOperator.EQUALS:
|
|
767
|
+
if should_quote:
|
|
768
|
+
return f'{field}="{value}"'
|
|
769
|
+
return f"{field}={value}"
|
|
770
|
+
|
|
771
|
+
elif operator == FilterOperator.NOT_EQUALS:
|
|
772
|
+
if should_quote:
|
|
773
|
+
return f'{field}!="{value}"'
|
|
774
|
+
return f"{field}!={value}"
|
|
775
|
+
|
|
776
|
+
elif operator == FilterOperator.GREATER_THAN:
|
|
777
|
+
if should_quote:
|
|
778
|
+
return f'{field}>"{value}"'
|
|
779
|
+
return f"{field}>{value}"
|
|
780
|
+
|
|
781
|
+
elif operator == FilterOperator.GREATER_THAN_OR_EQUALS:
|
|
782
|
+
if should_quote:
|
|
783
|
+
return f'{field}>="{value}"'
|
|
784
|
+
return f"{field}>={value}"
|
|
785
|
+
|
|
786
|
+
elif operator == FilterOperator.LESS_THAN:
|
|
787
|
+
if should_quote:
|
|
788
|
+
return f'{field}<"{value}"'
|
|
789
|
+
return f"{field}<{value}"
|
|
790
|
+
|
|
791
|
+
elif operator == FilterOperator.LESS_THAN_OR_EQUALS:
|
|
792
|
+
if should_quote:
|
|
793
|
+
return f'{field}<="{value}"'
|
|
794
|
+
return f"{field}<={value}"
|
|
795
|
+
|
|
796
|
+
elif operator == FilterOperator.CONTAINS:
|
|
797
|
+
return f'contains({field}, "{value}")'
|
|
798
|
+
|
|
799
|
+
elif operator == FilterOperator.NOT_CONTAINS:
|
|
800
|
+
return f'contains({field}, "{value}") = false'
|
|
801
|
+
|
|
802
|
+
elif operator == FilterOperator.STARTS_WITH:
|
|
803
|
+
return f'left({field}, {len(value)}) = "{value}"'
|
|
804
|
+
|
|
805
|
+
elif operator == FilterOperator.ENDS_WITH:
|
|
806
|
+
return f'right({field}, {len(value)}) = "{value}"'
|
|
807
|
+
|
|
808
|
+
elif operator == FilterOperator.IS_NULL:
|
|
809
|
+
return f"is_empty({field})"
|
|
810
|
+
|
|
811
|
+
elif operator == FilterOperator.IS_NOT_NULL:
|
|
812
|
+
return f"is_not_empty({field})"
|
|
813
|
+
|
|
814
|
+
elif operator == FilterOperator.IN:
|
|
815
|
+
values = [v.strip() for v in value.split(",")]
|
|
816
|
+
if len(values) == 1:
|
|
817
|
+
if should_quote:
|
|
818
|
+
return f'{field}="{values[0]}"'
|
|
819
|
+
return f"{field}={values[0]}"
|
|
820
|
+
if should_quote:
|
|
821
|
+
conditions = [f'({field}="{v}")' for v in values]
|
|
822
|
+
else:
|
|
823
|
+
conditions = [f"({field}={v})" for v in values]
|
|
824
|
+
return " | ".join(conditions)
|
|
825
|
+
|
|
826
|
+
elif operator == FilterOperator.NOT_IN:
|
|
827
|
+
values = [v.strip() for v in value.split(",")]
|
|
828
|
+
if len(values) == 1:
|
|
829
|
+
if should_quote:
|
|
830
|
+
return f'{field}!="{values[0]}"'
|
|
831
|
+
return f"{field}!={values[0]}"
|
|
832
|
+
if should_quote:
|
|
833
|
+
conditions = [f'({field}!="{v}")' for v in values]
|
|
834
|
+
else:
|
|
835
|
+
conditions = [f"({field}!={v})" for v in values]
|
|
836
|
+
return " & ".join(conditions)
|
|
621
837
|
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
838
|
+
elif operator == FilterOperator.BETWEEN:
|
|
839
|
+
if value2 is None:
|
|
840
|
+
raise ValueError("BETWEEN operator requires value2")
|
|
841
|
+
if should_quote:
|
|
842
|
+
return f'({field}>="{value}") & ({field}<="{value2}")'
|
|
843
|
+
return f"({field}>={value}) & ({field}<={value2})"
|
|
844
|
+
|
|
845
|
+
else:
|
|
846
|
+
# Fallback for unknown operators - use legacy format
|
|
847
|
+
if should_quote:
|
|
848
|
+
return f'{field}{operator.to_symbol()}"{value}"'
|
|
849
|
+
return f"{field}{operator.to_symbol()}{value}"
|
|
629
850
|
|
|
630
851
|
def _func(fl: FlowDataEngine):
|
|
631
|
-
is_advanced = filter_settings.filter_input.
|
|
852
|
+
is_advanced = filter_settings.filter_input.is_advanced()
|
|
853
|
+
|
|
632
854
|
if is_advanced:
|
|
855
|
+
predicate = filter_settings.filter_input.advanced_filter
|
|
633
856
|
return fl.do_filter(predicate)
|
|
634
857
|
else:
|
|
635
858
|
basic_filter = filter_settings.filter_input.basic_filter
|
|
636
|
-
if basic_filter
|
|
859
|
+
if basic_filter is None:
|
|
860
|
+
logger.warning("Basic filter is None, returning unfiltered data")
|
|
861
|
+
return fl
|
|
862
|
+
|
|
863
|
+
try:
|
|
637
864
|
field_data_type = fl.get_schema_column(basic_filter.field).generic_datatype()
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
else:
|
|
641
|
-
_f = f'[{basic_filter.field}]{basic_filter.filter_type}{basic_filter.filter_value}'
|
|
642
|
-
else:
|
|
643
|
-
_f = f'[{basic_filter.field}]{basic_filter.filter_type}"{basic_filter.filter_value}"'
|
|
644
|
-
filter_settings.filter_input.advanced_filter = _f
|
|
645
|
-
return fl.do_filter(_f)
|
|
865
|
+
except Exception:
|
|
866
|
+
field_data_type = None
|
|
646
867
|
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
868
|
+
expression = _build_basic_filter_expression(basic_filter, field_data_type)
|
|
869
|
+
filter_settings.filter_input.advanced_filter = expression
|
|
870
|
+
return fl.do_filter(expression)
|
|
871
|
+
|
|
872
|
+
self.add_node_step(
|
|
873
|
+
filter_settings.node_id,
|
|
874
|
+
_func,
|
|
875
|
+
node_type="filter",
|
|
876
|
+
renew_schema=False,
|
|
877
|
+
setting_input=filter_settings,
|
|
878
|
+
input_node_ids=[filter_settings.depending_on_id],
|
|
879
|
+
)
|
|
653
880
|
|
|
654
881
|
def add_record_count(self, node_number_of_records: input_schema.NodeRecordCount):
|
|
655
882
|
"""Adds a filter node to the graph.
|
|
@@ -661,11 +888,13 @@ class FlowGraph:
|
|
|
661
888
|
def _func(fl: FlowDataEngine) -> FlowDataEngine:
|
|
662
889
|
return fl.get_record_count()
|
|
663
890
|
|
|
664
|
-
self.add_node_step(
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
891
|
+
self.add_node_step(
|
|
892
|
+
node_id=node_number_of_records.node_id,
|
|
893
|
+
function=_func,
|
|
894
|
+
node_type="record_count",
|
|
895
|
+
setting_input=node_number_of_records,
|
|
896
|
+
input_node_ids=[node_number_of_records.depending_on_id],
|
|
897
|
+
)
|
|
669
898
|
|
|
670
899
|
def add_polars_code(self, node_polars_code: input_schema.NodePolarsCode):
|
|
671
900
|
"""Adds a node that executes custom Polars code.
|
|
@@ -676,11 +905,14 @@ class FlowGraph:
|
|
|
676
905
|
|
|
677
906
|
def _func(*flowfile_tables: FlowDataEngine) -> FlowDataEngine:
|
|
678
907
|
return execute_polars_code(*flowfile_tables, code=node_polars_code.polars_code_input.polars_code)
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
908
|
+
|
|
909
|
+
self.add_node_step(
|
|
910
|
+
node_id=node_polars_code.node_id,
|
|
911
|
+
function=_func,
|
|
912
|
+
node_type="polars_code",
|
|
913
|
+
setting_input=node_polars_code,
|
|
914
|
+
input_node_ids=node_polars_code.depending_on_ids,
|
|
915
|
+
)
|
|
684
916
|
|
|
685
917
|
try:
|
|
686
918
|
polars_code_parser.validate_code(node_polars_code.polars_code_input.polars_code)
|
|
@@ -688,9 +920,7 @@ class FlowGraph:
|
|
|
688
920
|
node = self.get_node(node_id=node_polars_code.node_id)
|
|
689
921
|
node.results.errors = str(e)
|
|
690
922
|
|
|
691
|
-
def add_dependency_on_polars_lazy_frame(self,
|
|
692
|
-
lazy_frame: pl.LazyFrame,
|
|
693
|
-
node_id: int):
|
|
923
|
+
def add_dependency_on_polars_lazy_frame(self, lazy_frame: pl.LazyFrame, node_id: int):
|
|
694
924
|
"""Adds a special node that directly injects a Polars LazyFrame into the graph.
|
|
695
925
|
|
|
696
926
|
Note: This is intended for backend use and will not work in the UI editor.
|
|
@@ -699,13 +929,16 @@ class FlowGraph:
|
|
|
699
929
|
lazy_frame: The Polars LazyFrame to inject.
|
|
700
930
|
node_id: The ID for the new node.
|
|
701
931
|
"""
|
|
932
|
+
|
|
702
933
|
def _func():
|
|
703
934
|
return FlowDataEngine(lazy_frame)
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
935
|
+
|
|
936
|
+
node_promise = input_schema.NodePromise(
|
|
937
|
+
flow_id=self.flow_id, node_id=node_id, node_type="polars_lazy_frame", is_setup=True
|
|
938
|
+
)
|
|
939
|
+
self.add_node_step(
|
|
940
|
+
node_id=node_promise.node_id, node_type=node_promise.node_type, function=_func, setting_input=node_promise
|
|
941
|
+
)
|
|
709
942
|
|
|
710
943
|
def add_unique(self, unique_settings: input_schema.NodeUnique):
|
|
711
944
|
"""Adds a node to find and remove duplicate rows.
|
|
@@ -717,12 +950,14 @@ class FlowGraph:
|
|
|
717
950
|
def _func(fl: FlowDataEngine) -> FlowDataEngine:
|
|
718
951
|
return fl.make_unique(unique_settings.unique_input)
|
|
719
952
|
|
|
720
|
-
self.add_node_step(
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
953
|
+
self.add_node_step(
|
|
954
|
+
node_id=unique_settings.node_id,
|
|
955
|
+
function=_func,
|
|
956
|
+
input_columns=[],
|
|
957
|
+
node_type="unique",
|
|
958
|
+
setting_input=unique_settings,
|
|
959
|
+
input_node_ids=[unique_settings.depending_on_id],
|
|
960
|
+
)
|
|
726
961
|
|
|
727
962
|
def add_graph_solver(self, graph_solver_settings: input_schema.NodeGraphSolver):
|
|
728
963
|
"""Adds a node that solves graph-like problems within the data.
|
|
@@ -735,14 +970,17 @@ class FlowGraph:
|
|
|
735
970
|
graph_solver_settings: The settings object defining the graph inputs
|
|
736
971
|
and the specific algorithm to apply.
|
|
737
972
|
"""
|
|
973
|
+
|
|
738
974
|
def _func(fl: FlowDataEngine) -> FlowDataEngine:
|
|
739
975
|
return fl.solve_graph(graph_solver_settings.graph_solver_input)
|
|
740
976
|
|
|
741
|
-
self.add_node_step(
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
977
|
+
self.add_node_step(
|
|
978
|
+
node_id=graph_solver_settings.node_id,
|
|
979
|
+
function=_func,
|
|
980
|
+
node_type="graph_solver",
|
|
981
|
+
setting_input=graph_solver_settings,
|
|
982
|
+
input_node_ids=[graph_solver_settings.depending_on_id],
|
|
983
|
+
)
|
|
746
984
|
|
|
747
985
|
def add_formula(self, function_settings: input_schema.NodeFormula):
|
|
748
986
|
"""Adds a node that applies a formula to create or modify a column.
|
|
@@ -757,24 +995,28 @@ class FlowGraph:
|
|
|
757
995
|
else:
|
|
758
996
|
output_type = None
|
|
759
997
|
if output_type not in (None, transform_schema.AUTO_DATA_TYPE):
|
|
760
|
-
new_col = [
|
|
761
|
-
|
|
998
|
+
new_col = [
|
|
999
|
+
FlowfileColumn.from_input(column_name=function_settings.function.field.name, data_type=str(output_type))
|
|
1000
|
+
]
|
|
762
1001
|
else:
|
|
763
|
-
new_col = [FlowfileColumn.from_input(function_settings.function.field.name,
|
|
1002
|
+
new_col = [FlowfileColumn.from_input(function_settings.function.field.name, "String")]
|
|
764
1003
|
|
|
765
1004
|
def _func(fl: FlowDataEngine):
|
|
766
|
-
return fl.apply_sql_formula(
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
1005
|
+
return fl.apply_sql_formula(
|
|
1006
|
+
func=function_settings.function.function,
|
|
1007
|
+
col_name=function_settings.function.field.name,
|
|
1008
|
+
output_data_type=output_type,
|
|
1009
|
+
)
|
|
1010
|
+
|
|
1011
|
+
self.add_node_step(
|
|
1012
|
+
function_settings.node_id,
|
|
1013
|
+
_func,
|
|
1014
|
+
output_schema=new_col,
|
|
1015
|
+
node_type="formula",
|
|
1016
|
+
renew_schema=False,
|
|
1017
|
+
setting_input=function_settings,
|
|
1018
|
+
input_node_ids=[function_settings.depending_on_id],
|
|
1019
|
+
)
|
|
778
1020
|
if error != "":
|
|
779
1021
|
node = self.get_node(function_settings.node_id)
|
|
780
1022
|
node.results.errors = error
|
|
@@ -791,22 +1033,27 @@ class FlowGraph:
|
|
|
791
1033
|
Returns:
|
|
792
1034
|
The `FlowGraph` instance for method chaining.
|
|
793
1035
|
"""
|
|
1036
|
+
|
|
794
1037
|
def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
|
|
795
1038
|
for left_select in cross_join_settings.cross_join_input.left_select.renames:
|
|
796
1039
|
left_select.is_available = True if left_select.old_name in main.schema else False
|
|
797
1040
|
for right_select in cross_join_settings.cross_join_input.right_select.renames:
|
|
798
1041
|
right_select.is_available = True if right_select.old_name in right.schema else False
|
|
799
|
-
return main.do_cross_join(
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
1042
|
+
return main.do_cross_join(
|
|
1043
|
+
cross_join_input=cross_join_settings.cross_join_input,
|
|
1044
|
+
auto_generate_selection=cross_join_settings.auto_generate_selection,
|
|
1045
|
+
verify_integrity=False,
|
|
1046
|
+
other=right,
|
|
1047
|
+
)
|
|
1048
|
+
|
|
1049
|
+
self.add_node_step(
|
|
1050
|
+
node_id=cross_join_settings.node_id,
|
|
1051
|
+
function=_func,
|
|
1052
|
+
input_columns=[],
|
|
1053
|
+
node_type="cross_join",
|
|
1054
|
+
setting_input=cross_join_settings,
|
|
1055
|
+
input_node_ids=cross_join_settings.depending_on_ids,
|
|
1056
|
+
)
|
|
810
1057
|
return self
|
|
811
1058
|
|
|
812
1059
|
def add_join(self, join_settings: input_schema.NodeJoin) -> "FlowGraph":
|
|
@@ -818,22 +1065,27 @@ class FlowGraph:
|
|
|
818
1065
|
Returns:
|
|
819
1066
|
The `FlowGraph` instance for method chaining.
|
|
820
1067
|
"""
|
|
1068
|
+
|
|
821
1069
|
def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
|
|
822
1070
|
for left_select in join_settings.join_input.left_select.renames:
|
|
823
1071
|
left_select.is_available = True if left_select.old_name in main.schema else False
|
|
824
1072
|
for right_select in join_settings.join_input.right_select.renames:
|
|
825
1073
|
right_select.is_available = True if right_select.old_name in right.schema else False
|
|
826
|
-
return main.join(
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
1074
|
+
return main.join(
|
|
1075
|
+
join_input=join_settings.join_input,
|
|
1076
|
+
auto_generate_selection=join_settings.auto_generate_selection,
|
|
1077
|
+
verify_integrity=False,
|
|
1078
|
+
other=right,
|
|
1079
|
+
)
|
|
1080
|
+
|
|
1081
|
+
self.add_node_step(
|
|
1082
|
+
node_id=join_settings.node_id,
|
|
1083
|
+
function=_func,
|
|
1084
|
+
input_columns=[],
|
|
1085
|
+
node_type="join",
|
|
1086
|
+
setting_input=join_settings,
|
|
1087
|
+
input_node_ids=join_settings.depending_on_ids,
|
|
1088
|
+
)
|
|
837
1089
|
return self
|
|
838
1090
|
|
|
839
1091
|
def add_fuzzy_match(self, fuzzy_settings: input_schema.NodeFuzzyMatch) -> "FlowGraph":
|
|
@@ -849,31 +1101,43 @@ class FlowGraph:
|
|
|
849
1101
|
def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
|
|
850
1102
|
node = self.get_node(node_id=fuzzy_settings.node_id)
|
|
851
1103
|
if self.execution_location == "local":
|
|
852
|
-
return main.fuzzy_join(
|
|
853
|
-
|
|
854
|
-
|
|
1104
|
+
return main.fuzzy_join(
|
|
1105
|
+
fuzzy_match_input=deepcopy(fuzzy_settings.join_input),
|
|
1106
|
+
other=right,
|
|
1107
|
+
node_logger=self.flow_logger.get_node_logger(fuzzy_settings.node_id),
|
|
1108
|
+
)
|
|
855
1109
|
|
|
856
|
-
f = main.start_fuzzy_join(
|
|
857
|
-
|
|
1110
|
+
f = main.start_fuzzy_join(
|
|
1111
|
+
fuzzy_match_input=deepcopy(fuzzy_settings.join_input),
|
|
1112
|
+
other=right,
|
|
1113
|
+
file_ref=node.hash,
|
|
1114
|
+
flow_id=self.flow_id,
|
|
1115
|
+
node_id=fuzzy_settings.node_id,
|
|
1116
|
+
)
|
|
858
1117
|
logger.info("Started the fuzzy match action")
|
|
859
1118
|
node._fetch_cached_df = f # Add to the node so it can be cancelled and fetch later if needed
|
|
860
1119
|
return FlowDataEngine(f.get_result())
|
|
861
1120
|
|
|
862
1121
|
def schema_callback():
|
|
863
|
-
fm_input_copy = FuzzyMatchInputManager(
|
|
1122
|
+
fm_input_copy = FuzzyMatchInputManager(
|
|
1123
|
+
fuzzy_settings.join_input
|
|
1124
|
+
) # Deepcopy create an unique object per func
|
|
864
1125
|
node = self.get_node(node_id=fuzzy_settings.node_id)
|
|
865
|
-
return calculate_fuzzy_match_schema(
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
1126
|
+
return calculate_fuzzy_match_schema(
|
|
1127
|
+
fm_input_copy,
|
|
1128
|
+
left_schema=node.node_inputs.main_inputs[0].schema,
|
|
1129
|
+
right_schema=node.node_inputs.right_input.schema,
|
|
1130
|
+
)
|
|
1131
|
+
|
|
1132
|
+
self.add_node_step(
|
|
1133
|
+
node_id=fuzzy_settings.node_id,
|
|
1134
|
+
function=_func,
|
|
1135
|
+
input_columns=[],
|
|
1136
|
+
node_type="fuzzy_match",
|
|
1137
|
+
setting_input=fuzzy_settings,
|
|
1138
|
+
input_node_ids=fuzzy_settings.depending_on_ids,
|
|
1139
|
+
schema_callback=schema_callback,
|
|
1140
|
+
)
|
|
877
1141
|
|
|
878
1142
|
return self
|
|
879
1143
|
|
|
@@ -890,14 +1154,17 @@ class FlowGraph:
|
|
|
890
1154
|
Returns:
|
|
891
1155
|
The `FlowGraph` instance for method chaining.
|
|
892
1156
|
"""
|
|
1157
|
+
|
|
893
1158
|
def _func(table: FlowDataEngine) -> FlowDataEngine:
|
|
894
1159
|
return table.split(node_text_to_rows.text_to_rows_input)
|
|
895
1160
|
|
|
896
|
-
self.add_node_step(
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
1161
|
+
self.add_node_step(
|
|
1162
|
+
node_id=node_text_to_rows.node_id,
|
|
1163
|
+
function=_func,
|
|
1164
|
+
node_type="text_to_rows",
|
|
1165
|
+
setting_input=node_text_to_rows,
|
|
1166
|
+
input_node_ids=[node_text_to_rows.depending_on_id],
|
|
1167
|
+
)
|
|
901
1168
|
return self
|
|
902
1169
|
|
|
903
1170
|
def add_sort(self, sort_settings: input_schema.NodeSort) -> "FlowGraph":
|
|
@@ -913,11 +1180,13 @@ class FlowGraph:
|
|
|
913
1180
|
def _func(table: FlowDataEngine) -> FlowDataEngine:
|
|
914
1181
|
return table.do_sort(sort_settings.sort_input)
|
|
915
1182
|
|
|
916
|
-
self.add_node_step(
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
1183
|
+
self.add_node_step(
|
|
1184
|
+
node_id=sort_settings.node_id,
|
|
1185
|
+
function=_func,
|
|
1186
|
+
node_type="sort",
|
|
1187
|
+
setting_input=sort_settings,
|
|
1188
|
+
input_node_ids=[sort_settings.depending_on_id],
|
|
1189
|
+
)
|
|
921
1190
|
return self
|
|
922
1191
|
|
|
923
1192
|
def add_sample(self, sample_settings: input_schema.NodeSample) -> "FlowGraph":
|
|
@@ -929,15 +1198,17 @@ class FlowGraph:
|
|
|
929
1198
|
Returns:
|
|
930
1199
|
The `FlowGraph` instance for method chaining.
|
|
931
1200
|
"""
|
|
1201
|
+
|
|
932
1202
|
def _func(table: FlowDataEngine) -> FlowDataEngine:
|
|
933
1203
|
return table.get_sample(sample_settings.sample_size)
|
|
934
1204
|
|
|
935
|
-
self.add_node_step(
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
1205
|
+
self.add_node_step(
|
|
1206
|
+
node_id=sample_settings.node_id,
|
|
1207
|
+
function=_func,
|
|
1208
|
+
node_type="sample",
|
|
1209
|
+
setting_input=sample_settings,
|
|
1210
|
+
input_node_ids=[sample_settings.depending_on_id],
|
|
1211
|
+
)
|
|
941
1212
|
return self
|
|
942
1213
|
|
|
943
1214
|
def add_record_id(self, record_id_settings: input_schema.NodeRecordId) -> "FlowGraph":
|
|
@@ -954,12 +1225,13 @@ class FlowGraph:
|
|
|
954
1225
|
def _func(table: FlowDataEngine) -> FlowDataEngine:
|
|
955
1226
|
return table.add_record_id(record_id_settings.record_id_input)
|
|
956
1227
|
|
|
957
|
-
self.add_node_step(
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
1228
|
+
self.add_node_step(
|
|
1229
|
+
node_id=record_id_settings.node_id,
|
|
1230
|
+
function=_func,
|
|
1231
|
+
node_type="record_id",
|
|
1232
|
+
setting_input=record_id_settings,
|
|
1233
|
+
input_node_ids=[record_id_settings.depending_on_id],
|
|
1234
|
+
)
|
|
963
1235
|
return self
|
|
964
1236
|
|
|
965
1237
|
def add_select(self, select_settings: input_schema.NodeSelect) -> "FlowGraph":
|
|
@@ -991,16 +1263,19 @@ class FlowGraph:
|
|
|
991
1263
|
for i in ids_to_remove:
|
|
992
1264
|
v = select_cols.pop(i)
|
|
993
1265
|
del v
|
|
994
|
-
return table.do_select(
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1266
|
+
return table.do_select(
|
|
1267
|
+
select_inputs=transform_schema.SelectInputs(select_cols), keep_missing=select_settings.keep_missing
|
|
1268
|
+
)
|
|
1269
|
+
|
|
1270
|
+
self.add_node_step(
|
|
1271
|
+
node_id=select_settings.node_id,
|
|
1272
|
+
function=_func,
|
|
1273
|
+
input_columns=[],
|
|
1274
|
+
node_type="select",
|
|
1275
|
+
drop_columns=list(drop_cols),
|
|
1276
|
+
setting_input=select_settings,
|
|
1277
|
+
input_node_ids=[select_settings.depending_on_id],
|
|
1278
|
+
)
|
|
1004
1279
|
return self
|
|
1005
1280
|
|
|
1006
1281
|
@property
|
|
@@ -1008,7 +1283,7 @@ class FlowGraph:
|
|
|
1008
1283
|
"""Checks if the graph has any nodes."""
|
|
1009
1284
|
return len(self._node_ids) > 0
|
|
1010
1285
|
|
|
1011
|
-
def delete_node(self, node_id:
|
|
1286
|
+
def delete_node(self, node_id: int | str):
|
|
1012
1287
|
"""Deletes a node from the graph and updates all its connections.
|
|
1013
1288
|
|
|
1014
1289
|
Args:
|
|
@@ -1023,7 +1298,7 @@ class FlowGraph:
|
|
|
1023
1298
|
if node:
|
|
1024
1299
|
logger.info(f"Found node: {node_id}, processing deletion")
|
|
1025
1300
|
|
|
1026
|
-
lead_to_steps:
|
|
1301
|
+
lead_to_steps: list[FlowNode] = node.leads_to_nodes
|
|
1027
1302
|
logger.debug(f"Node {node_id} leads to {len(lead_to_steps)} other nodes")
|
|
1028
1303
|
|
|
1029
1304
|
if len(lead_to_steps) > 0:
|
|
@@ -1032,7 +1307,7 @@ class FlowGraph:
|
|
|
1032
1307
|
lead_to_step.delete_input_node(node_id, complete=True)
|
|
1033
1308
|
|
|
1034
1309
|
if not node.is_start:
|
|
1035
|
-
depends_on:
|
|
1310
|
+
depends_on: list[FlowNode] = node.node_inputs.get_all_inputs()
|
|
1036
1311
|
logger.debug(f"Node {node_id} depends on {len(depends_on)} other nodes")
|
|
1037
1312
|
|
|
1038
1313
|
for depend_on in depends_on:
|
|
@@ -1052,18 +1327,20 @@ class FlowGraph:
|
|
|
1052
1327
|
"""Checks if the graph has an initial input data source."""
|
|
1053
1328
|
return self._input_data is not None
|
|
1054
1329
|
|
|
1055
|
-
def add_node_step(
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1330
|
+
def add_node_step(
|
|
1331
|
+
self,
|
|
1332
|
+
node_id: int | str,
|
|
1333
|
+
function: Callable,
|
|
1334
|
+
input_columns: list[str] = None,
|
|
1335
|
+
output_schema: list[FlowfileColumn] = None,
|
|
1336
|
+
node_type: str = None,
|
|
1337
|
+
drop_columns: list[str] = None,
|
|
1338
|
+
renew_schema: bool = True,
|
|
1339
|
+
setting_input: Any = None,
|
|
1340
|
+
cache_results: bool = None,
|
|
1341
|
+
schema_callback: Callable = None,
|
|
1342
|
+
input_node_ids: list[int] = None,
|
|
1343
|
+
) -> FlowNode:
|
|
1067
1344
|
"""The core method for adding or updating a node in the graph.
|
|
1068
1345
|
|
|
1069
1346
|
Args:
|
|
@@ -1096,29 +1373,33 @@ class FlowGraph:
|
|
|
1096
1373
|
if isinstance(input_columns, str):
|
|
1097
1374
|
input_columns = [input_columns]
|
|
1098
1375
|
if (
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1376
|
+
input_nodes is not None
|
|
1377
|
+
or function.__name__ in ("placeholder", "analysis_preparation")
|
|
1378
|
+
or node_type in ("cloud_storage_reader", "polars_lazy_frame", "input_data")
|
|
1102
1379
|
):
|
|
1103
1380
|
if not existing_node:
|
|
1104
|
-
node = FlowNode(
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1381
|
+
node = FlowNode(
|
|
1382
|
+
node_id=node_id,
|
|
1383
|
+
function=function,
|
|
1384
|
+
output_schema=output_schema,
|
|
1385
|
+
input_columns=input_columns,
|
|
1386
|
+
drop_columns=drop_columns,
|
|
1387
|
+
renew_schema=renew_schema,
|
|
1388
|
+
setting_input=setting_input,
|
|
1389
|
+
node_type=node_type,
|
|
1390
|
+
name=function.__name__,
|
|
1391
|
+
schema_callback=schema_callback,
|
|
1392
|
+
parent_uuid=self.uuid,
|
|
1393
|
+
)
|
|
1115
1394
|
else:
|
|
1116
|
-
existing_node.update_node(
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1395
|
+
existing_node.update_node(
|
|
1396
|
+
function=function,
|
|
1397
|
+
output_schema=output_schema,
|
|
1398
|
+
input_columns=input_columns,
|
|
1399
|
+
drop_columns=drop_columns,
|
|
1400
|
+
setting_input=setting_input,
|
|
1401
|
+
schema_callback=schema_callback,
|
|
1402
|
+
)
|
|
1122
1403
|
node = existing_node
|
|
1123
1404
|
else:
|
|
1124
1405
|
raise Exception("No data initialized")
|
|
@@ -1126,7 +1407,7 @@ class FlowGraph:
|
|
|
1126
1407
|
self._node_ids.append(node_id)
|
|
1127
1408
|
return node
|
|
1128
1409
|
|
|
1129
|
-
def add_include_cols(self, include_columns:
|
|
1410
|
+
def add_include_cols(self, include_columns: list[str]):
|
|
1130
1411
|
"""Adds columns to both the input and output column lists.
|
|
1131
1412
|
|
|
1132
1413
|
Args:
|
|
@@ -1147,23 +1428,30 @@ class FlowGraph:
|
|
|
1147
1428
|
"""
|
|
1148
1429
|
|
|
1149
1430
|
def _func(df: FlowDataEngine):
|
|
1150
|
-
execute_remote = self.execution_location !=
|
|
1151
|
-
df.output(
|
|
1152
|
-
|
|
1431
|
+
execute_remote = self.execution_location != "local"
|
|
1432
|
+
df.output(
|
|
1433
|
+
output_fs=output_file.output_settings,
|
|
1434
|
+
flow_id=self.flow_id,
|
|
1435
|
+
node_id=output_file.node_id,
|
|
1436
|
+
execute_remote=execute_remote,
|
|
1437
|
+
)
|
|
1153
1438
|
return df
|
|
1154
1439
|
|
|
1155
1440
|
def schema_callback():
|
|
1156
1441
|
input_node: FlowNode = self.get_node(output_file.node_id).node_inputs.main_inputs[0]
|
|
1157
1442
|
|
|
1158
1443
|
return input_node.schema
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1444
|
+
|
|
1445
|
+
input_node_id = output_file.depending_on_id if hasattr(output_file, "depending_on_id") else None
|
|
1446
|
+
self.add_node_step(
|
|
1447
|
+
node_id=output_file.node_id,
|
|
1448
|
+
function=_func,
|
|
1449
|
+
input_columns=[],
|
|
1450
|
+
node_type="output",
|
|
1451
|
+
setting_input=output_file,
|
|
1452
|
+
schema_callback=schema_callback,
|
|
1453
|
+
input_node_ids=[input_node_id],
|
|
1454
|
+
)
|
|
1167
1455
|
|
|
1168
1456
|
def add_database_writer(self, node_database_writer: input_schema.NodeDatabaseWriter):
|
|
1169
1457
|
"""Adds a node to write data to a database.
|
|
@@ -1172,18 +1460,20 @@ class FlowGraph:
|
|
|
1172
1460
|
node_database_writer: The settings for the database writer node.
|
|
1173
1461
|
"""
|
|
1174
1462
|
|
|
1175
|
-
node_type =
|
|
1463
|
+
node_type = "database_writer"
|
|
1176
1464
|
database_settings: input_schema.DatabaseWriteSettings = node_database_writer.database_write_settings
|
|
1177
|
-
database_connection:
|
|
1178
|
-
if database_settings.connection_mode ==
|
|
1465
|
+
database_connection: input_schema.DatabaseConnection | input_schema.FullDatabaseConnection | None
|
|
1466
|
+
if database_settings.connection_mode == "inline":
|
|
1179
1467
|
database_connection: input_schema.DatabaseConnection = database_settings.database_connection
|
|
1180
|
-
encrypted_password = get_encrypted_secret(
|
|
1181
|
-
|
|
1468
|
+
encrypted_password = get_encrypted_secret(
|
|
1469
|
+
current_user_id=node_database_writer.user_id, secret_name=database_connection.password_ref
|
|
1470
|
+
)
|
|
1182
1471
|
if encrypted_password is None:
|
|
1183
1472
|
raise HTTPException(status_code=400, detail="Password not found")
|
|
1184
1473
|
else:
|
|
1185
|
-
database_reference_settings = get_local_database_connection(
|
|
1186
|
-
|
|
1474
|
+
database_reference_settings = get_local_database_connection(
|
|
1475
|
+
database_settings.database_connection_name, node_database_writer.user_id
|
|
1476
|
+
)
|
|
1187
1477
|
encrypted_password = database_reference_settings.password.get_secret_value()
|
|
1188
1478
|
|
|
1189
1479
|
def _func(df: FlowDataEngine):
|
|
@@ -1192,14 +1482,20 @@ class FlowGraph:
|
|
|
1192
1482
|
sql_models.DatabaseExternalWriteSettings.create_from_from_node_database_writer(
|
|
1193
1483
|
node_database_writer=node_database_writer,
|
|
1194
1484
|
password=encrypted_password,
|
|
1195
|
-
table_name=(
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1485
|
+
table_name=(
|
|
1486
|
+
database_settings.schema_name + "." + database_settings.table_name
|
|
1487
|
+
if database_settings.schema_name
|
|
1488
|
+
else database_settings.table_name
|
|
1489
|
+
),
|
|
1490
|
+
database_reference_settings=(
|
|
1491
|
+
database_reference_settings if database_settings.connection_mode == "reference" else None
|
|
1492
|
+
),
|
|
1493
|
+
lf=df.data_frame,
|
|
1200
1494
|
)
|
|
1201
1495
|
)
|
|
1202
|
-
external_database_writer = ExternalDatabaseWriter(
|
|
1496
|
+
external_database_writer = ExternalDatabaseWriter(
|
|
1497
|
+
database_external_write_settings, wait_on_completion=False
|
|
1498
|
+
)
|
|
1203
1499
|
node._fetch_cached_df = external_database_writer
|
|
1204
1500
|
external_database_writer.get_result()
|
|
1205
1501
|
return df
|
|
@@ -1226,56 +1522,64 @@ class FlowGraph:
|
|
|
1226
1522
|
"""
|
|
1227
1523
|
|
|
1228
1524
|
logger.info("Adding database reader")
|
|
1229
|
-
node_type =
|
|
1525
|
+
node_type = "database_reader"
|
|
1230
1526
|
database_settings: input_schema.DatabaseSettings = node_database_reader.database_settings
|
|
1231
|
-
database_connection:
|
|
1232
|
-
if database_settings.connection_mode ==
|
|
1527
|
+
database_connection: input_schema.DatabaseConnection | input_schema.FullDatabaseConnection | None
|
|
1528
|
+
if database_settings.connection_mode == "inline":
|
|
1233
1529
|
database_connection: input_schema.DatabaseConnection = database_settings.database_connection
|
|
1234
|
-
encrypted_password = get_encrypted_secret(
|
|
1235
|
-
|
|
1530
|
+
encrypted_password = get_encrypted_secret(
|
|
1531
|
+
current_user_id=node_database_reader.user_id, secret_name=database_connection.password_ref
|
|
1532
|
+
)
|
|
1236
1533
|
if encrypted_password is None:
|
|
1237
1534
|
raise HTTPException(status_code=400, detail="Password not found")
|
|
1238
1535
|
else:
|
|
1239
|
-
database_reference_settings = get_local_database_connection(
|
|
1240
|
-
|
|
1536
|
+
database_reference_settings = get_local_database_connection(
|
|
1537
|
+
database_settings.database_connection_name, node_database_reader.user_id
|
|
1538
|
+
)
|
|
1241
1539
|
database_connection = database_reference_settings
|
|
1242
1540
|
encrypted_password = database_reference_settings.password.get_secret_value()
|
|
1243
1541
|
|
|
1244
1542
|
def _func():
|
|
1245
|
-
sql_source = BaseSqlSource(
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1543
|
+
sql_source = BaseSqlSource(
|
|
1544
|
+
query=None if database_settings.query_mode == "table" else database_settings.query,
|
|
1545
|
+
table_name=database_settings.table_name,
|
|
1546
|
+
schema_name=database_settings.schema_name,
|
|
1547
|
+
fields=node_database_reader.fields,
|
|
1548
|
+
)
|
|
1250
1549
|
database_external_read_settings = (
|
|
1251
1550
|
sql_models.DatabaseExternalReadSettings.create_from_from_node_database_reader(
|
|
1252
1551
|
node_database_reader=node_database_reader,
|
|
1253
1552
|
password=encrypted_password,
|
|
1254
1553
|
query=sql_source.query,
|
|
1255
|
-
database_reference_settings=(
|
|
1256
|
-
|
|
1554
|
+
database_reference_settings=(
|
|
1555
|
+
database_reference_settings if database_settings.connection_mode == "reference" else None
|
|
1556
|
+
),
|
|
1257
1557
|
)
|
|
1258
1558
|
)
|
|
1259
1559
|
|
|
1260
|
-
external_database_fetcher = ExternalDatabaseFetcher(
|
|
1560
|
+
external_database_fetcher = ExternalDatabaseFetcher(
|
|
1561
|
+
database_external_read_settings, wait_on_completion=False
|
|
1562
|
+
)
|
|
1261
1563
|
node._fetch_cached_df = external_database_fetcher
|
|
1262
1564
|
fl = FlowDataEngine(external_database_fetcher.get_result())
|
|
1263
1565
|
node_database_reader.fields = [c.get_minimal_field_info() for c in fl.schema]
|
|
1264
1566
|
return fl
|
|
1265
1567
|
|
|
1266
1568
|
def schema_callback():
|
|
1267
|
-
sql_source = SqlSource(
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1569
|
+
sql_source = SqlSource(
|
|
1570
|
+
connection_string=sql_utils.construct_sql_uri(
|
|
1571
|
+
database_type=database_connection.database_type,
|
|
1572
|
+
host=database_connection.host,
|
|
1573
|
+
port=database_connection.port,
|
|
1574
|
+
database=database_connection.database,
|
|
1575
|
+
username=database_connection.username,
|
|
1576
|
+
password=decrypt_secret(encrypted_password),
|
|
1577
|
+
),
|
|
1578
|
+
query=None if database_settings.query_mode == "table" else database_settings.query,
|
|
1579
|
+
table_name=database_settings.table_name,
|
|
1580
|
+
schema_name=database_settings.schema_name,
|
|
1581
|
+
fields=node_database_reader.fields,
|
|
1582
|
+
)
|
|
1279
1583
|
return sql_source.get_schema()
|
|
1280
1584
|
|
|
1281
1585
|
node = self.get_node(node_database_reader.node_id)
|
|
@@ -1285,16 +1589,20 @@ class FlowGraph:
|
|
|
1285
1589
|
node.function = _func
|
|
1286
1590
|
node.setting_input = node_database_reader
|
|
1287
1591
|
node.node_settings.cache_results = node_database_reader.cache_results
|
|
1288
|
-
|
|
1289
|
-
self._flow_starts.append(node)
|
|
1592
|
+
self.add_node_to_starting_list(node)
|
|
1290
1593
|
node.schema_callback = schema_callback
|
|
1291
1594
|
else:
|
|
1292
|
-
node = FlowNode(
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1595
|
+
node = FlowNode(
|
|
1596
|
+
node_database_reader.node_id,
|
|
1597
|
+
function=_func,
|
|
1598
|
+
setting_input=node_database_reader,
|
|
1599
|
+
name=node_type,
|
|
1600
|
+
node_type=node_type,
|
|
1601
|
+
parent_uuid=self.uuid,
|
|
1602
|
+
schema_callback=schema_callback,
|
|
1603
|
+
)
|
|
1296
1604
|
self._node_db[node_database_reader.node_id] = node
|
|
1297
|
-
self.
|
|
1605
|
+
self.add_node_to_starting_list(node)
|
|
1298
1606
|
self._node_ids.append(node_database_reader.node_id)
|
|
1299
1607
|
|
|
1300
1608
|
def add_sql_source(self, external_source_input: input_schema.NodeExternalSource):
|
|
@@ -1305,7 +1613,7 @@ class FlowGraph:
|
|
|
1305
1613
|
Args:
|
|
1306
1614
|
external_source_input: The settings for the external SQL source node.
|
|
1307
1615
|
"""
|
|
1308
|
-
logger.info(
|
|
1616
|
+
logger.info("Adding sql source")
|
|
1309
1617
|
self.add_external_source(external_source_input)
|
|
1310
1618
|
|
|
1311
1619
|
def add_cloud_storage_writer(self, node_cloud_storage_writer: input_schema.NodeCloudStorageWriter) -> None:
|
|
@@ -1316,27 +1624,30 @@ class FlowGraph:
|
|
|
1316
1624
|
"""
|
|
1317
1625
|
|
|
1318
1626
|
node_type = "cloud_storage_writer"
|
|
1627
|
+
|
|
1319
1628
|
def _func(df: FlowDataEngine):
|
|
1320
1629
|
df.lazy = True
|
|
1321
|
-
execute_remote = self.execution_location !=
|
|
1630
|
+
execute_remote = self.execution_location != "local"
|
|
1322
1631
|
cloud_connection_settings = get_cloud_connection_settings(
|
|
1323
1632
|
connection_name=node_cloud_storage_writer.cloud_storage_settings.connection_name,
|
|
1324
1633
|
user_id=node_cloud_storage_writer.user_id,
|
|
1325
|
-
auth_mode=node_cloud_storage_writer.cloud_storage_settings.auth_mode
|
|
1634
|
+
auth_mode=node_cloud_storage_writer.cloud_storage_settings.auth_mode,
|
|
1326
1635
|
)
|
|
1327
1636
|
full_cloud_storage_connection = FullCloudStorageConnection(
|
|
1328
1637
|
storage_type=cloud_connection_settings.storage_type,
|
|
1329
1638
|
auth_method=cloud_connection_settings.auth_method,
|
|
1330
1639
|
aws_allow_unsafe_html=cloud_connection_settings.aws_allow_unsafe_html,
|
|
1331
|
-
**CloudStorageReader.get_storage_options(cloud_connection_settings)
|
|
1640
|
+
**CloudStorageReader.get_storage_options(cloud_connection_settings),
|
|
1332
1641
|
)
|
|
1333
1642
|
if execute_remote:
|
|
1334
1643
|
settings = get_cloud_storage_write_settings_worker_interface(
|
|
1335
1644
|
write_settings=node_cloud_storage_writer.cloud_storage_settings,
|
|
1336
1645
|
connection=full_cloud_storage_connection,
|
|
1337
1646
|
lf=df.data_frame,
|
|
1647
|
+
user_id=node_cloud_storage_writer.user_id,
|
|
1338
1648
|
flowfile_node_id=node_cloud_storage_writer.node_id,
|
|
1339
|
-
flowfile_flow_id=self.flow_id
|
|
1649
|
+
flowfile_flow_id=self.flow_id,
|
|
1650
|
+
)
|
|
1340
1651
|
external_database_writer = ExternalCloudWriter(settings, wait_on_completion=False)
|
|
1341
1652
|
node._fetch_cached_df = external_database_writer
|
|
1342
1653
|
external_database_writer.get_result()
|
|
@@ -1362,7 +1673,7 @@ class FlowGraph:
|
|
|
1362
1673
|
node_type=node_type,
|
|
1363
1674
|
setting_input=node_cloud_storage_writer,
|
|
1364
1675
|
schema_callback=schema_callback,
|
|
1365
|
-
input_node_ids=[node_cloud_storage_writer.depending_on_id]
|
|
1676
|
+
input_node_ids=[node_cloud_storage_writer.depending_on_id],
|
|
1366
1677
|
)
|
|
1367
1678
|
|
|
1368
1679
|
node = self.get_node(node_cloud_storage_writer.node_id)
|
|
@@ -1380,49 +1691,53 @@ class FlowGraph:
|
|
|
1380
1691
|
def _func():
|
|
1381
1692
|
logger.info("Starting to run the schema callback for cloud storage reader")
|
|
1382
1693
|
self.flow_logger.info("Starting to run the schema callback for cloud storage reader")
|
|
1383
|
-
settings = CloudStorageReadSettingsInternal(
|
|
1384
|
-
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1694
|
+
settings = CloudStorageReadSettingsInternal(
|
|
1695
|
+
read_settings=cloud_storage_read_settings,
|
|
1696
|
+
connection=get_cloud_connection_settings(
|
|
1697
|
+
connection_name=cloud_storage_read_settings.connection_name,
|
|
1698
|
+
user_id=node_cloud_storage_reader.user_id,
|
|
1699
|
+
auth_mode=cloud_storage_read_settings.auth_mode,
|
|
1700
|
+
),
|
|
1701
|
+
)
|
|
1389
1702
|
fl = FlowDataEngine.from_cloud_storage_obj(settings)
|
|
1390
1703
|
return fl
|
|
1391
1704
|
|
|
1392
|
-
node = self.add_node_step(
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1705
|
+
node = self.add_node_step(
|
|
1706
|
+
node_id=node_cloud_storage_reader.node_id,
|
|
1707
|
+
function=_func,
|
|
1708
|
+
cache_results=node_cloud_storage_reader.cache_results,
|
|
1709
|
+
setting_input=node_cloud_storage_reader,
|
|
1710
|
+
node_type=node_type,
|
|
1711
|
+
)
|
|
1712
|
+
self.add_node_to_starting_list(node)
|
|
1400
1713
|
|
|
1401
|
-
def add_external_source(self,
|
|
1402
|
-
external_source_input: input_schema.NodeExternalSource):
|
|
1714
|
+
def add_external_source(self, external_source_input: input_schema.NodeExternalSource):
|
|
1403
1715
|
"""Adds a node for a custom external data source.
|
|
1404
1716
|
|
|
1405
1717
|
Args:
|
|
1406
1718
|
external_source_input: The settings for the external source node.
|
|
1407
1719
|
"""
|
|
1408
1720
|
|
|
1409
|
-
node_type =
|
|
1721
|
+
node_type = "external_source"
|
|
1410
1722
|
external_source_script = getattr(external_sources.custom_external_sources, external_source_input.identifier)
|
|
1411
|
-
source_settings =
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
|
|
1723
|
+
source_settings = getattr(
|
|
1724
|
+
input_schema, snake_case_to_camel_case(external_source_input.identifier)
|
|
1725
|
+
).model_validate(external_source_input.source_settings)
|
|
1726
|
+
if hasattr(external_source_script, "initial_getter"):
|
|
1727
|
+
initial_getter = external_source_script.initial_getter(source_settings)
|
|
1415
1728
|
else:
|
|
1416
1729
|
initial_getter = None
|
|
1417
1730
|
data_getter = external_source_script.getter(source_settings)
|
|
1418
|
-
external_source = data_source_factory(
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
|
|
1731
|
+
external_source = data_source_factory(
|
|
1732
|
+
source_type="custom",
|
|
1733
|
+
data_getter=data_getter,
|
|
1734
|
+
initial_data_getter=initial_getter,
|
|
1735
|
+
orientation=external_source_input.source_settings.orientation,
|
|
1736
|
+
schema=None,
|
|
1737
|
+
)
|
|
1423
1738
|
|
|
1424
1739
|
def _func():
|
|
1425
|
-
logger.info(
|
|
1740
|
+
logger.info("Calling external source")
|
|
1426
1741
|
fl = FlowDataEngine.create_from_external_source(external_source=external_source)
|
|
1427
1742
|
external_source_input.source_settings.fields = [c.get_minimal_field_info() for c in fl.schema]
|
|
1428
1743
|
return fl
|
|
@@ -1434,31 +1749,39 @@ class FlowGraph:
|
|
|
1434
1749
|
node.function = _func
|
|
1435
1750
|
node.setting_input = external_source_input
|
|
1436
1751
|
node.node_settings.cache_results = external_source_input.cache_results
|
|
1437
|
-
|
|
1438
|
-
|
|
1752
|
+
self.add_node_to_starting_list(node)
|
|
1753
|
+
|
|
1439
1754
|
else:
|
|
1440
|
-
node = FlowNode(
|
|
1441
|
-
|
|
1442
|
-
|
|
1755
|
+
node = FlowNode(
|
|
1756
|
+
external_source_input.node_id,
|
|
1757
|
+
function=_func,
|
|
1758
|
+
setting_input=external_source_input,
|
|
1759
|
+
name=node_type,
|
|
1760
|
+
node_type=node_type,
|
|
1761
|
+
parent_uuid=self.uuid,
|
|
1762
|
+
)
|
|
1443
1763
|
self._node_db[external_source_input.node_id] = node
|
|
1444
|
-
self.
|
|
1764
|
+
self.add_node_to_starting_list(node)
|
|
1445
1765
|
self._node_ids.append(external_source_input.node_id)
|
|
1446
1766
|
if external_source_input.source_settings.fields and len(external_source_input.source_settings.fields) > 0:
|
|
1447
|
-
logger.info(
|
|
1767
|
+
logger.info("Using provided schema in the node")
|
|
1448
1768
|
|
|
1449
1769
|
def schema_callback():
|
|
1450
|
-
return [
|
|
1451
|
-
|
|
1770
|
+
return [
|
|
1771
|
+
FlowfileColumn.from_input(f.name, f.data_type) for f in external_source_input.source_settings.fields
|
|
1772
|
+
]
|
|
1452
1773
|
|
|
1453
1774
|
node.schema_callback = schema_callback
|
|
1454
1775
|
else:
|
|
1455
|
-
logger.warning(
|
|
1776
|
+
logger.warning("Removing schema")
|
|
1456
1777
|
node._schema_callback = None
|
|
1457
|
-
self.add_node_step(
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
|
|
1778
|
+
self.add_node_step(
|
|
1779
|
+
node_id=external_source_input.node_id,
|
|
1780
|
+
function=_func,
|
|
1781
|
+
input_columns=[],
|
|
1782
|
+
node_type=node_type,
|
|
1783
|
+
setting_input=external_source_input,
|
|
1784
|
+
)
|
|
1462
1785
|
|
|
1463
1786
|
def add_read(self, input_file: input_schema.NodeRead):
|
|
1464
1787
|
"""Adds a node to read data from a local file (e.g., CSV, Parquet, Excel).
|
|
@@ -1466,8 +1789,10 @@ class FlowGraph:
|
|
|
1466
1789
|
Args:
|
|
1467
1790
|
input_file: The settings for the read operation.
|
|
1468
1791
|
"""
|
|
1469
|
-
if (
|
|
1470
|
-
|
|
1792
|
+
if (
|
|
1793
|
+
input_file.received_file.file_type in ("xlsx", "excel")
|
|
1794
|
+
and input_file.received_file.table_settings.sheet_name == ""
|
|
1795
|
+
):
|
|
1471
1796
|
sheet_name = fastexcel.read_excel(input_file.received_file.path).sheet_names[0]
|
|
1472
1797
|
input_file.received_file.table_settings.sheet_name = sheet_name
|
|
1473
1798
|
|
|
@@ -1476,14 +1801,17 @@ class FlowGraph:
|
|
|
1476
1801
|
|
|
1477
1802
|
def _func():
|
|
1478
1803
|
input_file.received_file.set_absolute_filepath()
|
|
1479
|
-
if input_file.received_file.file_type ==
|
|
1804
|
+
if input_file.received_file.file_type == "parquet":
|
|
1480
1805
|
input_data = FlowDataEngine.create_from_path(input_file.received_file)
|
|
1481
|
-
elif
|
|
1806
|
+
elif (
|
|
1807
|
+
input_file.received_file.file_type == "csv"
|
|
1808
|
+
and "utf" in input_file.received_file.table_settings.encoding
|
|
1809
|
+
):
|
|
1482
1810
|
input_data = FlowDataEngine.create_from_path(input_file.received_file)
|
|
1483
1811
|
else:
|
|
1484
|
-
input_data = FlowDataEngine.create_from_path_worker(
|
|
1485
|
-
|
|
1486
|
-
|
|
1812
|
+
input_data = FlowDataEngine.create_from_path_worker(
|
|
1813
|
+
input_file.received_file, node_id=input_file.node_id, flow_id=self.flow_id
|
|
1814
|
+
)
|
|
1487
1815
|
input_data.name = input_file.received_file.name
|
|
1488
1816
|
return input_data
|
|
1489
1817
|
|
|
@@ -1491,51 +1819,58 @@ class FlowGraph:
|
|
|
1491
1819
|
schema_callback = None
|
|
1492
1820
|
if node:
|
|
1493
1821
|
start_hash = node.hash
|
|
1494
|
-
node.node_type =
|
|
1495
|
-
node.name =
|
|
1822
|
+
node.node_type = "read"
|
|
1823
|
+
node.name = "read"
|
|
1496
1824
|
node.function = _func
|
|
1497
1825
|
node.setting_input = input_file
|
|
1498
|
-
|
|
1499
|
-
self._flow_starts.append(node)
|
|
1826
|
+
self.add_node_to_starting_list(node)
|
|
1500
1827
|
|
|
1501
1828
|
if start_hash != node.hash:
|
|
1502
|
-
logger.info(
|
|
1829
|
+
logger.info("Hash changed, updating schema")
|
|
1503
1830
|
if len(received_file.fields) > 0:
|
|
1504
1831
|
# If the file has fields defined, we can use them to create the schema
|
|
1505
1832
|
def schema_callback():
|
|
1506
1833
|
return [FlowfileColumn.from_input(f.name, f.data_type) for f in received_file.fields]
|
|
1507
1834
|
|
|
1508
|
-
elif input_file.received_file.file_type in (
|
|
1835
|
+
elif input_file.received_file.file_type in ("csv", "json", "parquet"):
|
|
1509
1836
|
# everything that can be scanned by polars
|
|
1510
1837
|
def schema_callback():
|
|
1511
1838
|
input_data = FlowDataEngine.create_from_path(input_file.received_file)
|
|
1512
1839
|
return input_data.schema
|
|
1513
1840
|
|
|
1514
|
-
elif input_file.received_file.file_type in (
|
|
1841
|
+
elif input_file.received_file.file_type in ("xlsx", "excel"):
|
|
1515
1842
|
# If the file is an Excel file, we need to use the openpyxl engine to read the schema
|
|
1516
|
-
schema_callback = get_xlsx_schema_callback(
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
|
|
1843
|
+
schema_callback = get_xlsx_schema_callback(
|
|
1844
|
+
engine="openpyxl",
|
|
1845
|
+
file_path=received_file.file_path,
|
|
1846
|
+
sheet_name=received_file.table_settings.sheet_name,
|
|
1847
|
+
start_row=received_file.table_settings.start_row,
|
|
1848
|
+
end_row=received_file.table_settings.end_row,
|
|
1849
|
+
start_column=received_file.table_settings.start_column,
|
|
1850
|
+
end_column=received_file.table_settings.end_column,
|
|
1851
|
+
has_headers=received_file.table_settings.has_headers,
|
|
1852
|
+
)
|
|
1524
1853
|
else:
|
|
1525
1854
|
schema_callback = None
|
|
1526
1855
|
else:
|
|
1527
|
-
node = FlowNode(
|
|
1528
|
-
|
|
1529
|
-
|
|
1856
|
+
node = FlowNode(
|
|
1857
|
+
input_file.node_id,
|
|
1858
|
+
function=_func,
|
|
1859
|
+
setting_input=input_file,
|
|
1860
|
+
name="read",
|
|
1861
|
+
node_type="read",
|
|
1862
|
+
parent_uuid=self.uuid,
|
|
1863
|
+
)
|
|
1530
1864
|
self._node_db[input_file.node_id] = node
|
|
1531
|
-
self.
|
|
1865
|
+
self.add_node_to_starting_list(node)
|
|
1532
1866
|
self._node_ids.append(input_file.node_id)
|
|
1533
1867
|
|
|
1534
1868
|
if schema_callback is not None:
|
|
1535
1869
|
node.schema_callback = schema_callback
|
|
1870
|
+
node.user_provided_schema_callback = schema_callback
|
|
1536
1871
|
return self
|
|
1537
1872
|
|
|
1538
|
-
def add_datasource(self, input_file:
|
|
1873
|
+
def add_datasource(self, input_file: input_schema.NodeDatasource | input_schema.NodeManualInput) -> "FlowGraph":
|
|
1539
1874
|
"""Adds a data source node to the graph.
|
|
1540
1875
|
|
|
1541
1876
|
This method serves as a factory for creating starting nodes, handling both
|
|
@@ -1549,25 +1884,30 @@ class FlowGraph:
|
|
|
1549
1884
|
"""
|
|
1550
1885
|
if isinstance(input_file, input_schema.NodeManualInput):
|
|
1551
1886
|
input_data = FlowDataEngine(input_file.raw_data_format)
|
|
1552
|
-
ref =
|
|
1887
|
+
ref = "manual_input"
|
|
1553
1888
|
else:
|
|
1554
1889
|
input_data = FlowDataEngine(path_ref=input_file.file_ref)
|
|
1555
|
-
ref =
|
|
1890
|
+
ref = "datasource"
|
|
1556
1891
|
node = self.get_node(input_file.node_id)
|
|
1557
1892
|
if node:
|
|
1558
1893
|
node.node_type = ref
|
|
1559
1894
|
node.name = ref
|
|
1560
1895
|
node.function = input_data
|
|
1561
1896
|
node.setting_input = input_file
|
|
1562
|
-
|
|
1563
|
-
|
|
1897
|
+
self.add_node_to_starting_list(node)
|
|
1898
|
+
|
|
1564
1899
|
else:
|
|
1565
1900
|
input_data.collect()
|
|
1566
|
-
node = FlowNode(
|
|
1567
|
-
|
|
1568
|
-
|
|
1901
|
+
node = FlowNode(
|
|
1902
|
+
input_file.node_id,
|
|
1903
|
+
function=input_data,
|
|
1904
|
+
setting_input=input_file,
|
|
1905
|
+
name=ref,
|
|
1906
|
+
node_type=ref,
|
|
1907
|
+
parent_uuid=self.uuid,
|
|
1908
|
+
)
|
|
1569
1909
|
self._node_db[input_file.node_id] = node
|
|
1570
|
-
self.
|
|
1910
|
+
self.add_node_to_starting_list(node)
|
|
1571
1911
|
self._node_ids.append(input_file.node_id)
|
|
1572
1912
|
return self
|
|
1573
1913
|
|
|
@@ -1582,7 +1922,7 @@ class FlowGraph:
|
|
|
1582
1922
|
self.add_datasource(input_file)
|
|
1583
1923
|
|
|
1584
1924
|
@property
|
|
1585
|
-
def nodes(self) ->
|
|
1925
|
+
def nodes(self) -> list[FlowNode]:
|
|
1586
1926
|
"""Gets a list of all FlowNode objects in the graph."""
|
|
1587
1927
|
|
|
1588
1928
|
return list(self._node_db.values())
|
|
@@ -1592,7 +1932,7 @@ class FlowGraph:
|
|
|
1592
1932
|
"""Gets the current execution mode ('Development' or 'Performance')."""
|
|
1593
1933
|
return self.flow_settings.execution_mode
|
|
1594
1934
|
|
|
1595
|
-
def get_implicit_starter_nodes(self) ->
|
|
1935
|
+
def get_implicit_starter_nodes(self) -> list[FlowNode]:
|
|
1596
1936
|
"""Finds nodes that can act as starting points but are not explicitly defined as such.
|
|
1597
1937
|
|
|
1598
1938
|
Some nodes, like the Polars Code node, can function without an input. This
|
|
@@ -1638,24 +1978,31 @@ class FlowGraph:
|
|
|
1638
1978
|
if not flow_node:
|
|
1639
1979
|
raise Exception("Node not found found")
|
|
1640
1980
|
skip_nodes, execution_order = compute_execution_plan(
|
|
1641
|
-
nodes=self.nodes, flow_starts=self._flow_starts+self.get_implicit_starter_nodes()
|
|
1981
|
+
nodes=self.nodes, flow_starts=self._flow_starts + self.get_implicit_starter_nodes()
|
|
1642
1982
|
)
|
|
1643
1983
|
if flow_node.node_id in [skip_node.node_id for skip_node in skip_nodes]:
|
|
1644
1984
|
raise Exception("Node can not be executed because it does not have it's inputs")
|
|
1645
1985
|
|
|
1646
|
-
def create_initial_run_information(self, number_of_nodes: int,
|
|
1647
|
-
run_type: Literal["fetch_one", "full_run"]):
|
|
1986
|
+
def create_initial_run_information(self, number_of_nodes: int, run_type: Literal["fetch_one", "full_run"]):
|
|
1648
1987
|
return RunInformation(
|
|
1649
|
-
flow_id=self.flow_id,
|
|
1650
|
-
|
|
1651
|
-
|
|
1988
|
+
flow_id=self.flow_id,
|
|
1989
|
+
start_time=datetime.datetime.now(),
|
|
1990
|
+
end_time=None,
|
|
1991
|
+
success=None,
|
|
1992
|
+
number_of_nodes=number_of_nodes,
|
|
1993
|
+
node_step_result=[],
|
|
1994
|
+
run_type=run_type,
|
|
1652
1995
|
)
|
|
1653
1996
|
|
|
1654
1997
|
def create_empty_run_information(self) -> RunInformation:
|
|
1655
1998
|
return RunInformation(
|
|
1656
|
-
flow_id=self.flow_id,
|
|
1657
|
-
|
|
1658
|
-
|
|
1999
|
+
flow_id=self.flow_id,
|
|
2000
|
+
start_time=None,
|
|
2001
|
+
end_time=None,
|
|
2002
|
+
success=None,
|
|
2003
|
+
number_of_nodes=0,
|
|
2004
|
+
node_step_result=[],
|
|
2005
|
+
run_type="init",
|
|
1659
2006
|
)
|
|
1660
2007
|
|
|
1661
2008
|
def trigger_fetch_node(self, node_id: int) -> RunInformation | None:
|
|
@@ -1669,14 +2016,16 @@ class FlowGraph:
|
|
|
1669
2016
|
self.latest_run_info = self.create_initial_run_information(1, "fetch_one")
|
|
1670
2017
|
node_logger = self.flow_logger.get_node_logger(flow_node.node_id)
|
|
1671
2018
|
node_result = NodeResult(node_id=flow_node.node_id, node_name=flow_node.name)
|
|
1672
|
-
logger.info(f
|
|
2019
|
+
logger.info(f"Starting to run: node {flow_node.node_id}, start time: {node_result.start_timestamp}")
|
|
1673
2020
|
try:
|
|
1674
2021
|
self.latest_run_info.node_step_result.append(node_result)
|
|
1675
|
-
flow_node.execute_node(
|
|
1676
|
-
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
|
|
2022
|
+
flow_node.execute_node(
|
|
2023
|
+
run_location=self.flow_settings.execution_location,
|
|
2024
|
+
performance_mode=False,
|
|
2025
|
+
node_logger=node_logger,
|
|
2026
|
+
optimize_for_downstream=False,
|
|
2027
|
+
reset_cache=True,
|
|
2028
|
+
)
|
|
1680
2029
|
node_result.error = str(flow_node.results.errors)
|
|
1681
2030
|
if self.flow_settings.is_canceled:
|
|
1682
2031
|
node_result.success = None
|
|
@@ -1691,12 +2040,12 @@ class FlowGraph:
|
|
|
1691
2040
|
self.flow_settings.is_running = False
|
|
1692
2041
|
return self.get_run_info()
|
|
1693
2042
|
except Exception as e:
|
|
1694
|
-
node_result.error =
|
|
2043
|
+
node_result.error = "Node did not run"
|
|
1695
2044
|
node_result.success = False
|
|
1696
2045
|
node_result.end_timestamp = time()
|
|
1697
2046
|
node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
|
|
1698
2047
|
node_result.is_running = False
|
|
1699
|
-
node_logger.error(f
|
|
2048
|
+
node_logger.error(f"Error in node {flow_node.node_id}: {e}")
|
|
1700
2049
|
finally:
|
|
1701
2050
|
self.flow_settings.is_running = False
|
|
1702
2051
|
|
|
@@ -1713,39 +2062,38 @@ class FlowGraph:
|
|
|
1713
2062
|
Exception: If the flow is already running.
|
|
1714
2063
|
"""
|
|
1715
2064
|
if self.flow_settings.is_running:
|
|
1716
|
-
raise Exception(
|
|
2065
|
+
raise Exception("Flow is already running")
|
|
1717
2066
|
try:
|
|
1718
|
-
|
|
1719
2067
|
self.flow_settings.is_running = True
|
|
1720
2068
|
self.flow_settings.is_canceled = False
|
|
1721
2069
|
self.flow_logger.clear_log_file()
|
|
1722
|
-
self.flow_logger.info(
|
|
1723
|
-
|
|
2070
|
+
self.flow_logger.info("Starting to run flowfile flow...")
|
|
1724
2071
|
skip_nodes, execution_order = compute_execution_plan(
|
|
1725
|
-
nodes=self.nodes,
|
|
1726
|
-
flow_starts=self._flow_starts+self.get_implicit_starter_nodes()
|
|
2072
|
+
nodes=self.nodes, flow_starts=self._flow_starts + self.get_implicit_starter_nodes()
|
|
1727
2073
|
)
|
|
1728
2074
|
|
|
1729
2075
|
self.latest_run_info = self.create_initial_run_information(len(execution_order), "full_run")
|
|
1730
2076
|
|
|
1731
2077
|
skip_node_message(self.flow_logger, skip_nodes)
|
|
1732
2078
|
execution_order_message(self.flow_logger, execution_order)
|
|
1733
|
-
performance_mode = self.flow_settings.execution_mode ==
|
|
2079
|
+
performance_mode = self.flow_settings.execution_mode == "Performance"
|
|
1734
2080
|
|
|
1735
2081
|
for node in execution_order:
|
|
1736
2082
|
node_logger = self.flow_logger.get_node_logger(node.node_id)
|
|
1737
2083
|
if self.flow_settings.is_canceled:
|
|
1738
|
-
self.flow_logger.info(
|
|
2084
|
+
self.flow_logger.info("Flow canceled")
|
|
1739
2085
|
break
|
|
1740
2086
|
if node in skip_nodes:
|
|
1741
|
-
node_logger.info(f
|
|
2087
|
+
node_logger.info(f"Skipping node {node.node_id}")
|
|
1742
2088
|
continue
|
|
1743
2089
|
node_result = NodeResult(node_id=node.node_id, node_name=node.name)
|
|
1744
2090
|
self.latest_run_info.node_step_result.append(node_result)
|
|
1745
|
-
logger.info(f
|
|
1746
|
-
node.execute_node(
|
|
1747
|
-
|
|
1748
|
-
|
|
2091
|
+
logger.info(f"Starting to run: node {node.node_id}, start time: {node_result.start_timestamp}")
|
|
2092
|
+
node.execute_node(
|
|
2093
|
+
run_location=self.flow_settings.execution_location,
|
|
2094
|
+
performance_mode=performance_mode,
|
|
2095
|
+
node_logger=node_logger,
|
|
2096
|
+
)
|
|
1749
2097
|
try:
|
|
1750
2098
|
node_result.error = str(node.results.errors)
|
|
1751
2099
|
if self.flow_settings.is_canceled:
|
|
@@ -1758,22 +2106,22 @@ class FlowGraph:
|
|
|
1758
2106
|
node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
|
|
1759
2107
|
node_result.is_running = False
|
|
1760
2108
|
except Exception as e:
|
|
1761
|
-
node_result.error =
|
|
2109
|
+
node_result.error = "Node did not run"
|
|
1762
2110
|
node_result.success = False
|
|
1763
2111
|
node_result.end_timestamp = time()
|
|
1764
2112
|
node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
|
|
1765
2113
|
node_result.is_running = False
|
|
1766
|
-
node_logger.error(f
|
|
2114
|
+
node_logger.error(f"Error in node {node.node_id}: {e}")
|
|
1767
2115
|
if not node_result.success:
|
|
1768
2116
|
skip_nodes.extend(list(node.get_all_dependent_nodes()))
|
|
1769
|
-
node_logger.info(f
|
|
2117
|
+
node_logger.info(f"Completed node with success: {node_result.success}")
|
|
1770
2118
|
self.latest_run_info.nodes_completed += 1
|
|
1771
2119
|
self.latest_run_info.end_time = datetime.datetime.now()
|
|
1772
|
-
self.flow_logger.info(
|
|
2120
|
+
self.flow_logger.info("Flow completed!")
|
|
1773
2121
|
self.end_datetime = datetime.datetime.now()
|
|
1774
2122
|
self.flow_settings.is_running = False
|
|
1775
2123
|
if self.flow_settings.is_canceled:
|
|
1776
|
-
self.flow_logger.info(
|
|
2124
|
+
self.flow_logger.info("Flow canceled")
|
|
1777
2125
|
return self.get_run_info()
|
|
1778
2126
|
except Exception as e:
|
|
1779
2127
|
raise e
|
|
@@ -1799,7 +2147,7 @@ class FlowGraph:
|
|
|
1799
2147
|
return run_info
|
|
1800
2148
|
|
|
1801
2149
|
@property
|
|
1802
|
-
def node_connections(self) ->
|
|
2150
|
+
def node_connections(self) -> list[tuple[int, int]]:
|
|
1803
2151
|
"""Computes and returns a list of all connections in the graph.
|
|
1804
2152
|
|
|
1805
2153
|
Returns:
|
|
@@ -1809,8 +2157,9 @@ class FlowGraph:
|
|
|
1809
2157
|
for node in self.nodes:
|
|
1810
2158
|
outgoing_connections = [(node.node_id, ltn.node_id) for ltn in node.leads_to_nodes]
|
|
1811
2159
|
incoming_connections = [(don.node_id, node.node_id) for don in node.all_inputs]
|
|
1812
|
-
node_connections = [
|
|
1813
|
-
|
|
2160
|
+
node_connections = [
|
|
2161
|
+
c for c in outgoing_connections + incoming_connections if (c[0] is not None and c[1] is not None)
|
|
2162
|
+
]
|
|
1814
2163
|
for node_connection in node_connections:
|
|
1815
2164
|
if node_connection not in connections:
|
|
1816
2165
|
connections.add(node_connection)
|
|
@@ -1871,16 +2220,18 @@ class FlowGraph:
|
|
|
1871
2220
|
Returns:
|
|
1872
2221
|
A FlowInformation object representing the complete graph.
|
|
1873
2222
|
"""
|
|
1874
|
-
node_information = {
|
|
1875
|
-
|
|
2223
|
+
node_information = {
|
|
2224
|
+
node.node_id: node.get_node_information() for node in self.nodes if node.is_setup and node.is_correct
|
|
2225
|
+
}
|
|
1876
2226
|
|
|
1877
|
-
return schemas.FlowInformation(
|
|
1878
|
-
|
|
1879
|
-
|
|
1880
|
-
|
|
1881
|
-
|
|
1882
|
-
|
|
1883
|
-
|
|
2227
|
+
return schemas.FlowInformation(
|
|
2228
|
+
flow_id=self.flow_id,
|
|
2229
|
+
flow_name=self.__name__,
|
|
2230
|
+
flow_settings=self.flow_settings,
|
|
2231
|
+
data=node_information,
|
|
2232
|
+
node_starts=[v.node_id for v in self._flow_starts],
|
|
2233
|
+
node_connections=self.node_connections,
|
|
2234
|
+
)
|
|
1884
2235
|
|
|
1885
2236
|
def cancel(self):
|
|
1886
2237
|
"""Cancels an ongoing graph execution."""
|
|
@@ -1901,7 +2252,11 @@ class FlowGraph:
|
|
|
1901
2252
|
"""
|
|
1902
2253
|
Handle the rename of a flow when it is being saved.
|
|
1903
2254
|
"""
|
|
1904
|
-
if
|
|
2255
|
+
if (
|
|
2256
|
+
self.flow_settings
|
|
2257
|
+
and self.flow_settings.path
|
|
2258
|
+
and Path(self.flow_settings.path).absolute() != new_path.absolute()
|
|
2259
|
+
):
|
|
1905
2260
|
self.__name__ = new_name
|
|
1906
2261
|
self.flow_settings.save_location = str(new_path.absolute())
|
|
1907
2262
|
self.flow_settings.name = new_name
|
|
@@ -1928,27 +2283,27 @@ class FlowGraph:
|
|
|
1928
2283
|
self._handle_flow_renaming(new_flow_name, path)
|
|
1929
2284
|
self.flow_settings.modified_on = datetime.datetime.now().timestamp()
|
|
1930
2285
|
try:
|
|
1931
|
-
if suffix ==
|
|
2286
|
+
if suffix == ".flowfile":
|
|
1932
2287
|
raise DeprecationWarning(
|
|
1933
|
-
|
|
2288
|
+
"The .flowfile format is deprecated. Please use .yaml or .json formats.\n\n"
|
|
1934
2289
|
"Or stay on v0.4.1 if you still need .flowfile support.\n\n"
|
|
1935
2290
|
)
|
|
1936
|
-
elif suffix in (
|
|
2291
|
+
elif suffix in (".yaml", ".yml"):
|
|
1937
2292
|
flowfile_data = self.get_flowfile_data()
|
|
1938
|
-
data = flowfile_data.model_dump(mode=
|
|
1939
|
-
with open(flow_path,
|
|
2293
|
+
data = flowfile_data.model_dump(mode="json")
|
|
2294
|
+
with open(flow_path, "w", encoding="utf-8") as f:
|
|
1940
2295
|
yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
|
|
1941
|
-
elif suffix ==
|
|
2296
|
+
elif suffix == ".json":
|
|
1942
2297
|
flowfile_data = self.get_flowfile_data()
|
|
1943
|
-
data = flowfile_data.model_dump(mode=
|
|
1944
|
-
with open(flow_path,
|
|
2298
|
+
data = flowfile_data.model_dump(mode="json")
|
|
2299
|
+
with open(flow_path, "w", encoding="utf-8") as f:
|
|
1945
2300
|
json.dump(data, f, indent=2, ensure_ascii=False)
|
|
1946
2301
|
|
|
1947
2302
|
else:
|
|
1948
2303
|
flowfile_data = self.get_flowfile_data()
|
|
1949
2304
|
logger.warning(f"Unknown file extension {suffix}. Defaulting to YAML format.")
|
|
1950
|
-
data = flowfile_data.model_dump(mode=
|
|
1951
|
-
with open(flow_path,
|
|
2305
|
+
data = flowfile_data.model_dump(mode="json")
|
|
2306
|
+
with open(flow_path, "w", encoding="utf-8") as f:
|
|
1952
2307
|
yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
|
|
1953
2308
|
|
|
1954
2309
|
except Exception as e:
|
|
@@ -1966,11 +2321,7 @@ class FlowGraph:
|
|
|
1966
2321
|
Returns:
|
|
1967
2322
|
A dictionary representing the graph in Drawflow format.
|
|
1968
2323
|
"""
|
|
1969
|
-
result = {
|
|
1970
|
-
'Home': {
|
|
1971
|
-
"data": {}
|
|
1972
|
-
}
|
|
1973
|
-
}
|
|
2324
|
+
result = {"Home": {"data": {}}}
|
|
1974
2325
|
flow_info: schemas.FlowInformation = self.get_node_storage()
|
|
1975
2326
|
|
|
1976
2327
|
for node_id, node_info in flow_info.data.items():
|
|
@@ -1989,7 +2340,7 @@ class FlowGraph:
|
|
|
1989
2340
|
"inputs": {},
|
|
1990
2341
|
"outputs": {},
|
|
1991
2342
|
"pos_x": pos_x,
|
|
1992
|
-
"pos_y": pos_y
|
|
2343
|
+
"pos_y": pos_y,
|
|
1993
2344
|
}
|
|
1994
2345
|
except Exception as e:
|
|
1995
2346
|
logger.error(e)
|
|
@@ -2003,24 +2354,27 @@ class FlowGraph:
|
|
|
2003
2354
|
leading_to_node = self.get_node(output_node_id)
|
|
2004
2355
|
input_types = leading_to_node.get_input_type(node_info.id)
|
|
2005
2356
|
for input_type in input_types:
|
|
2006
|
-
if input_type ==
|
|
2007
|
-
input_frontend_id =
|
|
2008
|
-
elif input_type ==
|
|
2009
|
-
input_frontend_id =
|
|
2010
|
-
elif input_type ==
|
|
2011
|
-
input_frontend_id =
|
|
2357
|
+
if input_type == "main":
|
|
2358
|
+
input_frontend_id = "input_1"
|
|
2359
|
+
elif input_type == "right":
|
|
2360
|
+
input_frontend_id = "input_2"
|
|
2361
|
+
elif input_type == "left":
|
|
2362
|
+
input_frontend_id = "input_3"
|
|
2012
2363
|
else:
|
|
2013
|
-
input_frontend_id =
|
|
2364
|
+
input_frontend_id = "input_1"
|
|
2014
2365
|
connection = {"node": str(output_node_id), "input": input_frontend_id}
|
|
2015
2366
|
connections.append(connection)
|
|
2016
2367
|
|
|
2017
|
-
result["Home"]["data"][str(node_id)]["outputs"]["output_1"] = {
|
|
2018
|
-
"connections": connections}
|
|
2368
|
+
result["Home"]["data"][str(node_id)]["outputs"]["output_1"] = {"connections": connections}
|
|
2019
2369
|
else:
|
|
2020
2370
|
result["Home"]["data"][str(node_id)]["outputs"] = {"output_1": {"connections": []}}
|
|
2021
2371
|
|
|
2022
2372
|
# Add input to the node based on `depending_on_id` in your backend data
|
|
2023
|
-
if
|
|
2373
|
+
if (
|
|
2374
|
+
node_info.left_input_id is not None
|
|
2375
|
+
or node_info.right_input_id is not None
|
|
2376
|
+
or node_info.input_ids is not None
|
|
2377
|
+
):
|
|
2024
2378
|
main_inputs = node_info.main_input_ids
|
|
2025
2379
|
result["Home"]["data"][str(node_id)]["inputs"]["input_1"] = {
|
|
2026
2380
|
"connections": [{"node": str(main_node_id), "input": "output_1"} for main_node_id in main_inputs]
|
|
@@ -2041,8 +2395,8 @@ class FlowGraph:
|
|
|
2041
2395
|
Returns:
|
|
2042
2396
|
A VueFlowInput object.
|
|
2043
2397
|
"""
|
|
2044
|
-
edges:
|
|
2045
|
-
nodes:
|
|
2398
|
+
edges: list[schemas.NodeEdge] = []
|
|
2399
|
+
nodes: list[schemas.NodeInput] = []
|
|
2046
2400
|
for node in self.nodes:
|
|
2047
2401
|
nodes.append(node.get_node_input())
|
|
2048
2402
|
edges.extend(node.get_edge_input())
|
|
@@ -2054,7 +2408,9 @@ class FlowGraph:
|
|
|
2054
2408
|
for node in self.nodes:
|
|
2055
2409
|
node.reset(True)
|
|
2056
2410
|
|
|
2057
|
-
def copy_node(
|
|
2411
|
+
def copy_node(
|
|
2412
|
+
self, new_node_settings: input_schema.NodePromise, existing_setting_input: Any, node_type: str
|
|
2413
|
+
) -> None:
|
|
2058
2414
|
"""Creates a copy of an existing node.
|
|
2059
2415
|
|
|
2060
2416
|
Args:
|
|
@@ -2067,9 +2423,7 @@ class FlowGraph:
|
|
|
2067
2423
|
if isinstance(existing_setting_input, input_schema.NodePromise):
|
|
2068
2424
|
return
|
|
2069
2425
|
|
|
2070
|
-
combined_settings = combine_existing_settings_and_new_settings(
|
|
2071
|
-
existing_setting_input, new_node_settings
|
|
2072
|
-
)
|
|
2426
|
+
combined_settings = combine_existing_settings_and_new_settings(existing_setting_input, new_node_settings)
|
|
2073
2427
|
getattr(self, f"add_{node_type}")(combined_settings)
|
|
2074
2428
|
|
|
2075
2429
|
def generate_code(self):
|
|
@@ -2077,6 +2431,7 @@ class FlowGraph:
|
|
|
2077
2431
|
This method exports the flow graph to a Polars-compatible format.
|
|
2078
2432
|
"""
|
|
2079
2433
|
from flowfile_core.flowfile.code_generator.code_generator import export_flow_to_polars
|
|
2434
|
+
|
|
2080
2435
|
print(export_flow_to_polars(self))
|
|
2081
2436
|
|
|
2082
2437
|
|
|
@@ -2095,13 +2450,7 @@ def combine_existing_settings_and_new_settings(setting_input: Any, new_settings:
|
|
|
2095
2450
|
copied_setting_input = deepcopy(setting_input)
|
|
2096
2451
|
|
|
2097
2452
|
# Update only attributes that exist on new_settings
|
|
2098
|
-
fields_to_update = (
|
|
2099
|
-
"node_id",
|
|
2100
|
-
"pos_x",
|
|
2101
|
-
"pos_y",
|
|
2102
|
-
"description",
|
|
2103
|
-
"flow_id"
|
|
2104
|
-
)
|
|
2453
|
+
fields_to_update = ("node_id", "pos_x", "pos_y", "description", "flow_id")
|
|
2105
2454
|
|
|
2106
2455
|
for field in fields_to_update:
|
|
2107
2456
|
if hasattr(new_settings, field) and getattr(new_settings, field) is not None:
|
|
@@ -2117,12 +2466,12 @@ def add_connection(flow: FlowGraph, node_connection: input_schema.NodeConnection
|
|
|
2117
2466
|
flow: The FlowGraph instance to modify.
|
|
2118
2467
|
node_connection: An object defining the source and target of the connection.
|
|
2119
2468
|
"""
|
|
2120
|
-
logger.info(
|
|
2469
|
+
logger.info("adding a connection")
|
|
2121
2470
|
from_node = flow.get_node(node_connection.output_connection.node_id)
|
|
2122
2471
|
to_node = flow.get_node(node_connection.input_connection.node_id)
|
|
2123
|
-
logger.info(f
|
|
2472
|
+
logger.info(f"from_node={from_node}, to_node={to_node}")
|
|
2124
2473
|
if not (from_node and to_node):
|
|
2125
|
-
raise HTTPException(404,
|
|
2474
|
+
raise HTTPException(404, "Not not available")
|
|
2126
2475
|
else:
|
|
2127
2476
|
to_node.add_node_connection(from_node, node_connection.input_connection.get_node_input_connection_type())
|
|
2128
2477
|
|