Flowfile 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backends/main.py +25 -22
- build_backends/main_prd.py +10 -19
- flowfile/__init__.py +178 -74
- flowfile/__main__.py +10 -7
- flowfile/api.py +51 -57
- flowfile/web/__init__.py +14 -9
- flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
- flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
- flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
- flowfile/web/static/assets/{CloudConnectionManager-0dfba9f2.js → CloudConnectionView-f13f202b.js} +11 -11
- flowfile/web/static/assets/{CloudStorageReader-d5b1b6c9.js → CloudStorageReader-0023d4a5.js} +10 -8
- flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
- flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
- flowfile/web/static/assets/{CloudStorageWriter-00d87aad.js → CloudStorageWriter-8e781e11.js} +10 -8
- flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
- flowfile/web/static/assets/{ColumnSelector-4685e75d.js → ColumnSelector-8ad68ea9.js} +3 -5
- flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-23e909da.js → ContextMenu-31ee57f0.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-70ae0c79.js → ContextMenu-69a74055.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-f149cf7c.js → ContextMenu-8e2051c6.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
- flowfile/web/static/assets/{CrossJoin-702a3edd.js → CrossJoin-03df6938.js} +12 -10
- flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
- flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
- flowfile/web/static/assets/{CustomNode-b1519993.js → CustomNode-8479239b.js} +36 -24
- flowfile/web/static/assets/{DatabaseConnectionSettings-6f3e4ea5.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
- flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
- flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
- flowfile/web/static/assets/{DatabaseReader-d38c7295.js → DatabaseReader-c58b9552.js} +25 -15
- flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
- flowfile/web/static/assets/{DatabaseManager-cf5ef661.js → DatabaseView-d26a9140.js} +11 -11
- flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
- flowfile/web/static/assets/{DatabaseWriter-b04ef46a.js → DatabaseWriter-4d05ddc7.js} +17 -10
- flowfile/web/static/assets/{designer-8da3ba3a.css → DesignerView-a6d0ee84.css} +614 -546
- flowfile/web/static/assets/{designer-9633482a.js → DesignerView-e6f5c0e8.js} +1107 -3170
- flowfile/web/static/assets/{documentation-ca400224.js → DocumentationView-2e78ef1b.js} +5 -5
- flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
- flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
- flowfile/web/static/assets/{ExploreData-5fa10ed8.js → ExploreData-7b54caca.js} +18 -9
- flowfile/web/static/assets/{ExternalSource-d39af878.js → ExternalSource-3fa399b2.js} +9 -7
- flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
- flowfile/web/static/assets/Filter-7494ea97.css +48 -0
- flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
- flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
- flowfile/web/static/assets/{Formula-6b04fb1d.js → Formula-aac42b1e.js} +13 -11
- flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
- flowfile/web/static/assets/{FuzzyMatch-999521f4.js → FuzzyMatch-cd9bbfca.js} +12 -10
- flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
- flowfile/web/static/assets/{GraphSolver-17dd2198.js → GraphSolver-c7e6780e.js} +13 -11
- flowfile/web/static/assets/{GroupBy-6b039e18.js → GroupBy-93c5d22b.js} +9 -7
- flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
- flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
- flowfile/web/static/assets/{Join-24d0f113.js → Join-a19b2de2.js} +13 -11
- flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
- flowfile/web/static/assets/LoginView-d325d632.css +172 -0
- flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
- flowfile/web/static/assets/{ManualInput-34639209.js → ManualInput-8d3374b2.js} +170 -116
- flowfile/web/static/assets/{MultiSelect-0e8724a3.js → MultiSelect-ad1b6243.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
- flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
- flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
- flowfile/web/static/assets/{NumericInput-3d63a470.js → NumericInput-7100234c.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
- flowfile/web/static/assets/{Output-283fe388.css → Output-35e97000.css} +6 -6
- flowfile/web/static/assets/{Output-edea9802.js → Output-f5efd2aa.js} +12 -9
- flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
- flowfile/web/static/assets/{Pivot-61d19301.js → Pivot-d981d23c.js} +11 -9
- flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
- flowfile/web/static/assets/{PivotValidation-f97fec5b.js → PivotValidation-39386e95.js} +3 -3
- flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
- flowfile/web/static/assets/{PivotValidation-de9f43fe.js → PivotValidation-63de1f73.js} +3 -3
- flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
- flowfile/web/static/assets/{PolarsCode-bc3c9984.js → PolarsCode-f9d69217.js} +18 -9
- flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
- flowfile/web/static/assets/PopOver-d96599db.css +33 -0
- flowfile/web/static/assets/{Read-e808b239.css → Read-36e7bd51.css} +12 -12
- flowfile/web/static/assets/{Read-64a3f259.js → Read-aec2e377.js} +14 -11
- flowfile/web/static/assets/{RecordCount-3d5039be.js → RecordCount-78ed6845.js} +6 -4
- flowfile/web/static/assets/{RecordId-597510e0.js → RecordId-2156e890.js} +8 -6
- flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
- flowfile/web/static/assets/{SQLQueryComponent-df51adbe.js → SQLQueryComponent-48c72f5b.js} +3 -3
- flowfile/web/static/assets/{Sample-4be0a507.js → Sample-1352ca74.js} +6 -4
- flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
- flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
- flowfile/web/static/assets/{SecretManager-4839be57.js → SecretsView-17df66ee.js} +35 -36
- flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
- flowfile/web/static/assets/{Select-9b72f201.js → Select-0aee4c54.js} +9 -7
- flowfile/web/static/assets/{SettingsSection-f0f75a42.js → SettingsSection-0784e157.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-e1e9c953.js → SettingsSection-cd341bb6.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-7ded385d.js → SettingsSection-f2002a6d.js} +3 -3
- flowfile/web/static/assets/{SingleSelect-6c777aac.js → SingleSelect-460cc0ea.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
- flowfile/web/static/assets/{SliderInput-7cb93e62.js → SliderInput-5d926864.js} +7 -4
- flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
- flowfile/web/static/assets/{Sort-6cbde21a.js → Sort-3cdc971b.js} +9 -7
- flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
- flowfile/web/static/assets/{TextInput-d9a40c11.js → TextInput-a2d0bfbd.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-5896c375.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
- flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
- flowfile/web/static/assets/{TextToRows-c4fcbf4d.js → TextToRows-918945f7.js} +11 -10
- flowfile/web/static/assets/{ToggleSwitch-4ef91d19.js → ToggleSwitch-f0ef5196.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
- flowfile/web/static/assets/{UnavailableFields-a03f512c.js → UnavailableFields-bdad6144.js} +4 -4
- flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
- flowfile/web/static/assets/{Union-bfe9b996.js → Union-e8ab8c86.js} +8 -6
- flowfile/web/static/assets/{Unique-5d023a27.js → Unique-8cd4f976.js} +13 -10
- flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
- flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
- flowfile/web/static/assets/{Unpivot-91cc5354.js → Unpivot-8da14095.js} +10 -8
- flowfile/web/static/assets/{UnpivotValidation-7ee2de44.js → UnpivotValidation-6f7d89ff.js} +3 -3
- flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
- flowfile/web/static/assets/{VueGraphicWalker-e51b9924.js → VueGraphicWalker-3fb312e1.js} +4 -4
- flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
- flowfile/web/static/assets/{api-cf1221f0.js → api-24483f0d.js} +1 -1
- flowfile/web/static/assets/{api-c1bad5ca.js → api-8b81fa73.js} +1 -1
- flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
- flowfile/web/static/assets/{dropDown-614b998d.js → dropDown-ac0fda9d.js} +3 -3
- flowfile/web/static/assets/{fullEditor-f7971590.js → fullEditor-5497a84a.js} +11 -10
- flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
- flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
- flowfile/web/static/assets/{genericNodeSettings-4fe5f36b.js → genericNodeSettings-99014e1d.js} +5 -5
- flowfile/web/static/assets/index-07dda503.js +38 -0
- flowfile/web/static/assets/index-3ba44389.js +2696 -0
- flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
- flowfile/web/static/assets/{index-5429bbf8.js → index-fb6493ae.js} +41626 -40867
- flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
- flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
- flowfile/web/static/assets/{outputCsv-076b85ab.js → outputCsv-8f8ba42d.js} +3 -3
- flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
- flowfile/web/static/assets/{outputExcel-0fd17dbe.js → outputExcel-393f4fef.js} +3 -3
- flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
- flowfile/web/static/assets/{outputParquet-b61e0847.js → outputParquet-07c81f65.js} +4 -4
- flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
- flowfile/web/static/assets/{readCsv-a8bb8b61.js → readCsv-07f6d9ad.js} +3 -3
- flowfile/web/static/assets/{readCsv-c767cb37.css → readCsv-3bfac4c3.css} +15 -15
- flowfile/web/static/assets/{readExcel-806d2826.css → readExcel-3db6b763.css} +13 -13
- flowfile/web/static/assets/{readExcel-67b4aee0.js → readExcel-ed69bc8f.js} +5 -5
- flowfile/web/static/assets/{readParquet-48c81530.css → readParquet-c5244ad5.css} +4 -4
- flowfile/web/static/assets/{readParquet-92ce1dbc.js → readParquet-e3ed4528.js} +3 -3
- flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
- flowfile/web/static/assets/{selectDynamic-92e25ee3.js → selectDynamic-80b92899.js} +5 -5
- flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
- flowfile/web/static/assets/{vue-codemirror.esm-41b0e0d7.js → vue-codemirror.esm-0965f39f.js} +31 -640
- flowfile/web/static/assets/{vue-content-loader.es-2c8e608f.js → vue-content-loader.es-c506ad97.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +2 -3
- flowfile-0.5.3.dist-info/RECORD +402 -0
- flowfile_core/__init__.py +13 -6
- flowfile_core/auth/jwt.py +51 -16
- flowfile_core/auth/models.py +32 -7
- flowfile_core/auth/password.py +89 -0
- flowfile_core/auth/secrets.py +8 -6
- flowfile_core/configs/__init__.py +9 -7
- flowfile_core/configs/flow_logger.py +15 -14
- flowfile_core/configs/node_store/__init__.py +72 -4
- flowfile_core/configs/node_store/nodes.py +155 -172
- flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
- flowfile_core/configs/settings.py +28 -15
- flowfile_core/database/connection.py +7 -6
- flowfile_core/database/init_db.py +96 -2
- flowfile_core/database/models.py +3 -1
- flowfile_core/fileExplorer/__init__.py +17 -0
- flowfile_core/fileExplorer/funcs.py +123 -57
- flowfile_core/fileExplorer/utils.py +10 -11
- flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
- flowfile_core/flowfile/analytics/analytics_processor.py +26 -24
- flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
- flowfile_core/flowfile/analytics/utils.py +1 -1
- flowfile_core/flowfile/code_generator/code_generator.py +358 -244
- flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
- flowfile_core/flowfile/database_connection_manager/models.py +1 -1
- flowfile_core/flowfile/extensions.py +17 -12
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +115 -83
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +481 -423
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +31 -20
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +14 -15
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
- flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +190 -127
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
- flowfile_core/flowfile/flow_data_engine/utils.py +99 -67
- flowfile_core/flowfile/flow_graph.py +918 -571
- flowfile_core/flowfile/flow_graph_utils.py +31 -49
- flowfile_core/flowfile/flow_node/flow_node.py +330 -233
- flowfile_core/flowfile/flow_node/models.py +53 -41
- flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
- flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
- flowfile_core/flowfile/handler.py +80 -30
- flowfile_core/flowfile/manage/compatibility_enhancements.py +209 -126
- flowfile_core/flowfile/manage/io_flowfile.py +54 -57
- flowfile_core/flowfile/node_designer/__init__.py +15 -13
- flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
- flowfile_core/flowfile/node_designer/custom_node.py +162 -36
- flowfile_core/flowfile/node_designer/ui_components.py +135 -34
- flowfile_core/flowfile/schema_callbacks.py +71 -51
- flowfile_core/flowfile/setting_generator/__init__.py +0 -1
- flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
- flowfile_core/flowfile/setting_generator/settings.py +64 -53
- flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
- flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
- flowfile_core/flowfile/util/calculate_layout.py +9 -13
- flowfile_core/flowfile/util/execution_orderer.py +25 -17
- flowfile_core/flowfile/util/node_skipper.py +4 -4
- flowfile_core/flowfile/utils.py +19 -21
- flowfile_core/main.py +26 -19
- flowfile_core/routes/auth.py +284 -11
- flowfile_core/routes/cloud_connections.py +25 -25
- flowfile_core/routes/logs.py +21 -29
- flowfile_core/routes/public.py +3 -3
- flowfile_core/routes/routes.py +70 -34
- flowfile_core/routes/secrets.py +25 -27
- flowfile_core/routes/user_defined_components.py +483 -4
- flowfile_core/run_lock.py +0 -1
- flowfile_core/schemas/__init__.py +4 -6
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
- flowfile_core/schemas/cloud_storage_schemas.py +59 -53
- flowfile_core/schemas/input_schema.py +231 -144
- flowfile_core/schemas/output_model.py +49 -34
- flowfile_core/schemas/schemas.py +116 -89
- flowfile_core/schemas/transform_schema.py +518 -263
- flowfile_core/schemas/yaml_types.py +21 -7
- flowfile_core/secret_manager/secret_manager.py +17 -13
- flowfile_core/types.py +29 -9
- flowfile_core/utils/arrow_reader.py +7 -6
- flowfile_core/utils/excel_file_manager.py +3 -3
- flowfile_core/utils/fileManager.py +7 -7
- flowfile_core/utils/fl_executor.py +8 -10
- flowfile_core/utils/utils.py +4 -4
- flowfile_core/utils/validate_setup.py +5 -4
- flowfile_frame/__init__.py +106 -51
- flowfile_frame/adapters.py +2 -9
- flowfile_frame/adding_expr.py +73 -32
- flowfile_frame/cloud_storage/frame_helpers.py +27 -23
- flowfile_frame/cloud_storage/secret_manager.py +12 -26
- flowfile_frame/config.py +2 -5
- flowfile_frame/expr.py +311 -218
- flowfile_frame/expr.pyi +160 -159
- flowfile_frame/expr_name.py +23 -23
- flowfile_frame/flow_frame.py +571 -476
- flowfile_frame/flow_frame.pyi +123 -104
- flowfile_frame/flow_frame_methods.py +227 -246
- flowfile_frame/group_frame.py +50 -20
- flowfile_frame/join.py +2 -2
- flowfile_frame/lazy.py +129 -87
- flowfile_frame/lazy_methods.py +83 -30
- flowfile_frame/list_name_space.py +55 -50
- flowfile_frame/selectors.py +148 -68
- flowfile_frame/series.py +9 -7
- flowfile_frame/utils.py +19 -21
- flowfile_worker/__init__.py +12 -7
- flowfile_worker/configs.py +11 -19
- flowfile_worker/create/__init__.py +14 -9
- flowfile_worker/create/funcs.py +114 -77
- flowfile_worker/create/models.py +46 -43
- flowfile_worker/create/pl_types.py +14 -15
- flowfile_worker/create/read_excel_tables.py +34 -41
- flowfile_worker/create/utils.py +22 -19
- flowfile_worker/external_sources/s3_source/main.py +18 -51
- flowfile_worker/external_sources/s3_source/models.py +34 -27
- flowfile_worker/external_sources/sql_source/main.py +8 -5
- flowfile_worker/external_sources/sql_source/models.py +13 -9
- flowfile_worker/flow_logger.py +10 -8
- flowfile_worker/funcs.py +214 -155
- flowfile_worker/main.py +11 -17
- flowfile_worker/models.py +35 -28
- flowfile_worker/process_manager.py +2 -3
- flowfile_worker/routes.py +121 -90
- flowfile_worker/secrets.py +9 -6
- flowfile_worker/spawner.py +80 -49
- flowfile_worker/utils.py +3 -2
- shared/__init__.py +2 -7
- shared/storage_config.py +25 -13
- test_utils/postgres/commands.py +3 -2
- test_utils/postgres/fixtures.py +9 -9
- test_utils/s3/commands.py +1 -1
- test_utils/s3/data_generator.py +3 -4
- test_utils/s3/demo_data_generator.py +4 -7
- test_utils/s3/fixtures.py +7 -5
- tools/migrate/__init__.py +1 -1
- tools/migrate/__main__.py +16 -29
- tools/migrate/legacy_schemas.py +251 -190
- tools/migrate/migrate.py +193 -181
- tools/migrate/tests/conftest.py +1 -3
- tools/migrate/tests/test_migrate.py +36 -41
- tools/migrate/tests/test_migration_e2e.py +28 -29
- tools/migrate/tests/test_node_migrations.py +50 -20
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
- flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
- flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
- flowfile/web/static/assets/Filter-9b6d08db.js +0 -164
- flowfile/web/static/assets/Filter-f62091b3.css +0 -20
- flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
- flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
- flowfile/web/static/assets/nodeInput-5d0d6b79.js +0 -41
- flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
- flowfile/web/static/assets/secretApi-68435402.js +0 -46
- flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
- flowfile-0.5.1.dist-info/RECORD +0 -388
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,23 +1,17 @@
|
|
|
1
|
-
|
|
2
|
-
from typing import List
|
|
3
|
-
|
|
4
|
-
from polars import datatypes
|
|
5
1
|
import polars as pl
|
|
6
|
-
|
|
7
2
|
from pl_fuzzy_frame_match.output_column_name_utils import set_name_in_fuzzy_mappings
|
|
8
3
|
from pl_fuzzy_frame_match.pre_process import rename_fuzzy_right_mapping
|
|
4
|
+
from polars import datatypes
|
|
9
5
|
|
|
10
|
-
from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import fetch_unique_values
|
|
11
6
|
from flowfile_core.configs.flow_logger import main_logger
|
|
12
7
|
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, PlType
|
|
13
|
-
from flowfile_core.
|
|
14
|
-
from flowfile_core.schemas import input_schema
|
|
15
|
-
from flowfile_core.schemas.transform_schema import FuzzyMatchInputManager
|
|
8
|
+
from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import fetch_unique_values
|
|
9
|
+
from flowfile_core.schemas import input_schema, transform_schema
|
|
16
10
|
|
|
17
11
|
|
|
18
|
-
def _ensure_all_columns_have_select(
|
|
19
|
-
|
|
20
|
-
|
|
12
|
+
def _ensure_all_columns_have_select(
|
|
13
|
+
left_cols: list[str], right_cols: list[str], fuzzy_match_input: transform_schema.FuzzyMatchInputManager
|
|
14
|
+
):
|
|
21
15
|
"""
|
|
22
16
|
Ensure that all columns in the left and right FlowDataEngines are included in the fuzzy match input's select
|
|
23
17
|
statements.
|
|
@@ -33,13 +27,16 @@ def _ensure_all_columns_have_select(left_cols: List[str],
|
|
|
33
27
|
left_cols_in_select = {c.old_name for c in fuzzy_match_input.left_select.renames}
|
|
34
28
|
|
|
35
29
|
fuzzy_match_input.left_select.renames.extend(
|
|
36
|
-
[transform_schema.SelectInput(col) for col in left_cols if col not in left_cols_in_select]
|
|
30
|
+
[transform_schema.SelectInput(col) for col in left_cols if col not in left_cols_in_select]
|
|
31
|
+
)
|
|
37
32
|
fuzzy_match_input.right_select.renames.extend(
|
|
38
33
|
[transform_schema.SelectInput(col) for col in right_cols if col not in right_cols_in_select]
|
|
39
34
|
)
|
|
40
35
|
|
|
41
36
|
|
|
42
|
-
def _order_join_inputs_based_on_col_order(
|
|
37
|
+
def _order_join_inputs_based_on_col_order(
|
|
38
|
+
col_order: list[str], join_inputs: transform_schema.JoinInputsManager
|
|
39
|
+
) -> None:
|
|
43
40
|
"""
|
|
44
41
|
Ensure that the select columns in the fuzzy match input match the order of the incoming columns.
|
|
45
42
|
This function modifies the join_inputs object in-place.
|
|
@@ -52,17 +49,23 @@ def _order_join_inputs_based_on_col_order(col_order: List[str], join_inputs: tra
|
|
|
52
49
|
join_inputs.select_inputs.renames = ordered_renames
|
|
53
50
|
|
|
54
51
|
|
|
55
|
-
def calculate_fuzzy_match_schema(
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
52
|
+
def calculate_fuzzy_match_schema(
|
|
53
|
+
fm_input: transform_schema.FuzzyMatchInputManager,
|
|
54
|
+
left_schema: list[FlowfileColumn],
|
|
55
|
+
right_schema: list[FlowfileColumn],
|
|
56
|
+
):
|
|
57
|
+
_ensure_all_columns_have_select(
|
|
58
|
+
left_cols=[col.column_name for col in left_schema],
|
|
59
|
+
right_cols=[col.column_name for col in right_schema],
|
|
60
|
+
fuzzy_match_input=fm_input,
|
|
61
|
+
)
|
|
61
62
|
|
|
62
|
-
_order_join_inputs_based_on_col_order(
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
63
|
+
_order_join_inputs_based_on_col_order(
|
|
64
|
+
col_order=[col.column_name for col in left_schema], join_inputs=fm_input.left_select
|
|
65
|
+
)
|
|
66
|
+
_order_join_inputs_based_on_col_order(
|
|
67
|
+
col_order=[col.column_name for col in right_schema], join_inputs=fm_input.right_select
|
|
68
|
+
)
|
|
66
69
|
for column in fm_input.left_select.renames:
|
|
67
70
|
if column.join_key:
|
|
68
71
|
column.keep = True
|
|
@@ -78,20 +81,27 @@ def calculate_fuzzy_match_schema(fm_input: transform_schema.FuzzyMatchInputManag
|
|
|
78
81
|
for column in fm_input.left_select.renames:
|
|
79
82
|
column_schema = left_schema_dict.get(column.old_name)
|
|
80
83
|
if column_schema and (column.keep or column.join_key):
|
|
81
|
-
output_schema.append(
|
|
82
|
-
|
|
84
|
+
output_schema.append(
|
|
85
|
+
FlowfileColumn.from_input(
|
|
86
|
+
column.new_name, column_schema.data_type, example_values=column_schema.example_values
|
|
87
|
+
)
|
|
88
|
+
)
|
|
83
89
|
for column in fm_input.right_select.renames:
|
|
84
90
|
column_schema = right_schema_dict.get(column.old_name)
|
|
85
91
|
if column_schema and (column.keep or column.join_key):
|
|
86
|
-
output_schema.append(
|
|
87
|
-
|
|
92
|
+
output_schema.append(
|
|
93
|
+
FlowfileColumn.from_input(
|
|
94
|
+
column.new_name, column_schema.data_type, example_values=column_schema.example_values
|
|
95
|
+
)
|
|
96
|
+
)
|
|
88
97
|
set_name_in_fuzzy_mappings(new_join_mapping)
|
|
89
|
-
output_schema.extend(
|
|
90
|
-
|
|
98
|
+
output_schema.extend(
|
|
99
|
+
[FlowfileColumn.from_input(fuzzy_mapping.output_column_name, "Float64") for fuzzy_mapping in new_join_mapping]
|
|
100
|
+
)
|
|
91
101
|
return output_schema
|
|
92
102
|
|
|
93
103
|
|
|
94
|
-
def get_schema_of_column(node_input_schema:
|
|
104
|
+
def get_schema_of_column(node_input_schema: list[FlowfileColumn], col_name: str) -> FlowfileColumn | None:
|
|
95
105
|
for s in node_input_schema:
|
|
96
106
|
if s.name == col_name:
|
|
97
107
|
return s
|
|
@@ -99,44 +109,54 @@ def get_schema_of_column(node_input_schema: List[FlowfileColumn], col_name: str)
|
|
|
99
109
|
|
|
100
110
|
class InvalidSetup(ValueError):
|
|
101
111
|
"""Error raised when pivot column has too many unique values."""
|
|
112
|
+
|
|
102
113
|
pass
|
|
103
114
|
|
|
104
115
|
|
|
105
116
|
def get_output_data_type_pivot(schema: FlowfileColumn, agg_type: str) -> datatypes:
|
|
106
|
-
if agg_type in (
|
|
117
|
+
if agg_type in ("count", "n_unique"):
|
|
107
118
|
output_type = datatypes.Float64 # count is always float
|
|
108
|
-
elif schema.generic_datatype() ==
|
|
119
|
+
elif schema.generic_datatype() == "numeric":
|
|
109
120
|
output_type = datatypes.Float64
|
|
110
|
-
elif schema.generic_datatype() ==
|
|
121
|
+
elif schema.generic_datatype() == "string":
|
|
111
122
|
output_type = datatypes.Utf8
|
|
112
|
-
elif schema.generic_datatype() ==
|
|
123
|
+
elif schema.generic_datatype() == "date":
|
|
113
124
|
output_type = datatypes.Datetime
|
|
114
125
|
else:
|
|
115
126
|
output_type = datatypes.Utf8
|
|
116
127
|
return output_type
|
|
117
128
|
|
|
118
129
|
|
|
119
|
-
def pre_calculate_pivot_schema(
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
130
|
+
def pre_calculate_pivot_schema(
|
|
131
|
+
node_input_schema: list[FlowfileColumn],
|
|
132
|
+
pivot_input: transform_schema.PivotInput,
|
|
133
|
+
output_fields: list[input_schema.MinimalFieldInfo] = None,
|
|
134
|
+
input_lf: pl.LazyFrame = None,
|
|
135
|
+
) -> list[FlowfileColumn]:
|
|
136
|
+
index_columns_schema = [
|
|
137
|
+
get_schema_of_column(node_input_schema, index_col) for index_col in pivot_input.index_columns
|
|
138
|
+
]
|
|
125
139
|
val_column_schema = get_schema_of_column(node_input_schema, pivot_input.value_col)
|
|
126
140
|
if output_fields is not None and len(output_fields) > 0:
|
|
127
|
-
return index_columns_schema+[
|
|
128
|
-
|
|
129
|
-
|
|
141
|
+
return index_columns_schema + [
|
|
142
|
+
FlowfileColumn(PlType(column_name=output_field.name, pl_datatype=output_field.data_type))
|
|
143
|
+
for output_field in output_fields
|
|
144
|
+
]
|
|
130
145
|
|
|
131
146
|
else:
|
|
132
147
|
max_unique_vals = 200
|
|
133
|
-
unique_vals = fetch_unique_values(
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
148
|
+
unique_vals = fetch_unique_values(
|
|
149
|
+
input_lf.select(pivot_input.pivot_column)
|
|
150
|
+
.unique()
|
|
151
|
+
.sort(pivot_input.pivot_column)
|
|
152
|
+
.limit(max_unique_vals)
|
|
153
|
+
.cast(pl.String)
|
|
154
|
+
)
|
|
137
155
|
if len(unique_vals) >= max_unique_vals:
|
|
138
|
-
main_logger.warning(
|
|
139
|
-
|
|
156
|
+
main_logger.warning(
|
|
157
|
+
"Pivot column has too many unique values. Please consider using a different column."
|
|
158
|
+
f" Max unique values: {max_unique_vals}"
|
|
159
|
+
)
|
|
140
160
|
pl_output_fields = []
|
|
141
161
|
for val in unique_vals:
|
|
142
162
|
if len(pivot_input.aggregations) == 1:
|
|
@@ -145,5 +165,5 @@ def pre_calculate_pivot_schema(node_input_schema: List[FlowfileColumn],
|
|
|
145
165
|
else:
|
|
146
166
|
for agg in pivot_input.aggregations:
|
|
147
167
|
output_type = get_output_data_type_pivot(val_column_schema, agg)
|
|
148
|
-
pl_output_fields.append(PlType(column_name=f
|
|
168
|
+
pl_output_fields.append(PlType(column_name=f"{val}_{agg}", pl_datatype=output_type))
|
|
149
169
|
return index_columns_schema + [FlowfileColumn(pl_output_field) for pl_output_field in pl_output_fields]
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
+
from collections.abc import Callable
|
|
2
|
+
|
|
1
3
|
from flowfile_core.configs import logger
|
|
2
|
-
from typing import Callable
|
|
3
4
|
|
|
4
5
|
|
|
5
6
|
class SettingGenerator:
|
|
@@ -13,10 +14,10 @@ class SettingGenerator:
|
|
|
13
14
|
setattr(self, f.__name__, f)
|
|
14
15
|
|
|
15
16
|
def get_setting_generator(self, node_type: str) -> Callable:
|
|
16
|
-
logger.info(
|
|
17
|
+
logger.info("getting setting generator for " + node_type)
|
|
17
18
|
|
|
18
19
|
if node_type in self.setting_generator_set:
|
|
19
|
-
logger.info(
|
|
20
|
+
logger.info("setting generator found")
|
|
20
21
|
return getattr(self, node_type)
|
|
21
22
|
else:
|
|
22
23
|
return lambda x: x
|
|
@@ -33,9 +34,9 @@ class SettingUpdator:
|
|
|
33
34
|
setattr(self, f.__name__, f)
|
|
34
35
|
|
|
35
36
|
def get_setting_updator(self, node_type: str) -> Callable:
|
|
36
|
-
logger.info(
|
|
37
|
+
logger.info("getting setting updator for " + node_type)
|
|
37
38
|
if node_type in self.setting_updator_set:
|
|
38
|
-
logger.info(
|
|
39
|
+
logger.info("setting updator found")
|
|
39
40
|
return getattr(self, node_type)
|
|
40
41
|
else:
|
|
41
42
|
return lambda x: x
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
+
from collections.abc import Callable, Iterable
|
|
2
|
+
from functools import wraps
|
|
3
|
+
|
|
4
|
+
from pl_fuzzy_frame_match.models import FuzzyMapping
|
|
1
5
|
|
|
6
|
+
from flowfile_core.flowfile.setting_generator.setting_generator import SettingGenerator, SettingUpdator
|
|
2
7
|
from flowfile_core.schemas import input_schema, transform_schema
|
|
3
|
-
from typing import Callable, Iterable
|
|
4
|
-
from functools import wraps
|
|
5
8
|
from flowfile_core.schemas.output_model import NodeData
|
|
6
|
-
from flowfile_core.flowfile.setting_generator.setting_generator import SettingGenerator, SettingUpdator
|
|
7
|
-
from pl_fuzzy_frame_match.models import FuzzyMapping
|
|
8
9
|
|
|
9
10
|
setting_generator = SettingGenerator()
|
|
10
11
|
setting_updator = SettingUpdator()
|
|
@@ -16,6 +17,7 @@ def setting_generator_method(f: callable) -> Callable:
|
|
|
16
17
|
if node_data.setting_input is None or isinstance(node_data.setting_input, input_schema.NodePromise):
|
|
17
18
|
f(node_data)
|
|
18
19
|
return node_data
|
|
20
|
+
|
|
19
21
|
setting_generator.add_setting_generator_func(inner)
|
|
20
22
|
return inner
|
|
21
23
|
|
|
@@ -38,18 +40,19 @@ def join(node_data: "NodeData") -> NodeData:
|
|
|
38
40
|
if len(overlapping_cols) > 0:
|
|
39
41
|
join_key = overlapping_cols[0]
|
|
40
42
|
else:
|
|
41
|
-
join_key =
|
|
43
|
+
join_key = ""
|
|
42
44
|
join_input_manager = transform_schema.JoinInputManager(
|
|
43
|
-
transform_schema.JoinInput(
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
45
|
+
transform_schema.JoinInput(
|
|
46
|
+
join_mapping=join_key,
|
|
47
|
+
left_select=node_data.main_input.columns,
|
|
48
|
+
right_select=node_data.right_input.columns,
|
|
49
|
+
)
|
|
47
50
|
)
|
|
48
51
|
join_input_manager.auto_rename()
|
|
49
52
|
ji = join_input_manager.to_join_input()
|
|
50
|
-
node_data.setting_input = input_schema.NodeJoin(
|
|
51
|
-
|
|
52
|
-
|
|
53
|
+
node_data.setting_input = input_schema.NodeJoin(
|
|
54
|
+
flow_id=node_data.flow_id, node_id=node_data.node_id, join_input=ji
|
|
55
|
+
)
|
|
53
56
|
return node_data
|
|
54
57
|
|
|
55
58
|
|
|
@@ -57,24 +60,31 @@ def join(node_data: "NodeData") -> NodeData:
|
|
|
57
60
|
def cross_join(node_data: "NodeData") -> NodeData:
|
|
58
61
|
if node_data.right_input and node_data.main_input:
|
|
59
62
|
cj_input_manager = transform_schema.CrossJoinInputManager(
|
|
60
|
-
transform_schema.CrossJoinInput(
|
|
61
|
-
|
|
63
|
+
transform_schema.CrossJoinInput(
|
|
64
|
+
left_select=node_data.main_input.columns, right_select=node_data.right_input.columns
|
|
65
|
+
)
|
|
62
66
|
)
|
|
63
67
|
cj_input_manager.auto_rename()
|
|
64
68
|
cj = cj_input_manager.to_cross_join_input()
|
|
65
|
-
node_data.setting_input = input_schema.NodeCrossJoin(
|
|
66
|
-
|
|
67
|
-
|
|
69
|
+
node_data.setting_input = input_schema.NodeCrossJoin(
|
|
70
|
+
flow_id=node_data.flow_id, node_id=node_data.node_id, cross_join_input=cj
|
|
71
|
+
)
|
|
68
72
|
return node_data
|
|
69
73
|
|
|
70
74
|
|
|
71
75
|
@setting_generator_method
|
|
72
76
|
def filter(node_data: "NodeData") -> NodeData:
|
|
73
77
|
if node_data.main_input:
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
+
# Default to basic mode with an empty basic filter
|
|
79
|
+
basic_filter = transform_schema.BasicFilter(
|
|
80
|
+
field="",
|
|
81
|
+
operator=transform_schema.FilterOperator.EQUALS,
|
|
82
|
+
value="",
|
|
83
|
+
)
|
|
84
|
+
fi = transform_schema.FilterInput(basic_filter=basic_filter, mode="basic")
|
|
85
|
+
node_data.setting_input = input_schema.NodeFilter(
|
|
86
|
+
flow_id=node_data.flow_id, node_id=node_data.node_id, filter_input=fi
|
|
87
|
+
)
|
|
78
88
|
return node_data
|
|
79
89
|
|
|
80
90
|
|
|
@@ -86,26 +96,26 @@ def join(node_data: NodeData):
|
|
|
86
96
|
right_columns = set(node_data.right_input.columns)
|
|
87
97
|
left_select = setting_input.join_input.left_select
|
|
88
98
|
right_select = setting_input.join_input.right_select
|
|
99
|
+
# Update is_available based on whether column exists in input
|
|
89
100
|
for ls in left_select.renames:
|
|
90
|
-
|
|
91
|
-
left_select.remove_select_input(ls.old_name)
|
|
101
|
+
ls.is_available = ls.old_name in left_columns
|
|
92
102
|
for rs in right_select.renames:
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
103
|
+
rs.is_available = rs.old_name in right_columns
|
|
104
|
+
# Check ALL columns in renames to prevent duplicates
|
|
105
|
+
existing_columns_left = set(r.old_name for r in left_select.renames)
|
|
106
|
+
existing_columns_right = set(r.old_name for r in right_select.renames)
|
|
97
107
|
missing_incoming_left_columns = [ilc for ilc in left_columns if ilc not in existing_columns_left]
|
|
98
108
|
missing_incoming_right_columns = [irc for irc in right_columns if irc not in existing_columns_right]
|
|
99
|
-
if not hasattr(setting_input,
|
|
109
|
+
if not hasattr(setting_input, "auto_keep_left"):
|
|
100
110
|
setting_input.auto_keep_left = False
|
|
101
|
-
if not hasattr(setting_input,
|
|
111
|
+
if not hasattr(setting_input, "auto_keep_right"):
|
|
102
112
|
setting_input.auto_keep_right = False
|
|
103
113
|
for milc in missing_incoming_left_columns:
|
|
104
114
|
select_input = transform_schema.SelectInput(old_name=milc, keep=setting_input.auto_keep_left)
|
|
105
|
-
setting_input.join_input.add_new_select_column(select_input,
|
|
115
|
+
setting_input.join_input.add_new_select_column(select_input, "left")
|
|
106
116
|
for mirc in missing_incoming_right_columns:
|
|
107
117
|
select_input = transform_schema.SelectInput(old_name=mirc, keep=setting_input.auto_keep_right)
|
|
108
|
-
setting_input.join_input.add_new_select_column(select_input,
|
|
118
|
+
setting_input.join_input.add_new_select_column(select_input, "right")
|
|
109
119
|
return node_data
|
|
110
120
|
|
|
111
121
|
|
|
@@ -117,31 +127,32 @@ def cross_join(node_data: NodeData):
|
|
|
117
127
|
right_columns = set(node_data.right_input.columns)
|
|
118
128
|
left_select = setting_input.cross_join_input.left_select
|
|
119
129
|
right_select = setting_input.cross_join_input.right_select
|
|
130
|
+
# Update is_available based on whether column exists in input
|
|
120
131
|
for ls in left_select.renames:
|
|
121
|
-
|
|
122
|
-
left_select.remove_select_input(ls.old_name)
|
|
132
|
+
ls.is_available = ls.old_name in left_columns
|
|
123
133
|
for rs in right_select.renames:
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
134
|
+
rs.is_available = rs.old_name in right_columns
|
|
135
|
+
# Check ALL columns in renames to prevent duplicates
|
|
136
|
+
existing_columns_left = set(r.old_name for r in left_select.renames)
|
|
137
|
+
existing_columns_right = set(r.old_name for r in right_select.renames)
|
|
128
138
|
missing_incoming_left_columns = [ilc for ilc in left_columns if ilc not in existing_columns_left]
|
|
129
139
|
missing_incoming_right_columns = [irc for irc in right_columns if irc not in existing_columns_right]
|
|
130
|
-
if not hasattr(setting_input,
|
|
140
|
+
if not hasattr(setting_input, "auto_keep_left"):
|
|
131
141
|
setting_input.auto_keep_left = False
|
|
132
|
-
if not hasattr(setting_input,
|
|
142
|
+
if not hasattr(setting_input, "auto_keep_right"):
|
|
133
143
|
setting_input.auto_keep_right = False
|
|
134
144
|
for milc in missing_incoming_left_columns:
|
|
135
145
|
select_input = transform_schema.SelectInput(old_name=milc, keep=setting_input.auto_keep_left)
|
|
136
|
-
setting_input.cross_join_input.add_new_select_column(select_input,
|
|
146
|
+
setting_input.cross_join_input.add_new_select_column(select_input, "left")
|
|
137
147
|
for mirc in missing_incoming_right_columns:
|
|
138
148
|
select_input = transform_schema.SelectInput(old_name=mirc, keep=setting_input.auto_keep_right)
|
|
139
|
-
setting_input.cross_join_input.add_new_select_column(select_input,
|
|
149
|
+
setting_input.cross_join_input.add_new_select_column(select_input, "right")
|
|
140
150
|
return node_data
|
|
141
151
|
|
|
142
152
|
|
|
143
|
-
def check_if_fuzzy_match_is_valid(
|
|
144
|
-
|
|
153
|
+
def check_if_fuzzy_match_is_valid(
|
|
154
|
+
left_columns: Iterable[str], right_columns: Iterable[str], fuzzy_map: FuzzyMapping
|
|
155
|
+
) -> bool:
|
|
145
156
|
if fuzzy_map.left_col not in left_columns:
|
|
146
157
|
return False
|
|
147
158
|
if fuzzy_map.right_col not in right_columns:
|
|
@@ -159,24 +170,24 @@ def fuzzy_match(node_data: NodeData):
|
|
|
159
170
|
right_select = setting_input.join_input.right_select
|
|
160
171
|
for fuzzy_map in setting_input.join_input.join_mapping:
|
|
161
172
|
fuzzy_map.valid = check_if_fuzzy_match_is_valid(left_columns, right_columns, fuzzy_map)
|
|
173
|
+
# Update is_available based on whether column exists in input
|
|
162
174
|
for ls in left_select.renames:
|
|
163
|
-
|
|
164
|
-
left_select.remove_select_input(ls.old_name)
|
|
175
|
+
ls.is_available = ls.old_name in left_columns
|
|
165
176
|
for rs in right_select.renames:
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
177
|
+
rs.is_available = rs.old_name in right_columns
|
|
178
|
+
# Check ALL columns in renames to prevent duplicates
|
|
179
|
+
existing_columns_left = set(r.old_name for r in left_select.renames)
|
|
180
|
+
existing_columns_right = set(r.old_name for r in right_select.renames)
|
|
170
181
|
missing_incoming_left_columns = [ilc for ilc in left_columns if ilc not in existing_columns_left]
|
|
171
182
|
missing_incoming_right_columns = [irc for irc in right_columns if irc not in existing_columns_right]
|
|
172
|
-
if not hasattr(setting_input,
|
|
183
|
+
if not hasattr(setting_input, "auto_keep_left"):
|
|
173
184
|
setting_input.auto_keep_left = False
|
|
174
|
-
if not hasattr(setting_input,
|
|
185
|
+
if not hasattr(setting_input, "auto_keep_right"):
|
|
175
186
|
setting_input.auto_keep_right = False
|
|
176
187
|
for milc in missing_incoming_left_columns:
|
|
177
188
|
select_input = transform_schema.SelectInput(old_name=milc, keep=setting_input.auto_keep_left)
|
|
178
|
-
setting_input.join_input.add_new_select_column(select_input,
|
|
189
|
+
setting_input.join_input.add_new_select_column(select_input, "left")
|
|
179
190
|
for mirc in missing_incoming_right_columns:
|
|
180
191
|
select_input = transform_schema.SelectInput(old_name=mirc, keep=setting_input.auto_keep_right)
|
|
181
|
-
setting_input.join_input.add_new_select_column(select_input,
|
|
192
|
+
setting_input.join_input.add_new_select_column(select_input, "right")
|
|
182
193
|
return node_data
|
|
@@ -1,31 +1,34 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
|
-
from
|
|
3
|
-
from
|
|
2
|
+
from collections.abc import Callable, Generator
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
4
5
|
import polars as pl
|
|
5
6
|
|
|
7
|
+
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
|
|
8
|
+
|
|
6
9
|
|
|
7
10
|
class ExternalDataSource(ABC):
|
|
8
|
-
schema:
|
|
9
|
-
data_getter:
|
|
11
|
+
schema: list[FlowfileColumn] | None
|
|
12
|
+
data_getter: Callable | None
|
|
10
13
|
is_collected: bool
|
|
11
14
|
cache_store: Any
|
|
12
15
|
_type: str
|
|
13
|
-
initial_data_getter:
|
|
16
|
+
initial_data_getter: Callable | None
|
|
14
17
|
|
|
15
18
|
@abstractmethod
|
|
16
19
|
def __init__(self):
|
|
17
20
|
pass
|
|
18
21
|
|
|
19
22
|
@abstractmethod
|
|
20
|
-
def get_initial_data(self) ->
|
|
23
|
+
def get_initial_data(self) -> list[dict[str, Any]]:
|
|
21
24
|
pass
|
|
22
25
|
|
|
23
26
|
@abstractmethod
|
|
24
|
-
def get_iter(self) -> Generator[
|
|
27
|
+
def get_iter(self) -> Generator[dict[str, Any], None, None]:
|
|
25
28
|
pass
|
|
26
29
|
|
|
27
30
|
@abstractmethod
|
|
28
|
-
def get_sample(self, n: int = 10000) -> Generator[
|
|
31
|
+
def get_sample(self, n: int = 10000) -> Generator[dict[str, Any], None, None]:
|
|
29
32
|
pass
|
|
30
33
|
|
|
31
34
|
@abstractmethod
|
|
@@ -34,6 +37,5 @@ class ExternalDataSource(ABC):
|
|
|
34
37
|
|
|
35
38
|
@staticmethod
|
|
36
39
|
@abstractmethod
|
|
37
|
-
def parse_schema(*args, **kwargs) ->
|
|
40
|
+
def parse_schema(*args, **kwargs) -> list[FlowfileColumn]:
|
|
38
41
|
pass
|
|
39
|
-
|
|
@@ -1,16 +1,19 @@
|
|
|
1
|
-
from
|
|
1
|
+
from collections.abc import Callable, Generator
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
import polars as pl
|
|
5
|
+
|
|
2
6
|
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
|
|
3
|
-
from flowfile_core.schemas import input_schema
|
|
4
7
|
from flowfile_core.flowfile.sources.external_sources.base_class import ExternalDataSource
|
|
5
|
-
|
|
8
|
+
from flowfile_core.schemas import input_schema
|
|
6
9
|
|
|
7
10
|
|
|
8
11
|
class CustomExternalSourceSettings:
|
|
9
12
|
data_getter: Generator
|
|
10
|
-
initial_data_getter:
|
|
11
|
-
orientation: str =
|
|
13
|
+
initial_data_getter: Callable | None = None
|
|
14
|
+
orientation: str = "row"
|
|
12
15
|
|
|
13
|
-
def __init__(self, data_getter: Generator, initial_data_getter:
|
|
16
|
+
def __init__(self, data_getter: Generator, initial_data_getter: Callable | None = None, orientation: str = "row"):
|
|
14
17
|
self.data_getter = data_getter
|
|
15
18
|
self.initial_data_getter = initial_data_getter
|
|
16
19
|
self.orientation = orientation
|
|
@@ -18,15 +21,18 @@ class CustomExternalSourceSettings:
|
|
|
18
21
|
|
|
19
22
|
class CustomExternalSource(ExternalDataSource):
|
|
20
23
|
data_getter: Generator = None
|
|
21
|
-
schema:
|
|
22
|
-
cache_store:
|
|
24
|
+
schema: list[FlowfileColumn] | None = None
|
|
25
|
+
cache_store: list = None
|
|
23
26
|
is_collected: bool = False
|
|
24
27
|
|
|
25
|
-
def __init__(
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
28
|
+
def __init__(
|
|
29
|
+
self,
|
|
30
|
+
data_getter: Generator[Any, None, None],
|
|
31
|
+
initial_data_getter: Callable = None,
|
|
32
|
+
orientation: str = "row",
|
|
33
|
+
schema: list = None,
|
|
34
|
+
**kwargs,
|
|
35
|
+
):
|
|
30
36
|
self.cache_store = list()
|
|
31
37
|
self.data_getter = data_getter
|
|
32
38
|
self.collected = False
|
|
@@ -38,23 +44,27 @@ class CustomExternalSource(ExternalDataSource):
|
|
|
38
44
|
else:
|
|
39
45
|
self.schema = None
|
|
40
46
|
|
|
41
|
-
if not initial_data_getter and orientation ==
|
|
47
|
+
if not initial_data_getter and orientation == "row":
|
|
48
|
+
|
|
42
49
|
def initial_data_getter():
|
|
43
50
|
if len(self.cache_store) == 0:
|
|
44
51
|
self.cache_store.append(next(data_getter, None))
|
|
45
52
|
return self.cache_store
|
|
53
|
+
|
|
46
54
|
self.initial_data_getter = initial_data_getter
|
|
47
55
|
elif initial_data_getter:
|
|
48
56
|
self.initial_data_getter = initial_data_getter
|
|
49
57
|
elif self.schema:
|
|
58
|
+
|
|
50
59
|
def initial_data_getter():
|
|
51
60
|
return [{d.column_name: None for d in self.schema}]
|
|
61
|
+
|
|
52
62
|
self.initial_data_getter = initial_data_getter
|
|
53
63
|
else:
|
|
54
64
|
self.initial_data_getter = None
|
|
55
65
|
|
|
56
66
|
@staticmethod
|
|
57
|
-
def parse_schema(schema:
|
|
67
|
+
def parse_schema(schema: list[Any]) -> list[FlowfileColumn]:
|
|
58
68
|
if len(schema) == 0:
|
|
59
69
|
return []
|
|
60
70
|
first_col = schema[0]
|
|
@@ -63,7 +73,7 @@ class CustomExternalSource(ExternalDataSource):
|
|
|
63
73
|
elif isinstance(first_col, (list, tuple)):
|
|
64
74
|
return [FlowfileColumn.from_input(column_name=col[0], data_type=col[1]) for col in schema]
|
|
65
75
|
elif isinstance(first_col, str):
|
|
66
|
-
return [FlowfileColumn.from_input(column_name=col, data_type=
|
|
76
|
+
return [FlowfileColumn.from_input(column_name=col, data_type="varchar") for col in schema]
|
|
67
77
|
elif isinstance(first_col, input_schema.MinimalFieldInfo):
|
|
68
78
|
return [FlowfileColumn.from_input(column_name=col.name, data_type=col.data_type) for col in schema]
|
|
69
79
|
elif isinstance(first_col, FlowfileColumn):
|
|
@@ -76,7 +86,7 @@ class CustomExternalSource(ExternalDataSource):
|
|
|
76
86
|
return self.initial_data_getter()
|
|
77
87
|
return []
|
|
78
88
|
|
|
79
|
-
def get_iter(self) -> Generator[
|
|
89
|
+
def get_iter(self) -> Generator[dict[str, Any], None, None]:
|
|
80
90
|
if self.collected:
|
|
81
91
|
return
|
|
82
92
|
for data in self.cache_store:
|
|
@@ -1,10 +1,13 @@
|
|
|
1
|
-
from
|
|
1
|
+
from collections.abc import Generator
|
|
2
2
|
from time import sleep
|
|
3
|
-
from
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
4
5
|
import requests
|
|
5
6
|
|
|
7
|
+
from flowfile_core.schemas.input_schema import SampleUsers
|
|
6
8
|
|
|
7
|
-
|
|
9
|
+
|
|
10
|
+
def getter(data: SampleUsers) -> Generator[dict[str, Any], None, None]:
|
|
8
11
|
"""
|
|
9
12
|
Sample users generator function. This is a minimal example of a generator function that yields user data and can
|
|
10
13
|
be used in a flowfile. The function simulates a delay to mimic the behavior of an external data source.
|
|
@@ -17,13 +20,10 @@ def getter(data: SampleUsers) -> Generator[Dict[str, Any], None, None]:
|
|
|
17
20
|
index_pos = 0
|
|
18
21
|
for i in range(data.size):
|
|
19
22
|
sleep(0.01)
|
|
20
|
-
headers = {
|
|
21
|
-
'x-api-key': 'reqres-free-v1'
|
|
22
|
-
}
|
|
23
|
+
headers = {"x-api-key": "reqres-free-v1"}
|
|
23
24
|
|
|
24
25
|
response = requests.get("https://reqres.in/api/users", headers=headers).json()
|
|
25
|
-
for v in response[
|
|
26
|
-
v[
|
|
26
|
+
for v in response["data"]:
|
|
27
|
+
v["index"] = index_pos
|
|
27
28
|
index_pos += 1
|
|
28
29
|
yield v
|
|
29
|
-
|