Flowfile 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backends/main.py +25 -22
- build_backends/main_prd.py +10 -19
- flowfile/__init__.py +178 -74
- flowfile/__main__.py +10 -7
- flowfile/api.py +51 -57
- flowfile/web/__init__.py +14 -9
- flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
- flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
- flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
- flowfile/web/static/assets/{CloudConnectionManager-0dfba9f2.js → CloudConnectionView-f13f202b.js} +11 -11
- flowfile/web/static/assets/{CloudStorageReader-d5b1b6c9.js → CloudStorageReader-0023d4a5.js} +10 -8
- flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
- flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
- flowfile/web/static/assets/{CloudStorageWriter-00d87aad.js → CloudStorageWriter-8e781e11.js} +10 -8
- flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
- flowfile/web/static/assets/{ColumnSelector-4685e75d.js → ColumnSelector-8ad68ea9.js} +3 -5
- flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-23e909da.js → ContextMenu-31ee57f0.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-70ae0c79.js → ContextMenu-69a74055.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-f149cf7c.js → ContextMenu-8e2051c6.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
- flowfile/web/static/assets/{CrossJoin-702a3edd.js → CrossJoin-03df6938.js} +12 -10
- flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
- flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
- flowfile/web/static/assets/{CustomNode-b1519993.js → CustomNode-8479239b.js} +36 -24
- flowfile/web/static/assets/{DatabaseConnectionSettings-6f3e4ea5.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
- flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
- flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
- flowfile/web/static/assets/{DatabaseReader-d38c7295.js → DatabaseReader-c58b9552.js} +25 -15
- flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
- flowfile/web/static/assets/{DatabaseManager-cf5ef661.js → DatabaseView-d26a9140.js} +11 -11
- flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
- flowfile/web/static/assets/{DatabaseWriter-b04ef46a.js → DatabaseWriter-4d05ddc7.js} +17 -10
- flowfile/web/static/assets/{designer-8da3ba3a.css → DesignerView-a6d0ee84.css} +614 -546
- flowfile/web/static/assets/{designer-9633482a.js → DesignerView-e6f5c0e8.js} +1107 -3170
- flowfile/web/static/assets/{documentation-ca400224.js → DocumentationView-2e78ef1b.js} +5 -5
- flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
- flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
- flowfile/web/static/assets/{ExploreData-5fa10ed8.js → ExploreData-7b54caca.js} +18 -9
- flowfile/web/static/assets/{ExternalSource-d39af878.js → ExternalSource-3fa399b2.js} +9 -7
- flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
- flowfile/web/static/assets/Filter-7494ea97.css +48 -0
- flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
- flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
- flowfile/web/static/assets/{Formula-6b04fb1d.js → Formula-aac42b1e.js} +13 -11
- flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
- flowfile/web/static/assets/{FuzzyMatch-999521f4.js → FuzzyMatch-cd9bbfca.js} +12 -10
- flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
- flowfile/web/static/assets/{GraphSolver-17dd2198.js → GraphSolver-c7e6780e.js} +13 -11
- flowfile/web/static/assets/{GroupBy-6b039e18.js → GroupBy-93c5d22b.js} +9 -7
- flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
- flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
- flowfile/web/static/assets/{Join-24d0f113.js → Join-a19b2de2.js} +13 -11
- flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
- flowfile/web/static/assets/LoginView-d325d632.css +172 -0
- flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
- flowfile/web/static/assets/{ManualInput-34639209.js → ManualInput-8d3374b2.js} +170 -116
- flowfile/web/static/assets/{MultiSelect-0e8724a3.js → MultiSelect-ad1b6243.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
- flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
- flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
- flowfile/web/static/assets/{NumericInput-3d63a470.js → NumericInput-7100234c.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
- flowfile/web/static/assets/{Output-283fe388.css → Output-35e97000.css} +6 -6
- flowfile/web/static/assets/{Output-edea9802.js → Output-f5efd2aa.js} +12 -9
- flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
- flowfile/web/static/assets/{Pivot-61d19301.js → Pivot-d981d23c.js} +11 -9
- flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
- flowfile/web/static/assets/{PivotValidation-f97fec5b.js → PivotValidation-39386e95.js} +3 -3
- flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
- flowfile/web/static/assets/{PivotValidation-de9f43fe.js → PivotValidation-63de1f73.js} +3 -3
- flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
- flowfile/web/static/assets/{PolarsCode-bc3c9984.js → PolarsCode-f9d69217.js} +18 -9
- flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
- flowfile/web/static/assets/PopOver-d96599db.css +33 -0
- flowfile/web/static/assets/{Read-e808b239.css → Read-36e7bd51.css} +12 -12
- flowfile/web/static/assets/{Read-64a3f259.js → Read-aec2e377.js} +14 -11
- flowfile/web/static/assets/{RecordCount-3d5039be.js → RecordCount-78ed6845.js} +6 -4
- flowfile/web/static/assets/{RecordId-597510e0.js → RecordId-2156e890.js} +8 -6
- flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
- flowfile/web/static/assets/{SQLQueryComponent-df51adbe.js → SQLQueryComponent-48c72f5b.js} +3 -3
- flowfile/web/static/assets/{Sample-4be0a507.js → Sample-1352ca74.js} +6 -4
- flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
- flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
- flowfile/web/static/assets/{SecretManager-4839be57.js → SecretsView-17df66ee.js} +35 -36
- flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
- flowfile/web/static/assets/{Select-9b72f201.js → Select-0aee4c54.js} +9 -7
- flowfile/web/static/assets/{SettingsSection-f0f75a42.js → SettingsSection-0784e157.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-e1e9c953.js → SettingsSection-cd341bb6.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-7ded385d.js → SettingsSection-f2002a6d.js} +3 -3
- flowfile/web/static/assets/{SingleSelect-6c777aac.js → SingleSelect-460cc0ea.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
- flowfile/web/static/assets/{SliderInput-7cb93e62.js → SliderInput-5d926864.js} +7 -4
- flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
- flowfile/web/static/assets/{Sort-6cbde21a.js → Sort-3cdc971b.js} +9 -7
- flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
- flowfile/web/static/assets/{TextInput-d9a40c11.js → TextInput-a2d0bfbd.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-5896c375.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
- flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
- flowfile/web/static/assets/{TextToRows-c4fcbf4d.js → TextToRows-918945f7.js} +11 -10
- flowfile/web/static/assets/{ToggleSwitch-4ef91d19.js → ToggleSwitch-f0ef5196.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
- flowfile/web/static/assets/{UnavailableFields-a03f512c.js → UnavailableFields-bdad6144.js} +4 -4
- flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
- flowfile/web/static/assets/{Union-bfe9b996.js → Union-e8ab8c86.js} +8 -6
- flowfile/web/static/assets/{Unique-5d023a27.js → Unique-8cd4f976.js} +13 -10
- flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
- flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
- flowfile/web/static/assets/{Unpivot-91cc5354.js → Unpivot-8da14095.js} +10 -8
- flowfile/web/static/assets/{UnpivotValidation-7ee2de44.js → UnpivotValidation-6f7d89ff.js} +3 -3
- flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
- flowfile/web/static/assets/{VueGraphicWalker-e51b9924.js → VueGraphicWalker-3fb312e1.js} +4 -4
- flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
- flowfile/web/static/assets/{api-cf1221f0.js → api-24483f0d.js} +1 -1
- flowfile/web/static/assets/{api-c1bad5ca.js → api-8b81fa73.js} +1 -1
- flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
- flowfile/web/static/assets/{dropDown-614b998d.js → dropDown-ac0fda9d.js} +3 -3
- flowfile/web/static/assets/{fullEditor-f7971590.js → fullEditor-5497a84a.js} +11 -10
- flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
- flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
- flowfile/web/static/assets/{genericNodeSettings-4fe5f36b.js → genericNodeSettings-99014e1d.js} +5 -5
- flowfile/web/static/assets/index-07dda503.js +38 -0
- flowfile/web/static/assets/index-3ba44389.js +2696 -0
- flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
- flowfile/web/static/assets/{index-5429bbf8.js → index-fb6493ae.js} +41626 -40867
- flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
- flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
- flowfile/web/static/assets/{outputCsv-076b85ab.js → outputCsv-8f8ba42d.js} +3 -3
- flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
- flowfile/web/static/assets/{outputExcel-0fd17dbe.js → outputExcel-393f4fef.js} +3 -3
- flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
- flowfile/web/static/assets/{outputParquet-b61e0847.js → outputParquet-07c81f65.js} +4 -4
- flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
- flowfile/web/static/assets/{readCsv-a8bb8b61.js → readCsv-07f6d9ad.js} +3 -3
- flowfile/web/static/assets/{readCsv-c767cb37.css → readCsv-3bfac4c3.css} +15 -15
- flowfile/web/static/assets/{readExcel-806d2826.css → readExcel-3db6b763.css} +13 -13
- flowfile/web/static/assets/{readExcel-67b4aee0.js → readExcel-ed69bc8f.js} +5 -5
- flowfile/web/static/assets/{readParquet-48c81530.css → readParquet-c5244ad5.css} +4 -4
- flowfile/web/static/assets/{readParquet-92ce1dbc.js → readParquet-e3ed4528.js} +3 -3
- flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
- flowfile/web/static/assets/{selectDynamic-92e25ee3.js → selectDynamic-80b92899.js} +5 -5
- flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
- flowfile/web/static/assets/{vue-codemirror.esm-41b0e0d7.js → vue-codemirror.esm-0965f39f.js} +31 -640
- flowfile/web/static/assets/{vue-content-loader.es-2c8e608f.js → vue-content-loader.es-c506ad97.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +2 -3
- flowfile-0.5.3.dist-info/RECORD +402 -0
- flowfile_core/__init__.py +13 -6
- flowfile_core/auth/jwt.py +51 -16
- flowfile_core/auth/models.py +32 -7
- flowfile_core/auth/password.py +89 -0
- flowfile_core/auth/secrets.py +8 -6
- flowfile_core/configs/__init__.py +9 -7
- flowfile_core/configs/flow_logger.py +15 -14
- flowfile_core/configs/node_store/__init__.py +72 -4
- flowfile_core/configs/node_store/nodes.py +155 -172
- flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
- flowfile_core/configs/settings.py +28 -15
- flowfile_core/database/connection.py +7 -6
- flowfile_core/database/init_db.py +96 -2
- flowfile_core/database/models.py +3 -1
- flowfile_core/fileExplorer/__init__.py +17 -0
- flowfile_core/fileExplorer/funcs.py +123 -57
- flowfile_core/fileExplorer/utils.py +10 -11
- flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
- flowfile_core/flowfile/analytics/analytics_processor.py +26 -24
- flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
- flowfile_core/flowfile/analytics/utils.py +1 -1
- flowfile_core/flowfile/code_generator/code_generator.py +358 -244
- flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
- flowfile_core/flowfile/database_connection_manager/models.py +1 -1
- flowfile_core/flowfile/extensions.py +17 -12
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +115 -83
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +481 -423
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +31 -20
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +14 -15
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
- flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +190 -127
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
- flowfile_core/flowfile/flow_data_engine/utils.py +99 -67
- flowfile_core/flowfile/flow_graph.py +918 -571
- flowfile_core/flowfile/flow_graph_utils.py +31 -49
- flowfile_core/flowfile/flow_node/flow_node.py +330 -233
- flowfile_core/flowfile/flow_node/models.py +53 -41
- flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
- flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
- flowfile_core/flowfile/handler.py +80 -30
- flowfile_core/flowfile/manage/compatibility_enhancements.py +209 -126
- flowfile_core/flowfile/manage/io_flowfile.py +54 -57
- flowfile_core/flowfile/node_designer/__init__.py +15 -13
- flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
- flowfile_core/flowfile/node_designer/custom_node.py +162 -36
- flowfile_core/flowfile/node_designer/ui_components.py +135 -34
- flowfile_core/flowfile/schema_callbacks.py +71 -51
- flowfile_core/flowfile/setting_generator/__init__.py +0 -1
- flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
- flowfile_core/flowfile/setting_generator/settings.py +64 -53
- flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
- flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
- flowfile_core/flowfile/util/calculate_layout.py +9 -13
- flowfile_core/flowfile/util/execution_orderer.py +25 -17
- flowfile_core/flowfile/util/node_skipper.py +4 -4
- flowfile_core/flowfile/utils.py +19 -21
- flowfile_core/main.py +26 -19
- flowfile_core/routes/auth.py +284 -11
- flowfile_core/routes/cloud_connections.py +25 -25
- flowfile_core/routes/logs.py +21 -29
- flowfile_core/routes/public.py +3 -3
- flowfile_core/routes/routes.py +70 -34
- flowfile_core/routes/secrets.py +25 -27
- flowfile_core/routes/user_defined_components.py +483 -4
- flowfile_core/run_lock.py +0 -1
- flowfile_core/schemas/__init__.py +4 -6
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
- flowfile_core/schemas/cloud_storage_schemas.py +59 -53
- flowfile_core/schemas/input_schema.py +231 -144
- flowfile_core/schemas/output_model.py +49 -34
- flowfile_core/schemas/schemas.py +116 -89
- flowfile_core/schemas/transform_schema.py +518 -263
- flowfile_core/schemas/yaml_types.py +21 -7
- flowfile_core/secret_manager/secret_manager.py +17 -13
- flowfile_core/types.py +29 -9
- flowfile_core/utils/arrow_reader.py +7 -6
- flowfile_core/utils/excel_file_manager.py +3 -3
- flowfile_core/utils/fileManager.py +7 -7
- flowfile_core/utils/fl_executor.py +8 -10
- flowfile_core/utils/utils.py +4 -4
- flowfile_core/utils/validate_setup.py +5 -4
- flowfile_frame/__init__.py +106 -51
- flowfile_frame/adapters.py +2 -9
- flowfile_frame/adding_expr.py +73 -32
- flowfile_frame/cloud_storage/frame_helpers.py +27 -23
- flowfile_frame/cloud_storage/secret_manager.py +12 -26
- flowfile_frame/config.py +2 -5
- flowfile_frame/expr.py +311 -218
- flowfile_frame/expr.pyi +160 -159
- flowfile_frame/expr_name.py +23 -23
- flowfile_frame/flow_frame.py +571 -476
- flowfile_frame/flow_frame.pyi +123 -104
- flowfile_frame/flow_frame_methods.py +227 -246
- flowfile_frame/group_frame.py +50 -20
- flowfile_frame/join.py +2 -2
- flowfile_frame/lazy.py +129 -87
- flowfile_frame/lazy_methods.py +83 -30
- flowfile_frame/list_name_space.py +55 -50
- flowfile_frame/selectors.py +148 -68
- flowfile_frame/series.py +9 -7
- flowfile_frame/utils.py +19 -21
- flowfile_worker/__init__.py +12 -7
- flowfile_worker/configs.py +11 -19
- flowfile_worker/create/__init__.py +14 -9
- flowfile_worker/create/funcs.py +114 -77
- flowfile_worker/create/models.py +46 -43
- flowfile_worker/create/pl_types.py +14 -15
- flowfile_worker/create/read_excel_tables.py +34 -41
- flowfile_worker/create/utils.py +22 -19
- flowfile_worker/external_sources/s3_source/main.py +18 -51
- flowfile_worker/external_sources/s3_source/models.py +34 -27
- flowfile_worker/external_sources/sql_source/main.py +8 -5
- flowfile_worker/external_sources/sql_source/models.py +13 -9
- flowfile_worker/flow_logger.py +10 -8
- flowfile_worker/funcs.py +214 -155
- flowfile_worker/main.py +11 -17
- flowfile_worker/models.py +35 -28
- flowfile_worker/process_manager.py +2 -3
- flowfile_worker/routes.py +121 -90
- flowfile_worker/secrets.py +9 -6
- flowfile_worker/spawner.py +80 -49
- flowfile_worker/utils.py +3 -2
- shared/__init__.py +2 -7
- shared/storage_config.py +25 -13
- test_utils/postgres/commands.py +3 -2
- test_utils/postgres/fixtures.py +9 -9
- test_utils/s3/commands.py +1 -1
- test_utils/s3/data_generator.py +3 -4
- test_utils/s3/demo_data_generator.py +4 -7
- test_utils/s3/fixtures.py +7 -5
- tools/migrate/__init__.py +1 -1
- tools/migrate/__main__.py +16 -29
- tools/migrate/legacy_schemas.py +251 -190
- tools/migrate/migrate.py +193 -181
- tools/migrate/tests/conftest.py +1 -3
- tools/migrate/tests/test_migrate.py +36 -41
- tools/migrate/tests/test_migration_e2e.py +28 -29
- tools/migrate/tests/test_node_migrations.py +50 -20
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
- flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
- flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
- flowfile/web/static/assets/Filter-9b6d08db.js +0 -164
- flowfile/web/static/assets/Filter-f62091b3.css +0 -20
- flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
- flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
- flowfile/web/static/assets/nodeInput-5d0d6b79.js +0 -41
- flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
- flowfile/web/static/assets/secretApi-68435402.js +0 -46
- flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
- flowfile-0.5.1.dist-info/RECORD +0 -388
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
from typing import Literal
|
|
2
2
|
|
|
3
|
-
DataTypeGroup = Literal[
|
|
4
|
-
ReadableDataTypeGroup = Literal[
|
|
3
|
+
DataTypeGroup = Literal["numeric", "str", "date"]
|
|
4
|
+
ReadableDataTypeGroup = Literal["Numeric", "String", "Date", "Other", "Boolean", "Binary", "Complex"]
|
|
@@ -1,13 +1,14 @@
|
|
|
1
|
-
|
|
1
|
+
from collections.abc import Iterable
|
|
2
2
|
from dataclasses import dataclass
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import Any
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
import polars as pl
|
|
6
|
+
|
|
7
|
+
from flowfile_core.flowfile.flow_data_engine.flow_file_column.interface import DataTypeGroup, ReadableDataTypeGroup
|
|
7
8
|
from flowfile_core.flowfile.flow_data_engine.flow_file_column.polars_type import PlType
|
|
8
|
-
from flowfile_core.flowfile.flow_data_engine.flow_file_column.interface import ReadableDataTypeGroup, DataTypeGroup
|
|
9
9
|
from flowfile_core.flowfile.flow_data_engine.flow_file_column.type_registry import convert_pl_type_to_string
|
|
10
|
-
|
|
10
|
+
from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import cast_str_to_polars_type
|
|
11
|
+
from flowfile_core.schemas import input_schema
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
@dataclass
|
|
@@ -22,12 +23,12 @@ class FlowfileColumn:
|
|
|
22
23
|
number_of_unique_values: int
|
|
23
24
|
example_values: str
|
|
24
25
|
data_type_group: ReadableDataTypeGroup
|
|
25
|
-
__sql_type:
|
|
26
|
-
__is_unique:
|
|
27
|
-
__nullable:
|
|
28
|
-
__has_values:
|
|
29
|
-
average_value:
|
|
30
|
-
__perc_unique:
|
|
26
|
+
__sql_type: Any | None
|
|
27
|
+
__is_unique: bool | None
|
|
28
|
+
__nullable: bool | None
|
|
29
|
+
__has_values: bool | None
|
|
30
|
+
average_value: str | None
|
|
31
|
+
__perc_unique: float | None
|
|
31
32
|
|
|
32
33
|
def __init__(self, polars_type: PlType):
|
|
33
34
|
self.data_type = convert_pl_type_to_string(polars_type.pl_datatype)
|
|
@@ -52,10 +53,12 @@ class FlowfileColumn:
|
|
|
52
53
|
Provides a concise, developer-friendly representation of the object.
|
|
53
54
|
Ideal for debugging and console inspection.
|
|
54
55
|
"""
|
|
55
|
-
return (
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
56
|
+
return (
|
|
57
|
+
f"FlowfileColumn(name='{self.column_name}', "
|
|
58
|
+
f"type={self.data_type}, "
|
|
59
|
+
f"size={self.size}, "
|
|
60
|
+
f"nulls={self.number_of_empty_values})"
|
|
61
|
+
)
|
|
59
62
|
|
|
60
63
|
def __str__(self):
|
|
61
64
|
"""
|
|
@@ -103,7 +106,7 @@ class FlowfileColumn:
|
|
|
103
106
|
example_str = str(self.example_values)
|
|
104
107
|
# Truncate long example strings for cleaner display
|
|
105
108
|
if len(example_str) > 70:
|
|
106
|
-
example_str = example_str[:67] +
|
|
109
|
+
example_str = example_str[:67] + "..."
|
|
107
110
|
lines.append(f" Examples: {example_str}")
|
|
108
111
|
|
|
109
112
|
return f"{header}\n" + "\n".join(lines)
|
|
@@ -131,8 +134,7 @@ class FlowfileColumn:
|
|
|
131
134
|
|
|
132
135
|
@classmethod
|
|
133
136
|
def create_from_minimal_field_info(cls, minimal_field_info: input_schema.MinimalFieldInfo) -> "FlowfileColumn":
|
|
134
|
-
return cls.from_input(column_name=minimal_field_info.name,
|
|
135
|
-
data_type=minimal_field_info.data_type)
|
|
137
|
+
return cls.from_input(column_name=minimal_field_info.name, data_type=minimal_field_info.data_type)
|
|
136
138
|
|
|
137
139
|
@property
|
|
138
140
|
def is_unique(self) -> bool:
|
|
@@ -170,41 +172,77 @@ class FlowfileColumn:
|
|
|
170
172
|
return self.column_name
|
|
171
173
|
|
|
172
174
|
def get_column_repr(self):
|
|
173
|
-
return dict(
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
175
|
+
return dict(
|
|
176
|
+
name=self.name,
|
|
177
|
+
size=self.size,
|
|
178
|
+
data_type=str(self.data_type),
|
|
179
|
+
has_values=self.has_values,
|
|
180
|
+
is_unique=self.is_unique,
|
|
181
|
+
max_value=str(self.max_value),
|
|
182
|
+
min_value=str(self.min_value),
|
|
183
|
+
number_of_unique_values=self.number_of_unique_values,
|
|
184
|
+
number_of_filled_values=self.number_of_filled_values,
|
|
185
|
+
number_of_empty_values=self.number_of_empty_values,
|
|
186
|
+
average_size=self.average_value,
|
|
187
|
+
)
|
|
184
188
|
|
|
185
189
|
def generic_datatype(self) -> DataTypeGroup:
|
|
186
|
-
if self.data_type in (
|
|
187
|
-
return
|
|
188
|
-
elif self.data_type in (
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
190
|
+
if self.data_type in ("Utf8", "VARCHAR", "CHAR", "NVARCHAR", "String"):
|
|
191
|
+
return "str"
|
|
192
|
+
elif self.data_type in (
|
|
193
|
+
"fixed_decimal",
|
|
194
|
+
"decimal",
|
|
195
|
+
"float",
|
|
196
|
+
"integer",
|
|
197
|
+
"boolean",
|
|
198
|
+
"double",
|
|
199
|
+
"Int16",
|
|
200
|
+
"Int32",
|
|
201
|
+
"Int64",
|
|
202
|
+
"Float32",
|
|
203
|
+
"Float64",
|
|
204
|
+
"Decimal",
|
|
205
|
+
"Binary",
|
|
206
|
+
"Boolean",
|
|
207
|
+
"Uint8",
|
|
208
|
+
"Uint16",
|
|
209
|
+
"Uint32",
|
|
210
|
+
"Uint64",
|
|
211
|
+
):
|
|
212
|
+
return "numeric"
|
|
213
|
+
elif self.data_type in ("datetime", "date", "Date", "Datetime", "Time"):
|
|
214
|
+
return "date"
|
|
194
215
|
else:
|
|
195
|
-
return
|
|
216
|
+
return "str"
|
|
196
217
|
|
|
197
218
|
def get_readable_datatype_group(self) -> ReadableDataTypeGroup:
|
|
198
|
-
if self.data_type in (
|
|
199
|
-
return
|
|
200
|
-
elif self.data_type in (
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
219
|
+
if self.data_type in ("Utf8", "VARCHAR", "CHAR", "NVARCHAR", "String"):
|
|
220
|
+
return "String"
|
|
221
|
+
elif self.data_type in (
|
|
222
|
+
"fixed_decimal",
|
|
223
|
+
"decimal",
|
|
224
|
+
"float",
|
|
225
|
+
"integer",
|
|
226
|
+
"boolean",
|
|
227
|
+
"double",
|
|
228
|
+
"Int16",
|
|
229
|
+
"Int32",
|
|
230
|
+
"Int64",
|
|
231
|
+
"Float32",
|
|
232
|
+
"Float64",
|
|
233
|
+
"Decimal",
|
|
234
|
+
"Binary",
|
|
235
|
+
"Boolean",
|
|
236
|
+
"Uint8",
|
|
237
|
+
"Uint16",
|
|
238
|
+
"Uint32",
|
|
239
|
+
"Uint64",
|
|
240
|
+
):
|
|
241
|
+
return "Numeric"
|
|
242
|
+
elif self.data_type in ("datetime", "date", "Date", "Datetime", "Time"):
|
|
243
|
+
return "Date"
|
|
206
244
|
else:
|
|
207
|
-
return
|
|
245
|
+
return "Other"
|
|
208
246
|
|
|
209
247
|
def get_polars_type(self) -> PlType:
|
|
210
248
|
pl_datatype = cast_str_to_polars_type(self.data_type)
|
|
@@ -215,13 +253,15 @@ class FlowfileColumn:
|
|
|
215
253
|
self.data_type = str(pl_type.pl_datatype.base_type())
|
|
216
254
|
|
|
217
255
|
|
|
218
|
-
def convert_stats_to_column_info(stats:
|
|
256
|
+
def convert_stats_to_column_info(stats: list[dict]) -> list[FlowfileColumn]:
|
|
219
257
|
return [FlowfileColumn.create_from_polars_type(PlType(**c)) for c in stats]
|
|
220
258
|
|
|
221
259
|
|
|
222
|
-
def convert_pl_schema_to_raw_data_format(pl_schema: pl.Schema) ->
|
|
223
|
-
return [
|
|
224
|
-
|
|
260
|
+
def convert_pl_schema_to_raw_data_format(pl_schema: pl.Schema) -> list[input_schema.MinimalFieldInfo]:
|
|
261
|
+
return [
|
|
262
|
+
FlowfileColumn.create_from_polars_type(PlType(column_name=k, pl_datatype=v)).get_minimal_field_info()
|
|
263
|
+
for k, v in pl_schema.items()
|
|
264
|
+
]
|
|
225
265
|
|
|
226
266
|
|
|
227
267
|
def assert_if_flowfile_schema(obj: Iterable) -> bool:
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
2
3
|
from pydantic import BaseModel
|
|
3
4
|
|
|
4
5
|
|
|
@@ -9,16 +10,16 @@ class ColumnInfo:
|
|
|
9
10
|
class PlType(BaseModel):
|
|
10
11
|
column_name: str
|
|
11
12
|
col_index: int = -1
|
|
12
|
-
count:
|
|
13
|
-
null_count:
|
|
14
|
-
mean:
|
|
15
|
-
std:
|
|
16
|
-
min:
|
|
17
|
-
max:
|
|
18
|
-
median:
|
|
19
|
-
pl_datatype:
|
|
20
|
-
n_unique:
|
|
21
|
-
examples:
|
|
13
|
+
count: int | None = -1
|
|
14
|
+
null_count: int | None = -1
|
|
15
|
+
mean: str | None = ""
|
|
16
|
+
std: float | None = -1
|
|
17
|
+
min: str | None = ""
|
|
18
|
+
max: str | None = ""
|
|
19
|
+
median: str | None = 0
|
|
20
|
+
pl_datatype: Any | None
|
|
21
|
+
n_unique: int | None = -1
|
|
22
|
+
examples: str | None = ""
|
|
22
23
|
|
|
23
24
|
class Config:
|
|
24
25
|
arbitrary_types_allowed = True
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
+
from typing import Literal
|
|
1
2
|
|
|
2
|
-
from typing import Type, Literal, List, Dict, Union, Tuple
|
|
3
3
|
import polars as pl
|
|
4
|
-
|
|
4
|
+
|
|
5
|
+
DataTypeGroup = Literal["numeric", "string", "datetime", "boolean", "binary", "complex", "unknown"]
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
def convert_pl_type_to_string(pl_type: pl.DataType, inner: bool = False) -> str:
|
|
@@ -12,8 +13,8 @@ def convert_pl_type_to_string(pl_type: pl.DataType, inner: bool = False) -> str:
|
|
|
12
13
|
inner_str = convert_pl_type_to_string(pl_type.inner, inner=True)
|
|
13
14
|
return f"pl.Array({inner_str})"
|
|
14
15
|
elif isinstance(pl_type, pl.Decimal):
|
|
15
|
-
precision = pl_type.precision if hasattr(pl_type,
|
|
16
|
-
scale = pl_type.scale if hasattr(pl_type,
|
|
16
|
+
precision = pl_type.precision if hasattr(pl_type, "precision") else None
|
|
17
|
+
scale = pl_type.scale if hasattr(pl_type, "scale") else None
|
|
17
18
|
if precision is not None and scale is not None:
|
|
18
19
|
return f"pl.Decimal({precision}, {scale})"
|
|
19
20
|
elif precision is not None:
|
|
@@ -23,7 +24,7 @@ def convert_pl_type_to_string(pl_type: pl.DataType, inner: bool = False) -> str:
|
|
|
23
24
|
elif isinstance(pl_type, pl.Struct):
|
|
24
25
|
# Handle Struct with field definitions
|
|
25
26
|
fields = []
|
|
26
|
-
if hasattr(pl_type,
|
|
27
|
+
if hasattr(pl_type, "fields"):
|
|
27
28
|
for field in pl_type.fields:
|
|
28
29
|
field_name = field.name
|
|
29
30
|
field_type = convert_pl_type_to_string(field.dtype, inner=True)
|
|
@@ -33,4 +34,3 @@ def convert_pl_type_to_string(pl_type: pl.DataType, inner: bool = False) -> str:
|
|
|
33
34
|
else:
|
|
34
35
|
# For base types, we want the full pl.TypeName format
|
|
35
36
|
return str(pl_type.base_type()) if not inner else f"pl.{pl_type}"
|
|
36
|
-
|
|
@@ -1,21 +1,20 @@
|
|
|
1
1
|
import polars as pl
|
|
2
2
|
|
|
3
|
-
|
|
4
3
|
dtype_to_pl = {
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
4
|
+
"int": pl.Int64,
|
|
5
|
+
"integer": pl.Int64,
|
|
6
|
+
"char": pl.String,
|
|
7
|
+
"fixed decimal": pl.Float32,
|
|
8
|
+
"double": pl.Float64,
|
|
9
|
+
"float": pl.Float64,
|
|
10
|
+
"bool": pl.Boolean,
|
|
11
|
+
"byte": pl.UInt8,
|
|
12
|
+
"bit": pl.Binary,
|
|
13
|
+
"date": pl.Date,
|
|
14
|
+
"datetime": pl.Datetime,
|
|
15
|
+
"string": pl.String,
|
|
16
|
+
"str": pl.String,
|
|
17
|
+
"time": pl.Time,
|
|
19
18
|
}
|
|
20
19
|
|
|
21
20
|
|
|
@@ -27,19 +26,17 @@ def safe_eval_pl_type(type_string: str):
|
|
|
27
26
|
# Define allowed names in the evaluation namespace
|
|
28
27
|
safe_dict = {
|
|
29
28
|
# Polars module and types
|
|
30
|
-
|
|
31
|
-
|
|
29
|
+
"pl": pl,
|
|
32
30
|
# Basic Python built-ins for literals
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
31
|
+
"int": int,
|
|
32
|
+
"str": str,
|
|
33
|
+
"float": float,
|
|
34
|
+
"bool": bool,
|
|
35
|
+
"list": list,
|
|
36
|
+
"dict": dict,
|
|
37
|
+
"tuple": tuple,
|
|
41
38
|
# Disable dangerous built-ins
|
|
42
|
-
|
|
39
|
+
"__builtins__": {},
|
|
43
40
|
}
|
|
44
41
|
|
|
45
42
|
try:
|
|
@@ -52,10 +49,10 @@ dtype_to_pl_str = {k: v.__name__ for k, v in dtype_to_pl.items()}
|
|
|
52
49
|
|
|
53
50
|
|
|
54
51
|
def get_polars_type(dtype: str):
|
|
55
|
-
if
|
|
52
|
+
if "pl." in dtype:
|
|
56
53
|
try:
|
|
57
54
|
return safe_eval_pl_type(dtype)
|
|
58
|
-
except Exception
|
|
55
|
+
except Exception:
|
|
59
56
|
return pl.String
|
|
60
57
|
pl_datetype = dtype_to_pl.get(dtype.lower())
|
|
61
58
|
if pl_datetype is not None:
|
|
@@ -68,8 +65,7 @@ def get_polars_type(dtype: str):
|
|
|
68
65
|
|
|
69
66
|
def cast_str_to_polars_type(dtype: str) -> pl.DataType:
|
|
70
67
|
pl_type = get_polars_type(dtype)
|
|
71
|
-
if
|
|
68
|
+
if callable(pl_type):
|
|
72
69
|
return pl_type()
|
|
73
70
|
else:
|
|
74
71
|
return pl_type
|
|
75
|
-
|
|
@@ -1,13 +1,15 @@
|
|
|
1
|
-
from
|
|
2
|
-
|
|
1
|
+
from typing import TYPE_CHECKING
|
|
2
|
+
|
|
3
3
|
import polars as pl
|
|
4
|
-
|
|
4
|
+
|
|
5
|
+
from flowfile_core.flowfile.flow_data_engine.join import verify_join_map_integrity, verify_join_select_integrity
|
|
6
|
+
from flowfile_core.schemas.transform_schema import FuzzyMatchInputManager, JoinInputs, SelectInput
|
|
5
7
|
|
|
6
8
|
if TYPE_CHECKING:
|
|
7
9
|
from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine
|
|
8
10
|
|
|
9
11
|
|
|
10
|
-
def _order_join_inputs_based_on_col_order(col_order:
|
|
12
|
+
def _order_join_inputs_based_on_col_order(col_order: list[str], join_inputs: JoinInputs) -> None:
|
|
11
13
|
"""
|
|
12
14
|
Ensure that the select columns in the fuzzy match input match the order of the incoming columns.
|
|
13
15
|
This function modifies the join_inputs object in-place.
|
|
@@ -20,9 +22,9 @@ def _order_join_inputs_based_on_col_order(col_order: List[str], join_inputs: Joi
|
|
|
20
22
|
join_inputs.renames = ordered_renames
|
|
21
23
|
|
|
22
24
|
|
|
23
|
-
def _ensure_all_columns_have_select(
|
|
24
|
-
|
|
25
|
-
|
|
25
|
+
def _ensure_all_columns_have_select(
|
|
26
|
+
left: "FlowDataEngine", right: "FlowDataEngine", fuzzy_match_input_manager: FuzzyMatchInputManager
|
|
27
|
+
):
|
|
26
28
|
"""
|
|
27
29
|
Ensure that all columns in the left and right FlowDataEngines are included in the fuzzy match input's select
|
|
28
30
|
statements.
|
|
@@ -38,14 +40,16 @@ def _ensure_all_columns_have_select(left: "FlowDataEngine",
|
|
|
38
40
|
left_cols_in_select = {c.old_name for c in fuzzy_match_input_manager.left_select.renames}
|
|
39
41
|
|
|
40
42
|
fuzzy_match_input_manager.left_select.renames.extend(
|
|
41
|
-
[SelectInput(col) for col in left.columns if col not in left_cols_in_select]
|
|
43
|
+
[SelectInput(col) for col in left.columns if col not in left_cols_in_select]
|
|
44
|
+
)
|
|
42
45
|
fuzzy_match_input_manager.right_select.renames.extend(
|
|
43
46
|
[SelectInput(col) for col in right.columns if col not in right_cols_in_select]
|
|
44
47
|
)
|
|
45
48
|
|
|
46
49
|
|
|
47
|
-
def prepare_for_fuzzy_match(
|
|
48
|
-
|
|
50
|
+
def prepare_for_fuzzy_match(
|
|
51
|
+
left: "FlowDataEngine", right: "FlowDataEngine", fuzzy_match_input_manager: FuzzyMatchInputManager
|
|
52
|
+
) -> tuple[pl.LazyFrame, pl.LazyFrame]:
|
|
49
53
|
"""
|
|
50
54
|
Prepare two FlowDataEngines for fuzzy matching.
|
|
51
55
|
|
|
@@ -61,19 +65,26 @@ def prepare_for_fuzzy_match(left: "FlowDataEngine", right: "FlowDataEngine",
|
|
|
61
65
|
_ensure_all_columns_have_select(left, right, fuzzy_match_input_manager)
|
|
62
66
|
_order_join_inputs_based_on_col_order(left.columns, fuzzy_match_input_manager.left_select.join_inputs)
|
|
63
67
|
_order_join_inputs_based_on_col_order(right.columns, fuzzy_match_input_manager.right_select.join_inputs)
|
|
64
|
-
verify_join_select_integrity(
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
+
verify_join_select_integrity(
|
|
69
|
+
fuzzy_match_input_manager.fuzzy_input, left_columns=left.columns, right_columns=right.columns
|
|
70
|
+
)
|
|
71
|
+
if not verify_join_map_integrity(
|
|
72
|
+
fuzzy_match_input_manager.fuzzy_input, left_columns=left.schema, right_columns=right.schema
|
|
73
|
+
):
|
|
74
|
+
raise Exception("Join is not valid by the data fields")
|
|
68
75
|
|
|
69
76
|
fuzzy_match_input_manager.auto_rename()
|
|
70
77
|
|
|
71
|
-
right_select = [
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
78
|
+
right_select = [
|
|
79
|
+
v.old_name for v in fuzzy_match_input_manager.right_select.renames if (v.keep or v.join_key) and v.is_available
|
|
80
|
+
]
|
|
81
|
+
left_select = [
|
|
82
|
+
v.old_name for v in fuzzy_match_input_manager.left_select.renames if (v.keep or v.join_key) and v.is_available
|
|
83
|
+
]
|
|
75
84
|
left_df: pl.LazyFrame | pl.DataFrame = left.data_frame.select(left_select).rename(
|
|
76
|
-
fuzzy_match_input_manager.left_select.rename_table
|
|
85
|
+
fuzzy_match_input_manager.left_select.rename_table
|
|
86
|
+
)
|
|
77
87
|
right_df: pl.LazyFrame | pl.DataFrame = right.data_frame.select(right_select).rename(
|
|
78
|
-
fuzzy_match_input_manager.right_select.rename_table
|
|
88
|
+
fuzzy_match_input_manager.right_select.rename_table
|
|
89
|
+
)
|
|
79
90
|
return left_df, right_df
|
|
@@ -1,22 +1,24 @@
|
|
|
1
1
|
# Standard library imports
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import TypeVar
|
|
3
3
|
|
|
4
4
|
# Third-party imports
|
|
5
5
|
import polars as pl
|
|
6
6
|
|
|
7
|
-
from flowfile_core.schemas import
|
|
8
|
-
transform_schema as transform_schemas
|
|
9
|
-
)
|
|
7
|
+
from flowfile_core.schemas import transform_schema as transform_schemas
|
|
10
8
|
|
|
11
|
-
T = TypeVar(
|
|
9
|
+
T = TypeVar("T", pl.DataFrame, pl.LazyFrame)
|
|
12
10
|
|
|
13
11
|
|
|
14
|
-
def rename_df_table_for_join(
|
|
15
|
-
|
|
16
|
-
|
|
12
|
+
def rename_df_table_for_join(
|
|
13
|
+
left_df: T, right_df: T, join_key_rename: transform_schemas.FullJoinKeyResponse
|
|
14
|
+
) -> tuple[T, T]:
|
|
15
|
+
return (
|
|
16
|
+
left_df.rename({r[0]: r[1] for r in join_key_rename.left.join_key_renames}),
|
|
17
|
+
right_df.rename({r[0]: r[1] for r in join_key_rename.right.join_key_renames}),
|
|
18
|
+
)
|
|
17
19
|
|
|
18
20
|
|
|
19
|
-
def get_undo_rename_mapping_join(join_input: transform_schemas.JoinInputManager) ->
|
|
21
|
+
def get_undo_rename_mapping_join(join_input: transform_schemas.JoinInputManager) -> dict[str, str]:
|
|
20
22
|
join_key_rename = join_input.get_join_key_renames(True)
|
|
21
23
|
return {r[1]: r[0] for r in join_key_rename.right.join_key_renames + join_key_rename.left.join_key_renames}
|
|
22
24
|
|
|
@@ -1,17 +1,15 @@
|
|
|
1
|
-
|
|
2
|
-
from typing import List
|
|
3
|
-
from flowfile_core.schemas import transform_schema
|
|
4
1
|
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
|
|
2
|
+
from flowfile_core.schemas import transform_schema
|
|
5
3
|
|
|
6
4
|
|
|
7
5
|
def verify_join_select_integrity(
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
6
|
+
join_input: transform_schema.JoinInput
|
|
7
|
+
| transform_schema.CrossJoinInput
|
|
8
|
+
| transform_schema.FuzzyMatchInput
|
|
9
|
+
| transform_schema.JoinInputsManager,
|
|
10
|
+
left_columns: list[str],
|
|
11
|
+
right_columns: list[str],
|
|
12
|
+
):
|
|
15
13
|
"""
|
|
16
14
|
Verify column availability for join selection and update availability flags.
|
|
17
15
|
|
|
@@ -32,10 +30,11 @@ def verify_join_select_integrity(
|
|
|
32
30
|
c.is_available = True
|
|
33
31
|
|
|
34
32
|
|
|
35
|
-
def verify_join_map_integrity(
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
33
|
+
def verify_join_map_integrity(
|
|
34
|
+
join_input: transform_schema.JoinInput | transform_schema.FuzzyMatchInput | transform_schema.JoinInputManager,
|
|
35
|
+
left_columns: list[FlowfileColumn],
|
|
36
|
+
right_columns: list[FlowfileColumn],
|
|
37
|
+
):
|
|
39
38
|
"""
|
|
40
39
|
Verify data type compatibility for join mappings between tables.
|
|
41
40
|
|
|
@@ -56,4 +55,4 @@ def verify_join_map_integrity(join_input: transform_schema.JoinInput | transform
|
|
|
56
55
|
return False
|
|
57
56
|
if left_column_info.generic_datatype() != right_column_info.generic_datatype():
|
|
58
57
|
return False
|
|
59
|
-
return True
|
|
58
|
+
return True
|
|
@@ -1,20 +1,18 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
1
3
|
import polars as pl
|
|
2
4
|
from polars.expr import Expr
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
from typing import List
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
@dataclass
|
|
8
8
|
class AggFunc:
|
|
9
|
-
__slots__ = [
|
|
9
|
+
__slots__ = ["func_name", "func_expr"]
|
|
10
10
|
func_name: str
|
|
11
11
|
func_expr: Expr
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
AggFuncs =
|
|
14
|
+
AggFuncs = list[AggFunc]
|
|
15
15
|
|
|
16
16
|
pl.Expr.sum
|
|
17
17
|
|
|
18
|
-
agg_funcs = [
|
|
19
|
-
|
|
20
|
-
|
|
18
|
+
agg_funcs = ["sum", "max", "min", "count", "first", "last", "std", "var", "n_unique", "list", "list_agg"]
|