Flowfile 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backends/main.py +25 -22
- build_backends/main_prd.py +10 -19
- flowfile/__init__.py +178 -74
- flowfile/__main__.py +10 -7
- flowfile/api.py +51 -57
- flowfile/web/__init__.py +14 -9
- flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
- flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
- flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
- flowfile/web/static/assets/{CloudConnectionManager-0dfba9f2.js → CloudConnectionView-f13f202b.js} +11 -11
- flowfile/web/static/assets/{CloudStorageReader-d5b1b6c9.js → CloudStorageReader-0023d4a5.js} +10 -8
- flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
- flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
- flowfile/web/static/assets/{CloudStorageWriter-00d87aad.js → CloudStorageWriter-8e781e11.js} +10 -8
- flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
- flowfile/web/static/assets/{ColumnSelector-4685e75d.js → ColumnSelector-8ad68ea9.js} +3 -5
- flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-23e909da.js → ContextMenu-31ee57f0.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-70ae0c79.js → ContextMenu-69a74055.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-f149cf7c.js → ContextMenu-8e2051c6.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
- flowfile/web/static/assets/{CrossJoin-702a3edd.js → CrossJoin-03df6938.js} +12 -10
- flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
- flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
- flowfile/web/static/assets/{CustomNode-b1519993.js → CustomNode-8479239b.js} +36 -24
- flowfile/web/static/assets/{DatabaseConnectionSettings-6f3e4ea5.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
- flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
- flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
- flowfile/web/static/assets/{DatabaseReader-d38c7295.js → DatabaseReader-c58b9552.js} +25 -15
- flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
- flowfile/web/static/assets/{DatabaseManager-cf5ef661.js → DatabaseView-d26a9140.js} +11 -11
- flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
- flowfile/web/static/assets/{DatabaseWriter-b04ef46a.js → DatabaseWriter-4d05ddc7.js} +17 -10
- flowfile/web/static/assets/{designer-8da3ba3a.css → DesignerView-a6d0ee84.css} +614 -546
- flowfile/web/static/assets/{designer-9633482a.js → DesignerView-e6f5c0e8.js} +1107 -3170
- flowfile/web/static/assets/{documentation-ca400224.js → DocumentationView-2e78ef1b.js} +5 -5
- flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
- flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
- flowfile/web/static/assets/{ExploreData-5fa10ed8.js → ExploreData-7b54caca.js} +18 -9
- flowfile/web/static/assets/{ExternalSource-d39af878.js → ExternalSource-3fa399b2.js} +9 -7
- flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
- flowfile/web/static/assets/Filter-7494ea97.css +48 -0
- flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
- flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
- flowfile/web/static/assets/{Formula-6b04fb1d.js → Formula-aac42b1e.js} +13 -11
- flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
- flowfile/web/static/assets/{FuzzyMatch-999521f4.js → FuzzyMatch-cd9bbfca.js} +12 -10
- flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
- flowfile/web/static/assets/{GraphSolver-17dd2198.js → GraphSolver-c7e6780e.js} +13 -11
- flowfile/web/static/assets/{GroupBy-6b039e18.js → GroupBy-93c5d22b.js} +9 -7
- flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
- flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
- flowfile/web/static/assets/{Join-24d0f113.js → Join-a19b2de2.js} +13 -11
- flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
- flowfile/web/static/assets/LoginView-d325d632.css +172 -0
- flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
- flowfile/web/static/assets/{ManualInput-34639209.js → ManualInput-8d3374b2.js} +170 -116
- flowfile/web/static/assets/{MultiSelect-0e8724a3.js → MultiSelect-ad1b6243.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
- flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
- flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
- flowfile/web/static/assets/{NumericInput-3d63a470.js → NumericInput-7100234c.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
- flowfile/web/static/assets/{Output-283fe388.css → Output-35e97000.css} +6 -6
- flowfile/web/static/assets/{Output-edea9802.js → Output-f5efd2aa.js} +12 -9
- flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
- flowfile/web/static/assets/{Pivot-61d19301.js → Pivot-d981d23c.js} +11 -9
- flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
- flowfile/web/static/assets/{PivotValidation-f97fec5b.js → PivotValidation-39386e95.js} +3 -3
- flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
- flowfile/web/static/assets/{PivotValidation-de9f43fe.js → PivotValidation-63de1f73.js} +3 -3
- flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
- flowfile/web/static/assets/{PolarsCode-bc3c9984.js → PolarsCode-f9d69217.js} +18 -9
- flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
- flowfile/web/static/assets/PopOver-d96599db.css +33 -0
- flowfile/web/static/assets/{Read-e808b239.css → Read-36e7bd51.css} +12 -12
- flowfile/web/static/assets/{Read-64a3f259.js → Read-aec2e377.js} +14 -11
- flowfile/web/static/assets/{RecordCount-3d5039be.js → RecordCount-78ed6845.js} +6 -4
- flowfile/web/static/assets/{RecordId-597510e0.js → RecordId-2156e890.js} +8 -6
- flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
- flowfile/web/static/assets/{SQLQueryComponent-df51adbe.js → SQLQueryComponent-48c72f5b.js} +3 -3
- flowfile/web/static/assets/{Sample-4be0a507.js → Sample-1352ca74.js} +6 -4
- flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
- flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
- flowfile/web/static/assets/{SecretManager-4839be57.js → SecretsView-17df66ee.js} +35 -36
- flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
- flowfile/web/static/assets/{Select-9b72f201.js → Select-0aee4c54.js} +9 -7
- flowfile/web/static/assets/{SettingsSection-f0f75a42.js → SettingsSection-0784e157.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-e1e9c953.js → SettingsSection-cd341bb6.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-7ded385d.js → SettingsSection-f2002a6d.js} +3 -3
- flowfile/web/static/assets/{SingleSelect-6c777aac.js → SingleSelect-460cc0ea.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
- flowfile/web/static/assets/{SliderInput-7cb93e62.js → SliderInput-5d926864.js} +7 -4
- flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
- flowfile/web/static/assets/{Sort-6cbde21a.js → Sort-3cdc971b.js} +9 -7
- flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
- flowfile/web/static/assets/{TextInput-d9a40c11.js → TextInput-a2d0bfbd.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-5896c375.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
- flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
- flowfile/web/static/assets/{TextToRows-c4fcbf4d.js → TextToRows-918945f7.js} +11 -10
- flowfile/web/static/assets/{ToggleSwitch-4ef91d19.js → ToggleSwitch-f0ef5196.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
- flowfile/web/static/assets/{UnavailableFields-a03f512c.js → UnavailableFields-bdad6144.js} +4 -4
- flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
- flowfile/web/static/assets/{Union-bfe9b996.js → Union-e8ab8c86.js} +8 -6
- flowfile/web/static/assets/{Unique-5d023a27.js → Unique-8cd4f976.js} +13 -10
- flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
- flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
- flowfile/web/static/assets/{Unpivot-91cc5354.js → Unpivot-8da14095.js} +10 -8
- flowfile/web/static/assets/{UnpivotValidation-7ee2de44.js → UnpivotValidation-6f7d89ff.js} +3 -3
- flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
- flowfile/web/static/assets/{VueGraphicWalker-e51b9924.js → VueGraphicWalker-3fb312e1.js} +4 -4
- flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
- flowfile/web/static/assets/{api-cf1221f0.js → api-24483f0d.js} +1 -1
- flowfile/web/static/assets/{api-c1bad5ca.js → api-8b81fa73.js} +1 -1
- flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
- flowfile/web/static/assets/{dropDown-614b998d.js → dropDown-ac0fda9d.js} +3 -3
- flowfile/web/static/assets/{fullEditor-f7971590.js → fullEditor-5497a84a.js} +11 -10
- flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
- flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
- flowfile/web/static/assets/{genericNodeSettings-4fe5f36b.js → genericNodeSettings-99014e1d.js} +5 -5
- flowfile/web/static/assets/index-07dda503.js +38 -0
- flowfile/web/static/assets/index-3ba44389.js +2696 -0
- flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
- flowfile/web/static/assets/{index-5429bbf8.js → index-fb6493ae.js} +41626 -40867
- flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
- flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
- flowfile/web/static/assets/{outputCsv-076b85ab.js → outputCsv-8f8ba42d.js} +3 -3
- flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
- flowfile/web/static/assets/{outputExcel-0fd17dbe.js → outputExcel-393f4fef.js} +3 -3
- flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
- flowfile/web/static/assets/{outputParquet-b61e0847.js → outputParquet-07c81f65.js} +4 -4
- flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
- flowfile/web/static/assets/{readCsv-a8bb8b61.js → readCsv-07f6d9ad.js} +3 -3
- flowfile/web/static/assets/{readCsv-c767cb37.css → readCsv-3bfac4c3.css} +15 -15
- flowfile/web/static/assets/{readExcel-806d2826.css → readExcel-3db6b763.css} +13 -13
- flowfile/web/static/assets/{readExcel-67b4aee0.js → readExcel-ed69bc8f.js} +5 -5
- flowfile/web/static/assets/{readParquet-48c81530.css → readParquet-c5244ad5.css} +4 -4
- flowfile/web/static/assets/{readParquet-92ce1dbc.js → readParquet-e3ed4528.js} +3 -3
- flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
- flowfile/web/static/assets/{selectDynamic-92e25ee3.js → selectDynamic-80b92899.js} +5 -5
- flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
- flowfile/web/static/assets/{vue-codemirror.esm-41b0e0d7.js → vue-codemirror.esm-0965f39f.js} +31 -640
- flowfile/web/static/assets/{vue-content-loader.es-2c8e608f.js → vue-content-loader.es-c506ad97.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +2 -3
- flowfile-0.5.3.dist-info/RECORD +402 -0
- flowfile_core/__init__.py +13 -6
- flowfile_core/auth/jwt.py +51 -16
- flowfile_core/auth/models.py +32 -7
- flowfile_core/auth/password.py +89 -0
- flowfile_core/auth/secrets.py +8 -6
- flowfile_core/configs/__init__.py +9 -7
- flowfile_core/configs/flow_logger.py +15 -14
- flowfile_core/configs/node_store/__init__.py +72 -4
- flowfile_core/configs/node_store/nodes.py +155 -172
- flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
- flowfile_core/configs/settings.py +28 -15
- flowfile_core/database/connection.py +7 -6
- flowfile_core/database/init_db.py +96 -2
- flowfile_core/database/models.py +3 -1
- flowfile_core/fileExplorer/__init__.py +17 -0
- flowfile_core/fileExplorer/funcs.py +123 -57
- flowfile_core/fileExplorer/utils.py +10 -11
- flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
- flowfile_core/flowfile/analytics/analytics_processor.py +26 -24
- flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
- flowfile_core/flowfile/analytics/utils.py +1 -1
- flowfile_core/flowfile/code_generator/code_generator.py +358 -244
- flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
- flowfile_core/flowfile/database_connection_manager/models.py +1 -1
- flowfile_core/flowfile/extensions.py +17 -12
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +115 -83
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +481 -423
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +31 -20
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +14 -15
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
- flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +190 -127
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
- flowfile_core/flowfile/flow_data_engine/utils.py +99 -67
- flowfile_core/flowfile/flow_graph.py +918 -571
- flowfile_core/flowfile/flow_graph_utils.py +31 -49
- flowfile_core/flowfile/flow_node/flow_node.py +330 -233
- flowfile_core/flowfile/flow_node/models.py +53 -41
- flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
- flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
- flowfile_core/flowfile/handler.py +80 -30
- flowfile_core/flowfile/manage/compatibility_enhancements.py +209 -126
- flowfile_core/flowfile/manage/io_flowfile.py +54 -57
- flowfile_core/flowfile/node_designer/__init__.py +15 -13
- flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
- flowfile_core/flowfile/node_designer/custom_node.py +162 -36
- flowfile_core/flowfile/node_designer/ui_components.py +135 -34
- flowfile_core/flowfile/schema_callbacks.py +71 -51
- flowfile_core/flowfile/setting_generator/__init__.py +0 -1
- flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
- flowfile_core/flowfile/setting_generator/settings.py +64 -53
- flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
- flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
- flowfile_core/flowfile/util/calculate_layout.py +9 -13
- flowfile_core/flowfile/util/execution_orderer.py +25 -17
- flowfile_core/flowfile/util/node_skipper.py +4 -4
- flowfile_core/flowfile/utils.py +19 -21
- flowfile_core/main.py +26 -19
- flowfile_core/routes/auth.py +284 -11
- flowfile_core/routes/cloud_connections.py +25 -25
- flowfile_core/routes/logs.py +21 -29
- flowfile_core/routes/public.py +3 -3
- flowfile_core/routes/routes.py +70 -34
- flowfile_core/routes/secrets.py +25 -27
- flowfile_core/routes/user_defined_components.py +483 -4
- flowfile_core/run_lock.py +0 -1
- flowfile_core/schemas/__init__.py +4 -6
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
- flowfile_core/schemas/cloud_storage_schemas.py +59 -53
- flowfile_core/schemas/input_schema.py +231 -144
- flowfile_core/schemas/output_model.py +49 -34
- flowfile_core/schemas/schemas.py +116 -89
- flowfile_core/schemas/transform_schema.py +518 -263
- flowfile_core/schemas/yaml_types.py +21 -7
- flowfile_core/secret_manager/secret_manager.py +17 -13
- flowfile_core/types.py +29 -9
- flowfile_core/utils/arrow_reader.py +7 -6
- flowfile_core/utils/excel_file_manager.py +3 -3
- flowfile_core/utils/fileManager.py +7 -7
- flowfile_core/utils/fl_executor.py +8 -10
- flowfile_core/utils/utils.py +4 -4
- flowfile_core/utils/validate_setup.py +5 -4
- flowfile_frame/__init__.py +106 -51
- flowfile_frame/adapters.py +2 -9
- flowfile_frame/adding_expr.py +73 -32
- flowfile_frame/cloud_storage/frame_helpers.py +27 -23
- flowfile_frame/cloud_storage/secret_manager.py +12 -26
- flowfile_frame/config.py +2 -5
- flowfile_frame/expr.py +311 -218
- flowfile_frame/expr.pyi +160 -159
- flowfile_frame/expr_name.py +23 -23
- flowfile_frame/flow_frame.py +571 -476
- flowfile_frame/flow_frame.pyi +123 -104
- flowfile_frame/flow_frame_methods.py +227 -246
- flowfile_frame/group_frame.py +50 -20
- flowfile_frame/join.py +2 -2
- flowfile_frame/lazy.py +129 -87
- flowfile_frame/lazy_methods.py +83 -30
- flowfile_frame/list_name_space.py +55 -50
- flowfile_frame/selectors.py +148 -68
- flowfile_frame/series.py +9 -7
- flowfile_frame/utils.py +19 -21
- flowfile_worker/__init__.py +12 -7
- flowfile_worker/configs.py +11 -19
- flowfile_worker/create/__init__.py +14 -9
- flowfile_worker/create/funcs.py +114 -77
- flowfile_worker/create/models.py +46 -43
- flowfile_worker/create/pl_types.py +14 -15
- flowfile_worker/create/read_excel_tables.py +34 -41
- flowfile_worker/create/utils.py +22 -19
- flowfile_worker/external_sources/s3_source/main.py +18 -51
- flowfile_worker/external_sources/s3_source/models.py +34 -27
- flowfile_worker/external_sources/sql_source/main.py +8 -5
- flowfile_worker/external_sources/sql_source/models.py +13 -9
- flowfile_worker/flow_logger.py +10 -8
- flowfile_worker/funcs.py +214 -155
- flowfile_worker/main.py +11 -17
- flowfile_worker/models.py +35 -28
- flowfile_worker/process_manager.py +2 -3
- flowfile_worker/routes.py +121 -90
- flowfile_worker/secrets.py +9 -6
- flowfile_worker/spawner.py +80 -49
- flowfile_worker/utils.py +3 -2
- shared/__init__.py +2 -7
- shared/storage_config.py +25 -13
- test_utils/postgres/commands.py +3 -2
- test_utils/postgres/fixtures.py +9 -9
- test_utils/s3/commands.py +1 -1
- test_utils/s3/data_generator.py +3 -4
- test_utils/s3/demo_data_generator.py +4 -7
- test_utils/s3/fixtures.py +7 -5
- tools/migrate/__init__.py +1 -1
- tools/migrate/__main__.py +16 -29
- tools/migrate/legacy_schemas.py +251 -190
- tools/migrate/migrate.py +193 -181
- tools/migrate/tests/conftest.py +1 -3
- tools/migrate/tests/test_migrate.py +36 -41
- tools/migrate/tests/test_migration_e2e.py +28 -29
- tools/migrate/tests/test_node_migrations.py +50 -20
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
- flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
- flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
- flowfile/web/static/assets/Filter-9b6d08db.js +0 -164
- flowfile/web/static/assets/Filter-f62091b3.css +0 -20
- flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
- flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
- flowfile/web/static/assets/nodeInput-5d0d6b79.js +0 -41
- flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
- flowfile/web/static/assets/secretApi-68435402.js +0 -46
- flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
- flowfile-0.5.1.dist-info/RECORD +0 -388
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,18 +1,102 @@
|
|
|
1
|
-
from
|
|
1
|
+
from collections.abc import Callable
|
|
2
|
+
from copy import deepcopy
|
|
2
3
|
from dataclasses import asdict
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Any, Literal, NamedTuple
|
|
6
|
+
|
|
3
7
|
import polars as pl
|
|
8
|
+
from pl_fuzzy_frame_match.models import FuzzyMapping
|
|
4
9
|
from polars import selectors
|
|
5
|
-
from
|
|
6
|
-
|
|
7
|
-
from typing import NamedTuple, Union, Any
|
|
10
|
+
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
|
11
|
+
|
|
8
12
|
from flowfile_core.schemas.yaml_types import (
|
|
9
|
-
|
|
10
|
-
CrossJoinInputYaml,
|
|
13
|
+
BasicFilterYaml,
|
|
14
|
+
CrossJoinInputYaml,
|
|
15
|
+
FilterInputYaml,
|
|
16
|
+
FuzzyMatchInputYaml,
|
|
17
|
+
JoinInputsYaml,
|
|
18
|
+
JoinInputYaml,
|
|
19
|
+
SelectInputYaml,
|
|
11
20
|
)
|
|
12
|
-
from pl_fuzzy_frame_match.models import FuzzyMapping
|
|
13
|
-
|
|
14
21
|
from flowfile_core.types import DataType, DataTypeStr
|
|
15
22
|
|
|
23
|
+
|
|
24
|
+
class FilterOperator(str, Enum):
|
|
25
|
+
"""Supported filter comparison operators."""
|
|
26
|
+
|
|
27
|
+
EQUALS = "equals"
|
|
28
|
+
NOT_EQUALS = "not_equals"
|
|
29
|
+
GREATER_THAN = "greater_than"
|
|
30
|
+
GREATER_THAN_OR_EQUALS = "greater_than_or_equals"
|
|
31
|
+
LESS_THAN = "less_than"
|
|
32
|
+
LESS_THAN_OR_EQUALS = "less_than_or_equals"
|
|
33
|
+
CONTAINS = "contains"
|
|
34
|
+
NOT_CONTAINS = "not_contains"
|
|
35
|
+
STARTS_WITH = "starts_with"
|
|
36
|
+
ENDS_WITH = "ends_with"
|
|
37
|
+
IS_NULL = "is_null"
|
|
38
|
+
IS_NOT_NULL = "is_not_null"
|
|
39
|
+
IN = "in"
|
|
40
|
+
NOT_IN = "not_in"
|
|
41
|
+
BETWEEN = "between"
|
|
42
|
+
|
|
43
|
+
def __str__(self) -> str:
|
|
44
|
+
return self.value
|
|
45
|
+
|
|
46
|
+
@classmethod
|
|
47
|
+
def from_symbol(cls, symbol: str) -> "FilterOperator":
|
|
48
|
+
"""Convert UI symbol to FilterOperator enum."""
|
|
49
|
+
symbol_mapping = {
|
|
50
|
+
"=": cls.EQUALS,
|
|
51
|
+
"==": cls.EQUALS,
|
|
52
|
+
"!=": cls.NOT_EQUALS,
|
|
53
|
+
"<>": cls.NOT_EQUALS,
|
|
54
|
+
">": cls.GREATER_THAN,
|
|
55
|
+
">=": cls.GREATER_THAN_OR_EQUALS,
|
|
56
|
+
"<": cls.LESS_THAN,
|
|
57
|
+
"<=": cls.LESS_THAN_OR_EQUALS,
|
|
58
|
+
"contains": cls.CONTAINS,
|
|
59
|
+
"not_contains": cls.NOT_CONTAINS,
|
|
60
|
+
"starts_with": cls.STARTS_WITH,
|
|
61
|
+
"ends_with": cls.ENDS_WITH,
|
|
62
|
+
"is_null": cls.IS_NULL,
|
|
63
|
+
"is_not_null": cls.IS_NOT_NULL,
|
|
64
|
+
"in": cls.IN,
|
|
65
|
+
"not_in": cls.NOT_IN,
|
|
66
|
+
"between": cls.BETWEEN,
|
|
67
|
+
}
|
|
68
|
+
if symbol in symbol_mapping:
|
|
69
|
+
return symbol_mapping[symbol]
|
|
70
|
+
# Try to match by value directly
|
|
71
|
+
try:
|
|
72
|
+
return cls(symbol)
|
|
73
|
+
except ValueError:
|
|
74
|
+
raise ValueError(f"Unknown filter operator symbol: {symbol}")
|
|
75
|
+
|
|
76
|
+
def to_symbol(self) -> str:
|
|
77
|
+
"""Convert FilterOperator to UI-friendly symbol."""
|
|
78
|
+
symbol_mapping = {
|
|
79
|
+
FilterOperator.EQUALS: "=",
|
|
80
|
+
FilterOperator.NOT_EQUALS: "!=",
|
|
81
|
+
FilterOperator.GREATER_THAN: ">",
|
|
82
|
+
FilterOperator.GREATER_THAN_OR_EQUALS: ">=",
|
|
83
|
+
FilterOperator.LESS_THAN: "<",
|
|
84
|
+
FilterOperator.LESS_THAN_OR_EQUALS: "<=",
|
|
85
|
+
FilterOperator.CONTAINS: "contains",
|
|
86
|
+
FilterOperator.NOT_CONTAINS: "not_contains",
|
|
87
|
+
FilterOperator.STARTS_WITH: "starts_with",
|
|
88
|
+
FilterOperator.ENDS_WITH: "ends_with",
|
|
89
|
+
FilterOperator.IS_NULL: "is_null",
|
|
90
|
+
FilterOperator.IS_NOT_NULL: "is_not_null",
|
|
91
|
+
FilterOperator.IN: "in",
|
|
92
|
+
FilterOperator.NOT_IN: "not_in",
|
|
93
|
+
FilterOperator.BETWEEN: "between",
|
|
94
|
+
}
|
|
95
|
+
return symbol_mapping.get(self, self.value)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
FilterModeLiteral = Literal["basic", "advanced"]
|
|
99
|
+
|
|
16
100
|
FuzzyMap = FuzzyMapping
|
|
17
101
|
|
|
18
102
|
AUTO_DATA_TYPE = "Auto"
|
|
@@ -22,22 +106,22 @@ def get_func_type_mapping(func: str):
|
|
|
22
106
|
"""Infers the output data type of common aggregation functions."""
|
|
23
107
|
if func in ["mean", "avg", "median", "std", "var"]:
|
|
24
108
|
return "Float64"
|
|
25
|
-
elif func in [
|
|
109
|
+
elif func in ["min", "max", "first", "last", "cumsum", "sum"]:
|
|
26
110
|
return None
|
|
27
|
-
elif func in [
|
|
111
|
+
elif func in ["count", "n_unique"]:
|
|
28
112
|
return "Int64"
|
|
29
|
-
elif func in [
|
|
113
|
+
elif func in ["concat"]:
|
|
30
114
|
return "Utf8"
|
|
31
115
|
|
|
32
116
|
|
|
33
117
|
def string_concat(*column: str):
|
|
34
118
|
"""A simple wrapper to concatenate string columns in Polars."""
|
|
35
|
-
return pl.col(column).cast(pl.Utf8).str.concat(delimiter=
|
|
119
|
+
return pl.col(column).cast(pl.Utf8).str.concat(delimiter=",")
|
|
36
120
|
|
|
37
121
|
|
|
38
122
|
SideLit = Literal["left", "right"]
|
|
39
|
-
JoinStrategy = Literal[
|
|
40
|
-
FuzzyTypeLiteral = Literal[
|
|
123
|
+
JoinStrategy = Literal["inner", "left", "right", "full", "semi", "anti", "cross", "outer"]
|
|
124
|
+
FuzzyTypeLiteral = Literal["levenshtein", "jaro", "jaro_winkler", "hamming", "damerau_levenshtein", "indel"]
|
|
41
125
|
|
|
42
126
|
|
|
43
127
|
def construct_join_key_name(side: SideLit, column_name: str) -> str:
|
|
@@ -47,18 +131,21 @@ def construct_join_key_name(side: SideLit, column_name: str) -> str:
|
|
|
47
131
|
|
|
48
132
|
class JoinKeyRename(NamedTuple):
|
|
49
133
|
"""Represents the renaming of a join key from its original to a temporary name."""
|
|
134
|
+
|
|
50
135
|
original_name: str
|
|
51
136
|
temp_name: str
|
|
52
137
|
|
|
53
138
|
|
|
54
139
|
class JoinKeyRenameResponse(NamedTuple):
|
|
55
140
|
"""Contains a list of join key renames for one side of a join."""
|
|
141
|
+
|
|
56
142
|
side: SideLit
|
|
57
|
-
join_key_renames:
|
|
143
|
+
join_key_renames: list[JoinKeyRename]
|
|
58
144
|
|
|
59
145
|
|
|
60
146
|
class FullJoinKeyResponse(NamedTuple):
|
|
61
147
|
"""Holds the join key rename responses for both sides of a join."""
|
|
148
|
+
|
|
62
149
|
left: JoinKeyRenameResponse
|
|
63
150
|
right: JoinKeyRenameResponse
|
|
64
151
|
|
|
@@ -69,24 +156,25 @@ class SelectInput(BaseModel):
|
|
|
69
156
|
This is a core building block for any operation that involves column manipulation.
|
|
70
157
|
It holds all the configuration for a single field in a selection operation.
|
|
71
158
|
"""
|
|
159
|
+
|
|
72
160
|
model_config = ConfigDict(frozen=False)
|
|
73
161
|
|
|
74
162
|
old_name: str
|
|
75
|
-
original_position:
|
|
76
|
-
new_name:
|
|
77
|
-
data_type:
|
|
163
|
+
original_position: int | None = None
|
|
164
|
+
new_name: str | None = None
|
|
165
|
+
data_type: str | None = None
|
|
78
166
|
data_type_change: bool = False
|
|
79
167
|
join_key: bool = False
|
|
80
168
|
is_altered: bool = False
|
|
81
|
-
position:
|
|
169
|
+
position: int | None = None
|
|
82
170
|
is_available: bool = True
|
|
83
171
|
keep: bool = True
|
|
84
172
|
|
|
85
173
|
def __init__(self, old_name: str = None, new_name: str = None, **data):
|
|
86
174
|
if old_name is not None:
|
|
87
|
-
data[
|
|
175
|
+
data["old_name"] = old_name
|
|
88
176
|
if new_name is not None:
|
|
89
|
-
data[
|
|
177
|
+
data["new_name"] = new_name
|
|
90
178
|
super().__init__(**data)
|
|
91
179
|
|
|
92
180
|
def to_yaml_dict(self) -> SelectInputYaml:
|
|
@@ -114,7 +202,7 @@ class SelectInput(BaseModel):
|
|
|
114
202
|
is_altered=old_name != new_name,
|
|
115
203
|
)
|
|
116
204
|
|
|
117
|
-
@model_validator(mode=
|
|
205
|
+
@model_validator(mode="after")
|
|
118
206
|
def set_default_new_name(self):
|
|
119
207
|
"""If new_name is None, default it to old_name."""
|
|
120
208
|
if self.new_name is None:
|
|
@@ -137,76 +225,205 @@ class SelectInput(BaseModel):
|
|
|
137
225
|
def polars_type(self) -> str:
|
|
138
226
|
"""Translates a user-friendly type name to a Polars data type string."""
|
|
139
227
|
data_type_lower = self.data_type.lower()
|
|
140
|
-
if data_type_lower ==
|
|
141
|
-
return
|
|
142
|
-
elif data_type_lower ==
|
|
143
|
-
return
|
|
144
|
-
elif data_type_lower ==
|
|
145
|
-
return
|
|
228
|
+
if data_type_lower == "string":
|
|
229
|
+
return "Utf8"
|
|
230
|
+
elif data_type_lower == "integer":
|
|
231
|
+
return "Int64"
|
|
232
|
+
elif data_type_lower == "double":
|
|
233
|
+
return "Float64"
|
|
146
234
|
return self.data_type
|
|
147
235
|
|
|
148
236
|
|
|
149
237
|
class FieldInput(BaseModel):
|
|
150
238
|
"""Represents a single field with its name and data type, typically for defining an output column."""
|
|
239
|
+
|
|
151
240
|
name: str
|
|
152
241
|
data_type: DataType | Literal["Auto"] | DataTypeStr | None = AUTO_DATA_TYPE
|
|
153
242
|
|
|
154
243
|
|
|
155
244
|
class FunctionInput(BaseModel):
|
|
156
245
|
"""Defines a formula to be applied, including the output field information."""
|
|
246
|
+
|
|
157
247
|
field: FieldInput
|
|
158
248
|
function: str
|
|
159
249
|
|
|
160
250
|
def __init__(self, field: FieldInput = None, function: str = None, **data):
|
|
161
251
|
if field is not None:
|
|
162
|
-
data[
|
|
252
|
+
data["field"] = field
|
|
163
253
|
if function is not None:
|
|
164
|
-
data[
|
|
254
|
+
data["function"] = function
|
|
165
255
|
super().__init__(**data)
|
|
166
256
|
|
|
167
257
|
|
|
168
258
|
class BasicFilter(BaseModel):
|
|
169
|
-
"""Defines a simple, single-condition filter (e.g., 'column' 'equals' 'value').
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
259
|
+
"""Defines a simple, single-condition filter (e.g., 'column' 'equals' 'value').
|
|
260
|
+
|
|
261
|
+
Attributes:
|
|
262
|
+
field: The column name to filter on.
|
|
263
|
+
operator: The comparison operator (FilterOperator enum value or symbol).
|
|
264
|
+
value: The value to compare against.
|
|
265
|
+
value2: Second value for BETWEEN operator (optional).
|
|
266
|
+
"""
|
|
267
|
+
|
|
268
|
+
field: str = ""
|
|
269
|
+
operator: FilterOperator | str = FilterOperator.EQUALS
|
|
270
|
+
value: str = ""
|
|
271
|
+
value2: str | None = None # For BETWEEN operator
|
|
272
|
+
|
|
273
|
+
# Keep old field names for backward compatibility
|
|
274
|
+
filter_type: str | None = None
|
|
275
|
+
filter_value: str | None = None
|
|
276
|
+
|
|
277
|
+
def __init__(
|
|
278
|
+
self,
|
|
279
|
+
field: str = None,
|
|
280
|
+
operator: FilterOperator | str = None,
|
|
281
|
+
value: str = None,
|
|
282
|
+
value2: str = None,
|
|
283
|
+
# Backward compatibility parameters
|
|
284
|
+
filter_type: str = None,
|
|
285
|
+
filter_value: str = None,
|
|
286
|
+
**data,
|
|
287
|
+
):
|
|
288
|
+
# Handle backward compatibility
|
|
289
|
+
if filter_type is not None and operator is None:
|
|
290
|
+
data["operator"] = filter_type
|
|
291
|
+
elif operator is not None:
|
|
292
|
+
data["operator"] = operator
|
|
293
|
+
|
|
294
|
+
if filter_value is not None and value is None:
|
|
295
|
+
data["value"] = filter_value
|
|
296
|
+
elif value is not None:
|
|
297
|
+
data["value"] = value
|
|
173
298
|
|
|
174
|
-
def __init__(self, field: str = None, filter_type: str = None, filter_value: str = None, **data):
|
|
175
299
|
if field is not None:
|
|
176
|
-
data[
|
|
177
|
-
if
|
|
178
|
-
data[
|
|
179
|
-
|
|
180
|
-
data['filter_value'] = filter_value
|
|
300
|
+
data["field"] = field
|
|
301
|
+
if value2 is not None:
|
|
302
|
+
data["value2"] = value2
|
|
303
|
+
|
|
181
304
|
super().__init__(**data)
|
|
182
305
|
|
|
306
|
+
@model_validator(mode="after")
|
|
307
|
+
def normalize_operator(self):
|
|
308
|
+
"""Normalize the operator to FilterOperator enum."""
|
|
309
|
+
if isinstance(self.operator, str):
|
|
310
|
+
try:
|
|
311
|
+
self.operator = FilterOperator.from_symbol(self.operator)
|
|
312
|
+
except ValueError:
|
|
313
|
+
# Keep as string if conversion fails (for backward compat)
|
|
314
|
+
pass
|
|
315
|
+
return self
|
|
316
|
+
|
|
317
|
+
def get_operator(self) -> FilterOperator:
|
|
318
|
+
"""Get the operator as FilterOperator enum."""
|
|
319
|
+
if isinstance(self.operator, FilterOperator):
|
|
320
|
+
return self.operator
|
|
321
|
+
return FilterOperator.from_symbol(self.operator)
|
|
322
|
+
|
|
323
|
+
def to_yaml_dict(self) -> BasicFilterYaml:
|
|
324
|
+
"""Serialize for YAML output."""
|
|
325
|
+
result: BasicFilterYaml = {
|
|
326
|
+
"field": self.field,
|
|
327
|
+
"operator": self.operator.value if isinstance(self.operator, FilterOperator) else self.operator,
|
|
328
|
+
"value": self.value,
|
|
329
|
+
}
|
|
330
|
+
if self.value2:
|
|
331
|
+
result["value2"] = self.value2
|
|
332
|
+
return result
|
|
333
|
+
|
|
334
|
+
@classmethod
|
|
335
|
+
def from_yaml_dict(cls, data: dict) -> "BasicFilter":
|
|
336
|
+
"""Load from YAML format."""
|
|
337
|
+
return cls(
|
|
338
|
+
field=data.get("field", ""),
|
|
339
|
+
operator=data.get("operator", FilterOperator.EQUALS),
|
|
340
|
+
value=data.get("value", ""),
|
|
341
|
+
value2=data.get("value2"),
|
|
342
|
+
)
|
|
343
|
+
|
|
183
344
|
|
|
184
345
|
class FilterInput(BaseModel):
|
|
185
|
-
"""Defines the settings for a filter operation, supporting basic or advanced (expression-based) modes.
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
346
|
+
"""Defines the settings for a filter operation, supporting basic or advanced (expression-based) modes.
|
|
347
|
+
|
|
348
|
+
Attributes:
|
|
349
|
+
mode: The filter mode - "basic" or "advanced".
|
|
350
|
+
basic_filter: The basic filter configuration (used when mode="basic").
|
|
351
|
+
advanced_filter: The advanced filter expression string (used when mode="advanced").
|
|
352
|
+
"""
|
|
353
|
+
|
|
354
|
+
mode: FilterModeLiteral = "basic"
|
|
355
|
+
basic_filter: BasicFilter | None = None
|
|
356
|
+
advanced_filter: str = ""
|
|
357
|
+
|
|
358
|
+
# Keep old field name for backward compatibility
|
|
359
|
+
filter_type: str | None = None
|
|
360
|
+
|
|
361
|
+
def __init__(
|
|
362
|
+
self,
|
|
363
|
+
mode: FilterModeLiteral = None,
|
|
364
|
+
basic_filter: BasicFilter = None,
|
|
365
|
+
advanced_filter: str = None,
|
|
366
|
+
# Backward compatibility
|
|
367
|
+
filter_type: str = None,
|
|
368
|
+
**data,
|
|
369
|
+
):
|
|
370
|
+
# Handle backward compatibility: filter_type -> mode
|
|
371
|
+
if filter_type is not None and mode is None:
|
|
372
|
+
data["mode"] = filter_type
|
|
373
|
+
elif mode is not None:
|
|
374
|
+
data["mode"] = mode
|
|
189
375
|
|
|
190
|
-
def __init__(self, advanced_filter: str = None, basic_filter: BasicFilter = None,
|
|
191
|
-
filter_type: str = None, **data):
|
|
192
376
|
if advanced_filter is not None:
|
|
193
|
-
data[
|
|
377
|
+
data["advanced_filter"] = advanced_filter
|
|
194
378
|
if basic_filter is not None:
|
|
195
|
-
data[
|
|
196
|
-
|
|
197
|
-
data['filter_type'] = filter_type
|
|
379
|
+
data["basic_filter"] = basic_filter
|
|
380
|
+
|
|
198
381
|
super().__init__(**data)
|
|
199
382
|
|
|
383
|
+
@model_validator(mode="after")
|
|
384
|
+
def ensure_basic_filter(self):
|
|
385
|
+
"""Ensure basic_filter exists when mode is basic."""
|
|
386
|
+
if self.mode == "basic" and self.basic_filter is None:
|
|
387
|
+
self.basic_filter = BasicFilter()
|
|
388
|
+
return self
|
|
389
|
+
|
|
390
|
+
def is_advanced(self) -> bool:
|
|
391
|
+
"""Check if filter is in advanced mode."""
|
|
392
|
+
return self.mode == "advanced"
|
|
393
|
+
|
|
394
|
+
def to_yaml_dict(self) -> FilterInputYaml:
|
|
395
|
+
"""Serialize for YAML output."""
|
|
396
|
+
result: FilterInputYaml = {"mode": self.mode}
|
|
397
|
+
if self.mode == "basic" and self.basic_filter:
|
|
398
|
+
result["basic_filter"] = self.basic_filter.to_yaml_dict()
|
|
399
|
+
elif self.mode == "advanced" and self.advanced_filter:
|
|
400
|
+
result["advanced_filter"] = self.advanced_filter
|
|
401
|
+
return result
|
|
402
|
+
|
|
403
|
+
@classmethod
|
|
404
|
+
def from_yaml_dict(cls, data: dict) -> "FilterInput":
|
|
405
|
+
"""Load from YAML format."""
|
|
406
|
+
mode = data.get("mode", "basic")
|
|
407
|
+
basic_filter = None
|
|
408
|
+
if "basic_filter" in data:
|
|
409
|
+
basic_filter = BasicFilter.from_yaml_dict(data["basic_filter"])
|
|
410
|
+
return cls(
|
|
411
|
+
mode=mode,
|
|
412
|
+
basic_filter=basic_filter,
|
|
413
|
+
advanced_filter=data.get("advanced_filter", ""),
|
|
414
|
+
)
|
|
415
|
+
|
|
200
416
|
|
|
201
417
|
class SelectInputs(BaseModel):
|
|
202
418
|
"""A container for a list of `SelectInput` objects (pure data, no logic)."""
|
|
203
|
-
renames: List[SelectInput] = Field(default_factory=list)
|
|
204
419
|
|
|
205
|
-
|
|
420
|
+
renames: list[SelectInput] = Field(default_factory=list)
|
|
421
|
+
|
|
422
|
+
def __init__(self, renames: list[SelectInput] = None, **kwargs):
|
|
206
423
|
if renames is not None:
|
|
207
|
-
kwargs[
|
|
424
|
+
kwargs["renames"] = renames
|
|
208
425
|
else:
|
|
209
|
-
kwargs[
|
|
426
|
+
kwargs["renames"] = []
|
|
210
427
|
super().__init__(**kwargs)
|
|
211
428
|
|
|
212
429
|
def to_yaml_dict(self) -> JoinInputsYaml:
|
|
@@ -220,7 +437,7 @@ class SelectInputs(BaseModel):
|
|
|
220
437
|
return cls(renames=[SelectInput.from_yaml_dict(item) for item in items])
|
|
221
438
|
|
|
222
439
|
@classmethod
|
|
223
|
-
def create_from_list(cls, col_list:
|
|
440
|
+
def create_from_list(cls, col_list: list[str]) -> "SelectInputs":
|
|
224
441
|
"""Creates a SelectInputs object from a simple list of column names."""
|
|
225
442
|
return cls(renames=[SelectInput(old_name=c) for c in col_list])
|
|
226
443
|
|
|
@@ -229,31 +446,36 @@ class SelectInputs(BaseModel):
|
|
|
229
446
|
"""Creates a SelectInputs object from a Polars DataFrame's columns."""
|
|
230
447
|
return cls(renames=[SelectInput(old_name=c) for c in df.columns])
|
|
231
448
|
|
|
449
|
+
def remove_select_input(self, old_key: str) -> None:
|
|
450
|
+
"""Removes a SelectInput from the list based on its original name."""
|
|
451
|
+
self.renames = [rename for rename in self.renames if rename.old_name != old_key]
|
|
452
|
+
|
|
232
453
|
|
|
233
454
|
class JoinInputs(SelectInputs):
|
|
234
455
|
"""Data model for join-specific select inputs (extends SelectInputs)."""
|
|
235
456
|
|
|
236
|
-
def __init__(self, renames:
|
|
457
|
+
def __init__(self, renames: list[SelectInput] = None, **kwargs):
|
|
237
458
|
if renames is not None:
|
|
238
|
-
kwargs[
|
|
459
|
+
kwargs["renames"] = renames
|
|
239
460
|
else:
|
|
240
|
-
kwargs[
|
|
461
|
+
kwargs["renames"] = []
|
|
241
462
|
super().__init__(**kwargs)
|
|
242
463
|
|
|
243
464
|
|
|
244
465
|
class JoinMap(BaseModel):
|
|
245
466
|
"""Defines a single mapping between a left and right column for a join key."""
|
|
246
|
-
|
|
247
|
-
|
|
467
|
+
|
|
468
|
+
left_col: str | None = None
|
|
469
|
+
right_col: str | None = None
|
|
248
470
|
|
|
249
471
|
def __init__(self, left_col: str = None, right_col: str = None, **data):
|
|
250
472
|
if left_col is not None:
|
|
251
|
-
data[
|
|
473
|
+
data["left_col"] = left_col
|
|
252
474
|
if right_col is not None:
|
|
253
|
-
data[
|
|
475
|
+
data["right_col"] = right_col
|
|
254
476
|
super().__init__(**data)
|
|
255
477
|
|
|
256
|
-
@model_validator(mode=
|
|
478
|
+
@model_validator(mode="after")
|
|
257
479
|
def set_default_right_col(self):
|
|
258
480
|
"""If right_col is None, default it to left_col."""
|
|
259
481
|
if self.right_col is None:
|
|
@@ -263,30 +485,31 @@ class JoinMap(BaseModel):
|
|
|
263
485
|
|
|
264
486
|
class CrossJoinInput(BaseModel):
|
|
265
487
|
"""Data model for cross join operations."""
|
|
488
|
+
|
|
266
489
|
left_select: JoinInputs
|
|
267
490
|
right_select: JoinInputs
|
|
268
491
|
|
|
269
|
-
@model_validator(mode=
|
|
492
|
+
@model_validator(mode="before")
|
|
270
493
|
@classmethod
|
|
271
494
|
def parse_inputs(cls, data: Any) -> Any:
|
|
272
495
|
"""Parse flexible input formats before validation."""
|
|
273
496
|
if isinstance(data, dict):
|
|
274
497
|
# Parse join_mapping
|
|
275
|
-
if
|
|
276
|
-
data[
|
|
498
|
+
if "join_mapping" in data:
|
|
499
|
+
data["join_mapping"] = cls._parse_join_mapping(data["join_mapping"])
|
|
277
500
|
|
|
278
501
|
# Parse left_select
|
|
279
|
-
if
|
|
280
|
-
data[
|
|
502
|
+
if "left_select" in data:
|
|
503
|
+
data["left_select"] = cls._parse_select(data["left_select"])
|
|
281
504
|
|
|
282
505
|
# Parse right_select
|
|
283
|
-
if
|
|
284
|
-
data[
|
|
506
|
+
if "right_select" in data:
|
|
507
|
+
data["right_select"] = cls._parse_select(data["right_select"])
|
|
285
508
|
|
|
286
509
|
return data
|
|
287
510
|
|
|
288
511
|
@staticmethod
|
|
289
|
-
def _parse_join_mapping(join_mapping: Any) ->
|
|
512
|
+
def _parse_join_mapping(join_mapping: Any) -> list[JoinMap]:
|
|
290
513
|
"""Parse various join_mapping formats."""
|
|
291
514
|
# Already a list of JoinMaps
|
|
292
515
|
if isinstance(join_mapping, list):
|
|
@@ -336,22 +559,24 @@ class CrossJoinInput(BaseModel):
|
|
|
336
559
|
|
|
337
560
|
# Dict with 'select' (new YAML) or 'renames' (internal) key
|
|
338
561
|
if isinstance(select, dict):
|
|
339
|
-
if
|
|
340
|
-
return JoinInputs(renames=[SelectInput.from_yaml_dict(s) for s in select[
|
|
341
|
-
if
|
|
562
|
+
if "select" in select:
|
|
563
|
+
return JoinInputs(renames=[SelectInput.from_yaml_dict(s) for s in select["select"]])
|
|
564
|
+
if "renames" in select:
|
|
342
565
|
return JoinInputs(**select)
|
|
343
566
|
|
|
344
567
|
raise ValueError(f"Invalid select format: {type(select)}")
|
|
345
568
|
|
|
346
|
-
def __init__(
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
569
|
+
def __init__(
|
|
570
|
+
self,
|
|
571
|
+
left_select: JoinInputs | list[SelectInput] | list[str] = None,
|
|
572
|
+
right_select: JoinInputs | list[SelectInput] | list[str] = None,
|
|
573
|
+
**data,
|
|
574
|
+
):
|
|
350
575
|
"""Custom init for backward compatibility with positional arguments."""
|
|
351
576
|
if left_select is not None:
|
|
352
|
-
data[
|
|
577
|
+
data["left_select"] = left_select
|
|
353
578
|
if right_select is not None:
|
|
354
|
-
data[
|
|
579
|
+
data["right_select"] = right_select
|
|
355
580
|
super().__init__(**data)
|
|
356
581
|
|
|
357
582
|
def to_yaml_dict(self) -> CrossJoinInputYaml:
|
|
@@ -361,35 +586,43 @@ class CrossJoinInput(BaseModel):
|
|
|
361
586
|
"right_select": self.right_select.to_yaml_dict(),
|
|
362
587
|
}
|
|
363
588
|
|
|
589
|
+
def add_new_select_column(self, select_input: SelectInput, side: str) -> None:
|
|
590
|
+
"""Adds a new column to the selection for either the left or right side."""
|
|
591
|
+
target_input = self.right_select if side == "right" else self.left_select
|
|
592
|
+
if select_input.new_name is None:
|
|
593
|
+
select_input.new_name = select_input.old_name
|
|
594
|
+
target_input.renames.append(select_input)
|
|
595
|
+
|
|
364
596
|
|
|
365
597
|
class JoinInput(BaseModel):
|
|
366
598
|
"""Data model for standard SQL-style join operations."""
|
|
367
|
-
|
|
599
|
+
|
|
600
|
+
join_mapping: list[JoinMap]
|
|
368
601
|
left_select: JoinInputs
|
|
369
602
|
right_select: JoinInputs
|
|
370
|
-
how: JoinStrategy =
|
|
603
|
+
how: JoinStrategy = "inner"
|
|
371
604
|
|
|
372
|
-
@model_validator(mode=
|
|
605
|
+
@model_validator(mode="before")
|
|
373
606
|
@classmethod
|
|
374
607
|
def parse_inputs(cls, data: Any) -> Any:
|
|
375
608
|
"""Parse flexible input formats before validation."""
|
|
376
609
|
if isinstance(data, dict):
|
|
377
610
|
# Parse join_mapping
|
|
378
|
-
if
|
|
379
|
-
data[
|
|
611
|
+
if "join_mapping" in data:
|
|
612
|
+
data["join_mapping"] = cls._parse_join_mapping(data["join_mapping"])
|
|
380
613
|
|
|
381
614
|
# Parse left_select
|
|
382
|
-
if
|
|
383
|
-
data[
|
|
615
|
+
if "left_select" in data:
|
|
616
|
+
data["left_select"] = cls._parse_select(data["left_select"])
|
|
384
617
|
|
|
385
618
|
# Parse right_select
|
|
386
|
-
if
|
|
387
|
-
data[
|
|
619
|
+
if "right_select" in data:
|
|
620
|
+
data["right_select"] = cls._parse_select(data["right_select"])
|
|
388
621
|
|
|
389
622
|
return data
|
|
390
623
|
|
|
391
624
|
@staticmethod
|
|
392
|
-
def _parse_join_mapping(join_mapping: Any) ->
|
|
625
|
+
def _parse_join_mapping(join_mapping: Any) -> list[JoinMap]:
|
|
393
626
|
"""Parse various join_mapping formats."""
|
|
394
627
|
# Already a list of JoinMaps
|
|
395
628
|
if isinstance(join_mapping, list):
|
|
@@ -439,28 +672,30 @@ class JoinInput(BaseModel):
|
|
|
439
672
|
|
|
440
673
|
# Dict with 'select' (new YAML) or 'renames' (internal) key
|
|
441
674
|
if isinstance(select, dict):
|
|
442
|
-
if
|
|
443
|
-
return JoinInputs(renames=[SelectInput.from_yaml_dict(s) for s in select[
|
|
444
|
-
if
|
|
675
|
+
if "select" in select:
|
|
676
|
+
return JoinInputs(renames=[SelectInput.from_yaml_dict(s) for s in select["select"]])
|
|
677
|
+
if "renames" in select:
|
|
445
678
|
return JoinInputs(**select)
|
|
446
679
|
|
|
447
680
|
raise ValueError(f"Invalid select format: {type(select)}")
|
|
448
681
|
|
|
449
|
-
def __init__(
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
682
|
+
def __init__(
|
|
683
|
+
self,
|
|
684
|
+
join_mapping: list[JoinMap] | JoinMap | tuple[str, str] | str | list[tuple] | list[str] = None,
|
|
685
|
+
left_select: JoinInputs | list[SelectInput] | list[str] = None,
|
|
686
|
+
right_select: JoinInputs | list[SelectInput] | list[str] = None,
|
|
687
|
+
how: JoinStrategy = "inner",
|
|
688
|
+
**data,
|
|
689
|
+
):
|
|
455
690
|
"""Custom init for backward compatibility with positional arguments."""
|
|
456
691
|
if join_mapping is not None:
|
|
457
|
-
data[
|
|
692
|
+
data["join_mapping"] = join_mapping
|
|
458
693
|
if left_select is not None:
|
|
459
|
-
data[
|
|
694
|
+
data["left_select"] = left_select
|
|
460
695
|
if right_select is not None:
|
|
461
|
-
data[
|
|
696
|
+
data["right_select"] = right_select
|
|
462
697
|
if how is not None:
|
|
463
|
-
data[
|
|
698
|
+
data["how"] = how
|
|
464
699
|
|
|
465
700
|
super().__init__(**data)
|
|
466
701
|
|
|
@@ -473,24 +708,34 @@ class JoinInput(BaseModel):
|
|
|
473
708
|
"how": self.how,
|
|
474
709
|
}
|
|
475
710
|
|
|
711
|
+
def add_new_select_column(self, select_input: SelectInput, side: str) -> None:
|
|
712
|
+
"""Adds a new column to the selection for either the left or right side."""
|
|
713
|
+
target_input = self.right_select if side == "right" else self.left_select
|
|
714
|
+
if select_input.new_name is None:
|
|
715
|
+
select_input.new_name = select_input.old_name
|
|
716
|
+
target_input.renames.append(select_input)
|
|
717
|
+
|
|
476
718
|
|
|
477
719
|
class FuzzyMatchInput(BaseModel):
|
|
478
720
|
"""Data model for fuzzy matching join operations."""
|
|
479
|
-
|
|
721
|
+
|
|
722
|
+
join_mapping: list[FuzzyMapping]
|
|
480
723
|
left_select: JoinInputs
|
|
481
724
|
right_select: JoinInputs
|
|
482
|
-
how: JoinStrategy =
|
|
725
|
+
how: JoinStrategy = "inner"
|
|
483
726
|
aggregate_output: bool = False
|
|
484
727
|
|
|
485
|
-
def __init__(
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
728
|
+
def __init__(
|
|
729
|
+
self,
|
|
730
|
+
left_select: JoinInputs | list[SelectInput] | list[str] = None,
|
|
731
|
+
right_select: JoinInputs | list[SelectInput] | list[str] = None,
|
|
732
|
+
**data,
|
|
733
|
+
):
|
|
489
734
|
"""Custom init for backward compatibility with positional arguments."""
|
|
490
735
|
if left_select is not None:
|
|
491
|
-
data[
|
|
736
|
+
data["left_select"] = left_select
|
|
492
737
|
if right_select is not None:
|
|
493
|
-
data[
|
|
738
|
+
data["right_select"] = right_select
|
|
494
739
|
|
|
495
740
|
super().__init__(**data)
|
|
496
741
|
|
|
@@ -504,6 +749,13 @@ class FuzzyMatchInput(BaseModel):
|
|
|
504
749
|
"aggregate_output": self.aggregate_output,
|
|
505
750
|
}
|
|
506
751
|
|
|
752
|
+
def add_new_select_column(self, select_input: SelectInput, side: str) -> None:
|
|
753
|
+
"""Adds a new column to the selection for either the left or right side."""
|
|
754
|
+
target_input = self.right_select if side == "right" else self.left_select
|
|
755
|
+
if select_input.new_name is None:
|
|
756
|
+
select_input.new_name = select_input.old_name
|
|
757
|
+
target_input.renames.append(select_input)
|
|
758
|
+
|
|
507
759
|
@staticmethod
|
|
508
760
|
def _parse_select(select: Any) -> JoinInputs:
|
|
509
761
|
"""Parse various select input formats."""
|
|
@@ -522,25 +774,25 @@ class FuzzyMatchInput(BaseModel):
|
|
|
522
774
|
|
|
523
775
|
# Dict with 'select' (new YAML) or 'renames' (internal) key
|
|
524
776
|
if isinstance(select, dict):
|
|
525
|
-
if
|
|
526
|
-
return JoinInputs(renames=[SelectInput.from_yaml_dict(s) for s in select[
|
|
527
|
-
if
|
|
777
|
+
if "select" in select:
|
|
778
|
+
return JoinInputs(renames=[SelectInput.from_yaml_dict(s) for s in select["select"]])
|
|
779
|
+
if "renames" in select:
|
|
528
780
|
return JoinInputs(**select)
|
|
529
781
|
|
|
530
782
|
raise ValueError(f"Invalid select format: {type(select)}")
|
|
531
783
|
|
|
532
|
-
@model_validator(mode=
|
|
784
|
+
@model_validator(mode="before")
|
|
533
785
|
@classmethod
|
|
534
786
|
def parse_inputs(cls, data: Any) -> Any:
|
|
535
787
|
"""Parse flexible input formats before validation."""
|
|
536
788
|
if isinstance(data, dict):
|
|
537
789
|
# Parse left_select
|
|
538
|
-
if
|
|
539
|
-
data[
|
|
790
|
+
if "left_select" in data:
|
|
791
|
+
data["left_select"] = cls._parse_select(data["left_select"])
|
|
540
792
|
|
|
541
793
|
# Parse right_select
|
|
542
|
-
if
|
|
543
|
-
data[
|
|
794
|
+
if "right_select" in data:
|
|
795
|
+
data["right_select"] = cls._parse_select(data["right_select"])
|
|
544
796
|
|
|
545
797
|
return data
|
|
546
798
|
|
|
@@ -574,26 +826,27 @@ class AggColl(BaseModel):
|
|
|
574
826
|
output_type='float'
|
|
575
827
|
)
|
|
576
828
|
"""
|
|
829
|
+
|
|
577
830
|
old_name: str
|
|
578
831
|
agg: str
|
|
579
|
-
new_name:
|
|
580
|
-
output_type:
|
|
832
|
+
new_name: str | None = None
|
|
833
|
+
output_type: str | None = None
|
|
581
834
|
|
|
582
|
-
def __init__(self, old_name: str, agg: str, new_name:
|
|
583
|
-
data = {
|
|
835
|
+
def __init__(self, old_name: str, agg: str, new_name: str | None = None, output_type: str | None = None):
|
|
836
|
+
data = {"old_name": old_name, "agg": agg}
|
|
584
837
|
if new_name is not None:
|
|
585
|
-
data[
|
|
838
|
+
data["new_name"] = new_name
|
|
586
839
|
if output_type is not None:
|
|
587
|
-
data[
|
|
840
|
+
data["output_type"] = output_type
|
|
588
841
|
|
|
589
842
|
super().__init__(**data)
|
|
590
843
|
|
|
591
|
-
@model_validator(mode=
|
|
844
|
+
@model_validator(mode="after")
|
|
592
845
|
def set_defaults(self):
|
|
593
846
|
"""Set default new_name and output_type based on agg function."""
|
|
594
847
|
# Set new_name
|
|
595
848
|
if self.new_name is None:
|
|
596
|
-
if self.agg !=
|
|
849
|
+
if self.agg != "groupby":
|
|
597
850
|
self.new_name = self.old_name + "_" + self.agg
|
|
598
851
|
else:
|
|
599
852
|
self.new_name = self.old_name
|
|
@@ -610,9 +863,9 @@ class AggColl(BaseModel):
|
|
|
610
863
|
@property
|
|
611
864
|
def agg_func(self):
|
|
612
865
|
"""Returns the corresponding Polars aggregation function from the `agg` string."""
|
|
613
|
-
if self.agg ==
|
|
866
|
+
if self.agg == "groupby":
|
|
614
867
|
return self.agg
|
|
615
|
-
elif self.agg ==
|
|
868
|
+
elif self.agg == "concat":
|
|
616
869
|
return string_concat
|
|
617
870
|
else:
|
|
618
871
|
return getattr(pl, self.agg) if isinstance(self.agg, str) else self.agg
|
|
@@ -636,33 +889,36 @@ class GroupByInput(BaseModel):
|
|
|
636
889
|
AggColl(old_name='col1', agg='sum'), AggColl(old_name='col2', agg='mean')]
|
|
637
890
|
)
|
|
638
891
|
"""
|
|
639
|
-
agg_cols: List[AggColl]
|
|
640
892
|
|
|
641
|
-
|
|
893
|
+
agg_cols: list[AggColl]
|
|
894
|
+
|
|
895
|
+
def __init__(self, agg_cols: list[AggColl]):
|
|
642
896
|
"""Backwards compatibility implementation"""
|
|
643
897
|
super().__init__(agg_cols=agg_cols)
|
|
644
898
|
|
|
645
899
|
|
|
646
900
|
class PivotInput(BaseModel):
|
|
647
901
|
"""Defines the settings for a pivot (long-to-wide) operation."""
|
|
648
|
-
|
|
902
|
+
|
|
903
|
+
index_columns: list[str]
|
|
649
904
|
pivot_column: str
|
|
650
905
|
value_col: str
|
|
651
|
-
aggregations:
|
|
906
|
+
aggregations: list[str]
|
|
652
907
|
|
|
653
908
|
@property
|
|
654
|
-
def grouped_columns(self) ->
|
|
909
|
+
def grouped_columns(self) -> list[str]:
|
|
655
910
|
"""Returns the list of columns to be used for the initial grouping stage of the pivot."""
|
|
656
911
|
return self.index_columns + [self.pivot_column]
|
|
657
912
|
|
|
658
913
|
def get_group_by_input(self) -> GroupByInput:
|
|
659
914
|
"""Constructs the `GroupByInput` needed for the pre-aggregation step of the pivot."""
|
|
660
|
-
group_by_cols = [AggColl(old_name=c, agg=
|
|
661
|
-
agg_cols = [
|
|
662
|
-
|
|
915
|
+
group_by_cols = [AggColl(old_name=c, agg="groupby") for c in self.grouped_columns]
|
|
916
|
+
agg_cols = [
|
|
917
|
+
AggColl(old_name=self.value_col, agg=aggregation, new_name=aggregation) for aggregation in self.aggregations
|
|
918
|
+
]
|
|
663
919
|
return GroupByInput(agg_cols=group_by_cols + agg_cols)
|
|
664
920
|
|
|
665
|
-
def get_index_columns(self) ->
|
|
921
|
+
def get_index_columns(self) -> list[pl.col]:
|
|
666
922
|
"""Returns the index columns as Polars column expressions."""
|
|
667
923
|
return [pl.col(c) for c in self.index_columns]
|
|
668
924
|
|
|
@@ -672,50 +928,54 @@ class PivotInput(BaseModel):
|
|
|
672
928
|
|
|
673
929
|
def get_values_expr(self) -> pl.Expr:
|
|
674
930
|
"""Creates the struct expression used to gather the values for pivoting."""
|
|
675
|
-
return pl.struct([pl.col(c) for c in self.aggregations]).alias(
|
|
931
|
+
return pl.struct([pl.col(c) for c in self.aggregations]).alias("vals")
|
|
676
932
|
|
|
677
933
|
|
|
678
934
|
class SortByInput(BaseModel):
|
|
679
935
|
"""Defines a single sort condition on a column, including the direction."""
|
|
936
|
+
|
|
680
937
|
column: str
|
|
681
|
-
how:
|
|
938
|
+
how: str | None = "asc"
|
|
682
939
|
|
|
683
940
|
|
|
684
941
|
class RecordIdInput(BaseModel):
|
|
685
942
|
"""Defines settings for adding a record ID (row number) column to the data."""
|
|
686
|
-
|
|
943
|
+
|
|
944
|
+
output_column_name: str = "record_id"
|
|
687
945
|
offset: int = 1
|
|
688
|
-
group_by:
|
|
689
|
-
group_by_columns:
|
|
946
|
+
group_by: bool | None = False
|
|
947
|
+
group_by_columns: list[str] | None = Field(default_factory=list)
|
|
690
948
|
|
|
691
949
|
|
|
692
950
|
class TextToRowsInput(BaseModel):
|
|
693
951
|
"""Defines settings for splitting a text column into multiple rows based on a delimiter."""
|
|
952
|
+
|
|
694
953
|
column_to_split: str
|
|
695
|
-
output_column_name:
|
|
696
|
-
split_by_fixed_value:
|
|
697
|
-
split_fixed_value:
|
|
698
|
-
split_by_column:
|
|
954
|
+
output_column_name: str | None = None
|
|
955
|
+
split_by_fixed_value: bool | None = True
|
|
956
|
+
split_fixed_value: str | None = ","
|
|
957
|
+
split_by_column: str | None = None
|
|
699
958
|
|
|
700
959
|
|
|
701
960
|
class UnpivotInput(BaseModel):
|
|
702
961
|
"""Defines settings for an unpivot (wide-to-long) operation."""
|
|
962
|
+
|
|
703
963
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
704
964
|
|
|
705
|
-
index_columns:
|
|
706
|
-
value_columns:
|
|
707
|
-
data_type_selector:
|
|
708
|
-
data_type_selector_mode: Literal[
|
|
965
|
+
index_columns: list[str] = Field(default_factory=list)
|
|
966
|
+
value_columns: list[str] = Field(default_factory=list)
|
|
967
|
+
data_type_selector: Literal["float", "all", "date", "numeric", "string"] | None = None
|
|
968
|
+
data_type_selector_mode: Literal["data_type", "column"] = "column"
|
|
709
969
|
|
|
710
970
|
@property
|
|
711
|
-
def data_type_selector_expr(self) ->
|
|
971
|
+
def data_type_selector_expr(self) -> Callable | None:
|
|
712
972
|
"""Returns a Polars selector function based on the `data_type_selector` string."""
|
|
713
|
-
if self.data_type_selector_mode ==
|
|
973
|
+
if self.data_type_selector_mode == "data_type":
|
|
714
974
|
if self.data_type_selector is not None:
|
|
715
975
|
try:
|
|
716
976
|
return getattr(selectors, self.data_type_selector)
|
|
717
|
-
except Exception
|
|
718
|
-
print(f
|
|
977
|
+
except Exception:
|
|
978
|
+
print(f"Could not find the selector: {self.data_type_selector}")
|
|
719
979
|
return selectors.all
|
|
720
980
|
return selectors.all
|
|
721
981
|
return None
|
|
@@ -723,24 +983,28 @@ class UnpivotInput(BaseModel):
|
|
|
723
983
|
|
|
724
984
|
class UnionInput(BaseModel):
|
|
725
985
|
"""Defines settings for a union (concatenation) operation."""
|
|
726
|
-
|
|
986
|
+
|
|
987
|
+
mode: Literal["selective", "relaxed"] = "relaxed"
|
|
727
988
|
|
|
728
989
|
|
|
729
990
|
class UniqueInput(BaseModel):
|
|
730
991
|
"""Defines settings for a uniqueness operation, specifying columns and which row to keep."""
|
|
731
|
-
|
|
992
|
+
|
|
993
|
+
columns: list[str] | None = None
|
|
732
994
|
strategy: Literal["first", "last", "any", "none"] = "any"
|
|
733
995
|
|
|
734
996
|
|
|
735
997
|
class GraphSolverInput(BaseModel):
|
|
736
998
|
"""Defines settings for a graph-solving operation (e.g., finding connected components)."""
|
|
999
|
+
|
|
737
1000
|
col_from: str
|
|
738
1001
|
col_to: str
|
|
739
|
-
output_column_name:
|
|
1002
|
+
output_column_name: str | None = "graph_group"
|
|
740
1003
|
|
|
741
1004
|
|
|
742
1005
|
class PolarsCodeInput(BaseModel):
|
|
743
1006
|
"""A simple container for a string of user-provided Polars code to be executed."""
|
|
1007
|
+
|
|
744
1008
|
polars_code: str
|
|
745
1009
|
|
|
746
1010
|
|
|
@@ -752,42 +1016,39 @@ class SelectInputsManager:
|
|
|
752
1016
|
|
|
753
1017
|
# === Query Methods (read-only) ===
|
|
754
1018
|
|
|
755
|
-
def get_old_cols(self) ->
|
|
1019
|
+
def get_old_cols(self) -> set[str]:
|
|
756
1020
|
"""Returns a set of original column names to be kept in the selection."""
|
|
757
1021
|
return set(v.old_name for v in self.select_inputs.renames if v.keep)
|
|
758
1022
|
|
|
759
|
-
def get_new_cols(self) ->
|
|
1023
|
+
def get_new_cols(self) -> set[str]:
|
|
760
1024
|
"""Returns a set of new (renamed) column names to be kept in the selection."""
|
|
761
1025
|
return set(v.new_name for v in self.select_inputs.renames if v.keep)
|
|
762
1026
|
|
|
763
1027
|
def get_rename_table(self) -> dict[str, str]:
|
|
764
1028
|
"""Generates a dictionary for use in Polars' `.rename()` method."""
|
|
765
|
-
return {v.old_name: v.new_name for v in self.select_inputs.renames
|
|
766
|
-
if v.is_available and (v.keep or v.join_key)}
|
|
1029
|
+
return {v.old_name: v.new_name for v in self.select_inputs.renames if v.is_available and (v.keep or v.join_key)}
|
|
767
1030
|
|
|
768
|
-
def get_select_cols(self, include_join_key: bool = True) ->
|
|
1031
|
+
def get_select_cols(self, include_join_key: bool = True) -> list[str]:
|
|
769
1032
|
"""Gets a list of original column names to select from the source DataFrame."""
|
|
770
|
-
return [v.old_name for v in self.select_inputs.renames
|
|
771
|
-
if v.keep or (v.join_key and include_join_key)]
|
|
1033
|
+
return [v.old_name for v in self.select_inputs.renames if v.keep or (v.join_key and include_join_key)]
|
|
772
1034
|
|
|
773
1035
|
def has_drop_cols(self) -> bool:
|
|
774
1036
|
"""Checks if any column is marked to be dropped from the selection."""
|
|
775
1037
|
return any(not v.keep for v in self.select_inputs.renames)
|
|
776
1038
|
|
|
777
|
-
def get_drop_columns(self) ->
|
|
1039
|
+
def get_drop_columns(self) -> list[SelectInput]:
|
|
778
1040
|
"""Returns a list of SelectInput objects that are marked to be dropped."""
|
|
779
1041
|
return [v for v in self.select_inputs.renames if not v.keep and v.is_available]
|
|
780
1042
|
|
|
781
|
-
def get_non_jk_drop_columns(self) ->
|
|
1043
|
+
def get_non_jk_drop_columns(self) -> list[SelectInput]:
|
|
782
1044
|
"""Returns drop columns that are not join keys."""
|
|
783
|
-
return [v for v in self.select_inputs.renames
|
|
784
|
-
if not v.keep and v.is_available and not v.join_key]
|
|
1045
|
+
return [v for v in self.select_inputs.renames if not v.keep and v.is_available and not v.join_key]
|
|
785
1046
|
|
|
786
|
-
def find_by_old_name(self, old_name: str) ->
|
|
1047
|
+
def find_by_old_name(self, old_name: str) -> SelectInput | None:
|
|
787
1048
|
"""Find SelectInput by original column name."""
|
|
788
1049
|
return next((v for v in self.select_inputs.renames if v.old_name == old_name), None)
|
|
789
1050
|
|
|
790
|
-
def find_by_new_name(self, new_name: str) ->
|
|
1051
|
+
def find_by_new_name(self, new_name: str) -> SelectInput | None:
|
|
791
1052
|
"""Find SelectInput by new column name."""
|
|
792
1053
|
return next((v for v in self.select_inputs.renames if v.new_name == new_name), None)
|
|
793
1054
|
|
|
@@ -799,10 +1060,7 @@ class SelectInputsManager:
|
|
|
799
1060
|
|
|
800
1061
|
def remove_select_input(self, old_key: str) -> None:
|
|
801
1062
|
"""Removes a SelectInput from the list based on its original name."""
|
|
802
|
-
self.select_inputs.renames = [
|
|
803
|
-
rename for rename in self.select_inputs.renames
|
|
804
|
-
if rename.old_name != old_key
|
|
805
|
-
]
|
|
1063
|
+
self.select_inputs.renames = [rename for rename in self.select_inputs.renames if rename.old_name != old_key]
|
|
806
1064
|
|
|
807
1065
|
def unselect_field(self, old_key: str) -> None:
|
|
808
1066
|
"""Marks a field to be dropped from the final selection by setting `keep` to False."""
|
|
@@ -813,12 +1071,12 @@ class SelectInputsManager:
|
|
|
813
1071
|
# === Backward Compatibility Properties ===
|
|
814
1072
|
|
|
815
1073
|
@property
|
|
816
|
-
def old_cols(self) ->
|
|
1074
|
+
def old_cols(self) -> set[str]:
|
|
817
1075
|
"""Backward compatibility: Returns set of old column names."""
|
|
818
1076
|
return self.get_old_cols()
|
|
819
1077
|
|
|
820
1078
|
@property
|
|
821
|
-
def new_cols(self) ->
|
|
1079
|
+
def new_cols(self) -> set[str]:
|
|
822
1080
|
"""Backward compatibility: Returns set of new column names."""
|
|
823
1081
|
return self.get_new_cols()
|
|
824
1082
|
|
|
@@ -828,25 +1086,25 @@ class SelectInputsManager:
|
|
|
828
1086
|
return self.get_rename_table()
|
|
829
1087
|
|
|
830
1088
|
@property
|
|
831
|
-
def drop_columns(self) ->
|
|
1089
|
+
def drop_columns(self) -> list[SelectInput]:
|
|
832
1090
|
"""Backward compatibility: Returns list of columns to drop."""
|
|
833
1091
|
return self.get_drop_columns()
|
|
834
1092
|
|
|
835
1093
|
@property
|
|
836
|
-
def non_jk_drop_columns(self) ->
|
|
1094
|
+
def non_jk_drop_columns(self) -> list[SelectInput]:
|
|
837
1095
|
"""Backward compatibility: Returns non-join-key columns to drop."""
|
|
838
1096
|
return self.get_non_jk_drop_columns()
|
|
839
1097
|
|
|
840
1098
|
@property
|
|
841
|
-
def renames(self) ->
|
|
1099
|
+
def renames(self) -> list[SelectInput]:
|
|
842
1100
|
"""Backward compatibility: Direct access to renames list."""
|
|
843
1101
|
return self.select_inputs.renames
|
|
844
1102
|
|
|
845
|
-
def get_select_input_on_old_name(self, old_name: str) ->
|
|
1103
|
+
def get_select_input_on_old_name(self, old_name: str) -> SelectInput | None:
|
|
846
1104
|
"""Backward compatibility alias: Find SelectInput by original column name."""
|
|
847
1105
|
return self.find_by_old_name(old_name)
|
|
848
1106
|
|
|
849
|
-
def get_select_input_on_new_name(self, new_name: str) ->
|
|
1107
|
+
def get_select_input_on_new_name(self, new_name: str) -> SelectInput | None:
|
|
850
1108
|
"""Backward compatibility alias: Find SelectInput by new column name."""
|
|
851
1109
|
return self.find_by_new_name(new_name)
|
|
852
1110
|
|
|
@@ -865,7 +1123,7 @@ class JoinInputsManager(SelectInputsManager):
|
|
|
865
1123
|
|
|
866
1124
|
# === Query Methods ===
|
|
867
1125
|
|
|
868
|
-
def get_join_key_selects(self) ->
|
|
1126
|
+
def get_join_key_selects(self) -> list[SelectInput]:
|
|
869
1127
|
"""Returns only the `SelectInput` objects that are marked as join keys."""
|
|
870
1128
|
return [v for v in self.join_inputs.renames if v.join_key]
|
|
871
1129
|
|
|
@@ -879,13 +1137,13 @@ class JoinInputsManager(SelectInputsManager):
|
|
|
879
1137
|
]
|
|
880
1138
|
return JoinKeyRenameResponse(side, join_key_list)
|
|
881
1139
|
|
|
882
|
-
def get_join_key_rename_mapping(self, side: SideLit) ->
|
|
1140
|
+
def get_join_key_rename_mapping(self, side: SideLit) -> dict[str, str]:
|
|
883
1141
|
"""Returns a dictionary mapping original join key names to their temporary names."""
|
|
884
1142
|
join_key_response = self.get_join_key_renames(side)
|
|
885
1143
|
return {jkr.original_name: jkr.temp_name for jkr in join_key_response.join_key_renames}
|
|
886
1144
|
|
|
887
1145
|
@property
|
|
888
|
-
def join_key_selects(self) ->
|
|
1146
|
+
def join_key_selects(self) -> list[SelectInput]:
|
|
889
1147
|
"""Backward compatibility: Returns join key SelectInputs."""
|
|
890
1148
|
return self.get_join_key_selects()
|
|
891
1149
|
|
|
@@ -895,10 +1153,10 @@ class JoinSelectManagerMixin:
|
|
|
895
1153
|
|
|
896
1154
|
left_manager: JoinInputsManager
|
|
897
1155
|
right_manager: JoinInputsManager
|
|
898
|
-
input:
|
|
1156
|
+
input: CrossJoinInput | JoinInput | FuzzyMatchInput
|
|
899
1157
|
|
|
900
1158
|
@staticmethod
|
|
901
|
-
def parse_select(select:
|
|
1159
|
+
def parse_select(select: list[SelectInput] | list[str] | list[dict] | dict) -> JoinInputs:
|
|
902
1160
|
"""Parses various input formats into a standardized `JoinInputs` object."""
|
|
903
1161
|
if not select:
|
|
904
1162
|
return JoinInputs(renames=[])
|
|
@@ -908,7 +1166,7 @@ class JoinSelectManagerMixin:
|
|
|
908
1166
|
elif all(isinstance(c, dict) for c in select):
|
|
909
1167
|
return JoinInputs(renames=[SelectInput(**c) for c in select])
|
|
910
1168
|
elif isinstance(select, dict):
|
|
911
|
-
renames = select.get(
|
|
1169
|
+
renames = select.get("renames")
|
|
912
1170
|
if renames:
|
|
913
1171
|
return JoinInputs(renames=[SelectInput(**c) for c in renames])
|
|
914
1172
|
return JoinInputs(renames=[])
|
|
@@ -917,7 +1175,7 @@ class JoinSelectManagerMixin:
|
|
|
917
1175
|
|
|
918
1176
|
raise ValueError(f"Unable to parse select input: {type(select)}")
|
|
919
1177
|
|
|
920
|
-
def get_overlapping_columns(self) ->
|
|
1178
|
+
def get_overlapping_columns(self) -> set[str]:
|
|
921
1179
|
"""Finds column names that would conflict after the join."""
|
|
922
1180
|
return self.left_manager.get_new_cols() & self.right_manager.get_new_cols()
|
|
923
1181
|
|
|
@@ -929,16 +1187,14 @@ class JoinSelectManagerMixin:
|
|
|
929
1187
|
|
|
930
1188
|
new_name = old_col_name
|
|
931
1189
|
while new_name in current_names:
|
|
932
|
-
new_name = f
|
|
1190
|
+
new_name = f"{side}_{new_name}"
|
|
933
1191
|
return new_name
|
|
934
1192
|
|
|
935
1193
|
def add_new_select_column(self, select_input: SelectInput, side: str) -> None:
|
|
936
1194
|
"""Adds a new column to the selection for either the left or right side."""
|
|
937
|
-
target_input = self.input.right_select if side ==
|
|
1195
|
+
target_input = self.input.right_select if side == "right" else self.input.left_select
|
|
938
1196
|
|
|
939
|
-
select_input.new_name = self.auto_generate_new_col_name(
|
|
940
|
-
select_input.old_name, side=side
|
|
941
|
-
)
|
|
1197
|
+
select_input.new_name = self.auto_generate_new_col_name(select_input.old_name, side=side)
|
|
942
1198
|
|
|
943
1199
|
target_input.renames.append(select_input)
|
|
944
1200
|
|
|
@@ -952,19 +1208,17 @@ class CrossJoinInputManager(JoinSelectManagerMixin):
|
|
|
952
1208
|
self.right_manager = JoinInputsManager(self.input.right_select)
|
|
953
1209
|
|
|
954
1210
|
@classmethod
|
|
955
|
-
def create(
|
|
956
|
-
|
|
1211
|
+
def create(
|
|
1212
|
+
cls, left_select: list[SelectInput] | list[str], right_select: list[SelectInput] | list[str]
|
|
1213
|
+
) -> "CrossJoinInputManager":
|
|
957
1214
|
"""Factory method to create CrossJoinInput from various input formats."""
|
|
958
1215
|
left_inputs = cls.parse_select(left_select)
|
|
959
1216
|
right_inputs = cls.parse_select(right_select)
|
|
960
1217
|
|
|
961
|
-
cross_join = CrossJoinInput(
|
|
962
|
-
left_select=left_inputs,
|
|
963
|
-
right_select=right_inputs
|
|
964
|
-
)
|
|
1218
|
+
cross_join = CrossJoinInput(left_select=left_inputs, right_select=right_inputs)
|
|
965
1219
|
return cls(cross_join)
|
|
966
1220
|
|
|
967
|
-
def get_overlapping_records(self) ->
|
|
1221
|
+
def get_overlapping_records(self) -> set[str]:
|
|
968
1222
|
"""Finds column names that would conflict after the join."""
|
|
969
1223
|
return self.get_overlapping_columns()
|
|
970
1224
|
|
|
@@ -976,11 +1230,11 @@ class CrossJoinInputManager(JoinSelectManagerMixin):
|
|
|
976
1230
|
for right_col in self.input.right_select.renames:
|
|
977
1231
|
if right_col.new_name in overlapping_records:
|
|
978
1232
|
if rename_mode == "prefix":
|
|
979
|
-
right_col.new_name =
|
|
1233
|
+
right_col.new_name = "right_" + right_col.new_name
|
|
980
1234
|
elif rename_mode == "suffix":
|
|
981
|
-
right_col.new_name = right_col.new_name +
|
|
1235
|
+
right_col.new_name = right_col.new_name + "_right"
|
|
982
1236
|
else:
|
|
983
|
-
raise ValueError(f
|
|
1237
|
+
raise ValueError(f"Unknown rename_mode: {rename_mode}")
|
|
984
1238
|
overlapping_records = self.get_overlapping_records()
|
|
985
1239
|
|
|
986
1240
|
# === Backward Compatibility Properties ===
|
|
@@ -996,7 +1250,7 @@ class CrossJoinInputManager(JoinSelectManagerMixin):
|
|
|
996
1250
|
return self.right_manager
|
|
997
1251
|
|
|
998
1252
|
@property
|
|
999
|
-
def overlapping_records(self) ->
|
|
1253
|
+
def overlapping_records(self) -> set[str]:
|
|
1000
1254
|
"""Backward compatibility: Returns overlapping column names."""
|
|
1001
1255
|
return self.get_overlapping_records()
|
|
1002
1256
|
|
|
@@ -1011,7 +1265,7 @@ class CrossJoinInputManager(JoinSelectManagerMixin):
|
|
|
1011
1265
|
"""
|
|
1012
1266
|
return CrossJoinInput(
|
|
1013
1267
|
left_select=JoinInputs(renames=self.input.left_select.renames.copy()),
|
|
1014
|
-
right_select=JoinInputs(renames=self.input.right_select.renames.copy())
|
|
1268
|
+
right_select=JoinInputs(renames=self.input.right_select.renames.copy()),
|
|
1015
1269
|
)
|
|
1016
1270
|
|
|
1017
1271
|
|
|
@@ -1025,18 +1279,16 @@ class JoinInputManager(JoinSelectManagerMixin):
|
|
|
1025
1279
|
self.set_join_keys()
|
|
1026
1280
|
|
|
1027
1281
|
@classmethod
|
|
1028
|
-
def create(
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1282
|
+
def create(
|
|
1283
|
+
cls,
|
|
1284
|
+
join_mapping: list[JoinMap] | tuple[str, str] | str,
|
|
1285
|
+
left_select: list[SelectInput] | list[str],
|
|
1286
|
+
right_select: list[SelectInput] | list[str],
|
|
1287
|
+
how: JoinStrategy = "inner",
|
|
1288
|
+
) -> "JoinInputManager":
|
|
1032
1289
|
"""Factory method to create JoinInput from various input formats."""
|
|
1033
1290
|
# Use JoinInput's own create method for parsing
|
|
1034
|
-
join_input = JoinInput(
|
|
1035
|
-
join_mapping=join_mapping,
|
|
1036
|
-
left_select=left_select,
|
|
1037
|
-
right_select=right_select,
|
|
1038
|
-
how=how
|
|
1039
|
-
)
|
|
1291
|
+
join_input = JoinInput(join_mapping=join_mapping, left_select=left_select, right_select=right_select, how=how)
|
|
1040
1292
|
|
|
1041
1293
|
manager = cls(join_input)
|
|
1042
1294
|
manager.set_join_keys()
|
|
@@ -1053,31 +1305,31 @@ class JoinInputManager(JoinSelectManagerMixin):
|
|
|
1053
1305
|
for select_input in self.input.right_select.renames:
|
|
1054
1306
|
select_input.join_key = select_input.old_name in right_join_keys
|
|
1055
1307
|
|
|
1056
|
-
def _get_left_join_keys_set(self) ->
|
|
1308
|
+
def _get_left_join_keys_set(self) -> set[str]:
|
|
1057
1309
|
"""Internal: Returns a set of the left-side join key column names."""
|
|
1058
1310
|
return {jm.left_col for jm in self.input.join_mapping}
|
|
1059
1311
|
|
|
1060
|
-
def _get_right_join_keys_set(self) ->
|
|
1312
|
+
def _get_right_join_keys_set(self) -> set[str]:
|
|
1061
1313
|
"""Internal: Returns a set of the right-side join key column names."""
|
|
1062
1314
|
return {jm.right_col for jm in self.input.join_mapping}
|
|
1063
1315
|
|
|
1064
|
-
def get_left_join_keys(self) ->
|
|
1316
|
+
def get_left_join_keys(self) -> set[str]:
|
|
1065
1317
|
"""Returns a set of the left-side join key column names."""
|
|
1066
1318
|
return self._get_left_join_keys_set()
|
|
1067
1319
|
|
|
1068
|
-
def get_right_join_keys(self) ->
|
|
1320
|
+
def get_right_join_keys(self) -> set[str]:
|
|
1069
1321
|
"""Returns a set of the right-side join key column names."""
|
|
1070
1322
|
return self._get_right_join_keys_set()
|
|
1071
1323
|
|
|
1072
|
-
def get_left_join_keys_list(self) ->
|
|
1324
|
+
def get_left_join_keys_list(self) -> list[str]:
|
|
1073
1325
|
"""Returns an ordered list of the left-side join key column names."""
|
|
1074
1326
|
return [jm.left_col for jm in self.used_join_mapping]
|
|
1075
1327
|
|
|
1076
|
-
def get_right_join_keys_list(self) ->
|
|
1328
|
+
def get_right_join_keys_list(self) -> list[str]:
|
|
1077
1329
|
"""Returns an ordered list of the right-side join key column names."""
|
|
1078
1330
|
return [jm.right_col for jm in self.used_join_mapping]
|
|
1079
1331
|
|
|
1080
|
-
def get_overlapping_records(self) ->
|
|
1332
|
+
def get_overlapping_records(self) -> set[str]:
|
|
1081
1333
|
"""Finds column names that would conflict after the join."""
|
|
1082
1334
|
return self.get_overlapping_columns()
|
|
1083
1335
|
|
|
@@ -1089,7 +1341,7 @@ class JoinInputManager(JoinSelectManagerMixin):
|
|
|
1089
1341
|
while len(overlapping_records) > 0:
|
|
1090
1342
|
for right_col in self.input.right_select.renames:
|
|
1091
1343
|
if right_col.new_name in overlapping_records:
|
|
1092
|
-
right_col.new_name = right_col.new_name +
|
|
1344
|
+
right_col.new_name = right_col.new_name + "_right"
|
|
1093
1345
|
overlapping_records = self.get_overlapping_records()
|
|
1094
1346
|
|
|
1095
1347
|
def get_join_key_renames(self, filter_drop: bool = False) -> FullJoinKeyResponse:
|
|
@@ -1098,9 +1350,9 @@ class JoinInputManager(JoinSelectManagerMixin):
|
|
|
1098
1350
|
right_renames = self.right_manager.get_join_key_renames(side="right", filter_drop=filter_drop)
|
|
1099
1351
|
return FullJoinKeyResponse(left_renames, right_renames)
|
|
1100
1352
|
|
|
1101
|
-
def get_names_for_table_rename(self) ->
|
|
1353
|
+
def get_names_for_table_rename(self) -> list[JoinMap]:
|
|
1102
1354
|
"""Gets join mapping with renamed columns applied."""
|
|
1103
|
-
new_mappings:
|
|
1355
|
+
new_mappings: list[JoinMap] = []
|
|
1104
1356
|
left_rename_table = self.left_manager.get_rename_table()
|
|
1105
1357
|
right_rename_table = self.right_manager.get_rename_table()
|
|
1106
1358
|
|
|
@@ -1111,9 +1363,9 @@ class JoinInputManager(JoinSelectManagerMixin):
|
|
|
1111
1363
|
|
|
1112
1364
|
return new_mappings
|
|
1113
1365
|
|
|
1114
|
-
def get_used_join_mapping(self) ->
|
|
1366
|
+
def get_used_join_mapping(self) -> list[JoinMap]:
|
|
1115
1367
|
"""Returns the final join mapping after applying all renames and transformations."""
|
|
1116
|
-
new_mappings:
|
|
1368
|
+
new_mappings: list[JoinMap] = []
|
|
1117
1369
|
left_rename_table = self.left_manager.get_rename_table()
|
|
1118
1370
|
right_rename_table = self.right_manager.get_rename_table()
|
|
1119
1371
|
left_join_rename_mapping = self.left_manager.get_join_key_rename_mapping("left")
|
|
@@ -1142,7 +1394,7 @@ class JoinInputManager(JoinSelectManagerMixin):
|
|
|
1142
1394
|
join_mapping=self.input.join_mapping,
|
|
1143
1395
|
left_select=JoinInputs(renames=self.input.left_select.renames.copy()),
|
|
1144
1396
|
right_select=JoinInputs(renames=self.input.right_select.renames.copy()),
|
|
1145
|
-
how=self.input.how
|
|
1397
|
+
how=self.input.how,
|
|
1146
1398
|
)
|
|
1147
1399
|
|
|
1148
1400
|
@property
|
|
@@ -1169,17 +1421,17 @@ class JoinInputManager(JoinSelectManagerMixin):
|
|
|
1169
1421
|
return self.input.how
|
|
1170
1422
|
|
|
1171
1423
|
@property
|
|
1172
|
-
def join_mapping(self) ->
|
|
1424
|
+
def join_mapping(self) -> list[JoinMap]:
|
|
1173
1425
|
"""Backward compatibility: Access join mapping."""
|
|
1174
1426
|
return self.input.join_mapping
|
|
1175
1427
|
|
|
1176
1428
|
@property
|
|
1177
|
-
def overlapping_records(self) ->
|
|
1429
|
+
def overlapping_records(self) -> set[str]:
|
|
1178
1430
|
"""Backward compatibility: Returns overlapping column names."""
|
|
1179
1431
|
return self.get_overlapping_records()
|
|
1180
1432
|
|
|
1181
1433
|
@property
|
|
1182
|
-
def used_join_mapping(self) ->
|
|
1434
|
+
def used_join_mapping(self) -> list[JoinMap]:
|
|
1183
1435
|
"""Backward compatibility: Returns used join mapping.
|
|
1184
1436
|
|
|
1185
1437
|
This property is critical - it's used by left_join_keys and right_join_keys.
|
|
@@ -1187,7 +1439,7 @@ class JoinInputManager(JoinSelectManagerMixin):
|
|
|
1187
1439
|
return self.get_used_join_mapping()
|
|
1188
1440
|
|
|
1189
1441
|
@property
|
|
1190
|
-
def left_join_keys(self) ->
|
|
1442
|
+
def left_join_keys(self) -> list[str]:
|
|
1191
1443
|
"""Backward compatibility: Returns left join keys list.
|
|
1192
1444
|
|
|
1193
1445
|
IMPORTANT: Uses the used_join_mapping PROPERTY (not method).
|
|
@@ -1195,7 +1447,7 @@ class JoinInputManager(JoinSelectManagerMixin):
|
|
|
1195
1447
|
return [jm.left_col for jm in self.used_join_mapping]
|
|
1196
1448
|
|
|
1197
1449
|
@property
|
|
1198
|
-
def right_join_keys(self) ->
|
|
1450
|
+
def right_join_keys(self) -> list[str]:
|
|
1199
1451
|
"""Backward compatibility: Returns right join keys list.
|
|
1200
1452
|
|
|
1201
1453
|
IMPORTANT: Uses the used_join_mapping PROPERTY (not method).
|
|
@@ -1203,12 +1455,12 @@ class JoinInputManager(JoinSelectManagerMixin):
|
|
|
1203
1455
|
return [jm.right_col for jm in self.used_join_mapping]
|
|
1204
1456
|
|
|
1205
1457
|
@property
|
|
1206
|
-
def _left_join_keys(self) ->
|
|
1458
|
+
def _left_join_keys(self) -> set[str]:
|
|
1207
1459
|
"""Backward compatibility: Private property for left join key set."""
|
|
1208
1460
|
return self._get_left_join_keys_set()
|
|
1209
1461
|
|
|
1210
1462
|
@property
|
|
1211
|
-
def _right_join_keys(self) ->
|
|
1463
|
+
def _right_join_keys(self) -> set[str]:
|
|
1212
1464
|
"""Backward compatibility: Private property for right join key set."""
|
|
1213
1465
|
return self._get_right_join_keys_set()
|
|
1214
1466
|
|
|
@@ -1218,20 +1470,26 @@ class FuzzyMatchInputManager(JoinInputManager):
|
|
|
1218
1470
|
|
|
1219
1471
|
def __init__(self, fuzzy_input: FuzzyMatchInput):
|
|
1220
1472
|
self.fuzzy_input = deepcopy(fuzzy_input)
|
|
1221
|
-
super().__init__(
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1473
|
+
super().__init__(
|
|
1474
|
+
JoinInput(
|
|
1475
|
+
join_mapping=[
|
|
1476
|
+
JoinMap(left_col=fm.left_col, right_col=fm.right_col) for fm in self.fuzzy_input.join_mapping
|
|
1477
|
+
],
|
|
1478
|
+
left_select=self.fuzzy_input.left_select,
|
|
1479
|
+
right_select=self.fuzzy_input.right_select,
|
|
1480
|
+
how=self.fuzzy_input.how,
|
|
1481
|
+
)
|
|
1482
|
+
)
|
|
1228
1483
|
|
|
1229
1484
|
@classmethod
|
|
1230
|
-
def create(
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1485
|
+
def create(
|
|
1486
|
+
cls,
|
|
1487
|
+
join_mapping: list[FuzzyMapping] | tuple[str, str] | str,
|
|
1488
|
+
left_select: list[SelectInput] | list[str],
|
|
1489
|
+
right_select: list[SelectInput] | list[str],
|
|
1490
|
+
aggregate_output: bool = False,
|
|
1491
|
+
how: JoinStrategy = "inner",
|
|
1492
|
+
) -> "FuzzyMatchInputManager":
|
|
1235
1493
|
"""Factory method to create FuzzyMatchInput from various input formats."""
|
|
1236
1494
|
parsed_mapping = cls.parse_fuzz_mapping(join_mapping)
|
|
1237
1495
|
left_inputs = cls.parse_select(left_select)
|
|
@@ -1242,7 +1500,7 @@ class FuzzyMatchInputManager(JoinInputManager):
|
|
|
1242
1500
|
left_select=left_inputs,
|
|
1243
1501
|
right_select=right_inputs,
|
|
1244
1502
|
how=how,
|
|
1245
|
-
aggregate_output=aggregate_output
|
|
1503
|
+
aggregate_output=aggregate_output,
|
|
1246
1504
|
)
|
|
1247
1505
|
|
|
1248
1506
|
manager = cls(fuzzy_input)
|
|
@@ -1252,20 +1510,17 @@ class FuzzyMatchInputManager(JoinInputManager):
|
|
|
1252
1510
|
|
|
1253
1511
|
for jm in parsed_mapping:
|
|
1254
1512
|
if jm.right_col not in right_old_names:
|
|
1255
|
-
manager.right_manager.append(
|
|
1256
|
-
SelectInput(old_name=jm.right_col, keep=False, join_key=True)
|
|
1257
|
-
)
|
|
1513
|
+
manager.right_manager.append(SelectInput(old_name=jm.right_col, keep=False, join_key=True))
|
|
1258
1514
|
if jm.left_col not in left_old_names:
|
|
1259
|
-
manager.left_manager.append(
|
|
1260
|
-
SelectInput(old_name=jm.left_col, keep=False, join_key=True)
|
|
1261
|
-
)
|
|
1515
|
+
manager.left_manager.append(SelectInput(old_name=jm.left_col, keep=False, join_key=True))
|
|
1262
1516
|
|
|
1263
1517
|
manager.set_join_keys()
|
|
1264
1518
|
return manager
|
|
1265
1519
|
|
|
1266
1520
|
@staticmethod
|
|
1267
|
-
def parse_fuzz_mapping(
|
|
1268
|
-
|
|
1521
|
+
def parse_fuzz_mapping(
|
|
1522
|
+
fuzz_mapping: list[FuzzyMapping] | tuple[str, str] | str | FuzzyMapping | list[dict],
|
|
1523
|
+
) -> list[FuzzyMapping]:
|
|
1269
1524
|
"""Parses various input formats into a list of FuzzyMapping objects."""
|
|
1270
1525
|
if isinstance(fuzz_mapping, (tuple, list)):
|
|
1271
1526
|
if len(fuzz_mapping) == 0:
|
|
@@ -1290,9 +1545,9 @@ class FuzzyMatchInputManager(JoinInputManager):
|
|
|
1290
1545
|
elif isinstance(fuzz_mapping, FuzzyMapping):
|
|
1291
1546
|
return [fuzz_mapping]
|
|
1292
1547
|
|
|
1293
|
-
raise ValueError(f
|
|
1548
|
+
raise ValueError(f"No valid fuzzy mapping as input: {type(fuzz_mapping)}")
|
|
1294
1549
|
|
|
1295
|
-
def get_fuzzy_maps(self) ->
|
|
1550
|
+
def get_fuzzy_maps(self) -> list[FuzzyMapping]:
|
|
1296
1551
|
"""Returns the final fuzzy mappings after applying all column renames."""
|
|
1297
1552
|
new_mappings = []
|
|
1298
1553
|
left_rename_table = self.left_manager.get_rename_table()
|
|
@@ -1315,12 +1570,12 @@ class FuzzyMatchInputManager(JoinInputManager):
|
|
|
1315
1570
|
# === Backward Compatibility Properties ===
|
|
1316
1571
|
|
|
1317
1572
|
@property
|
|
1318
|
-
def fuzzy_maps(self) ->
|
|
1573
|
+
def fuzzy_maps(self) -> list[FuzzyMapping]:
|
|
1319
1574
|
"""Backward compatibility: Returns fuzzy mappings."""
|
|
1320
1575
|
return self.get_fuzzy_maps()
|
|
1321
1576
|
|
|
1322
1577
|
@property
|
|
1323
|
-
def join_mapping(self) ->
|
|
1578
|
+
def join_mapping(self) -> list[FuzzyMapping]:
|
|
1324
1579
|
"""Backward compatibility: Access fuzzy join mapping."""
|
|
1325
1580
|
return self.get_fuzzy_maps()
|
|
1326
1581
|
|
|
@@ -1343,5 +1598,5 @@ class FuzzyMatchInputManager(JoinInputManager):
|
|
|
1343
1598
|
left_select=JoinInputs(renames=self.input.left_select.renames.copy()),
|
|
1344
1599
|
right_select=JoinInputs(renames=self.input.right_select.renames.copy()),
|
|
1345
1600
|
how=self.fuzzy_input.how,
|
|
1346
|
-
aggregate_output=self.fuzzy_input.aggregate_output
|
|
1347
|
-
)
|
|
1601
|
+
aggregate_output=self.fuzzy_input.aggregate_output,
|
|
1602
|
+
)
|