Flowfile: flowfile-0.5.1-py3-none-any.whl → flowfile-0.5.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backends/main.py +25 -22
- build_backends/main_prd.py +10 -19
- flowfile/__init__.py +178 -74
- flowfile/__main__.py +10 -7
- flowfile/api.py +51 -57
- flowfile/web/__init__.py +14 -9
- flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
- flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
- flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
- flowfile/web/static/assets/{CloudConnectionManager-0dfba9f2.js → CloudConnectionView-f13f202b.js} +11 -11
- flowfile/web/static/assets/{CloudStorageReader-d5b1b6c9.js → CloudStorageReader-0023d4a5.js} +10 -8
- flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
- flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
- flowfile/web/static/assets/{CloudStorageWriter-00d87aad.js → CloudStorageWriter-8e781e11.js} +10 -8
- flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
- flowfile/web/static/assets/{ColumnSelector-4685e75d.js → ColumnSelector-8ad68ea9.js} +3 -5
- flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-23e909da.js → ContextMenu-31ee57f0.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-70ae0c79.js → ContextMenu-69a74055.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-f149cf7c.js → ContextMenu-8e2051c6.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
- flowfile/web/static/assets/{CrossJoin-702a3edd.js → CrossJoin-03df6938.js} +12 -10
- flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
- flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
- flowfile/web/static/assets/{CustomNode-b1519993.js → CustomNode-8479239b.js} +36 -24
- flowfile/web/static/assets/{DatabaseConnectionSettings-6f3e4ea5.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
- flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
- flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
- flowfile/web/static/assets/{DatabaseReader-d38c7295.js → DatabaseReader-c58b9552.js} +25 -15
- flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
- flowfile/web/static/assets/{DatabaseManager-cf5ef661.js → DatabaseView-d26a9140.js} +11 -11
- flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
- flowfile/web/static/assets/{DatabaseWriter-b04ef46a.js → DatabaseWriter-4d05ddc7.js} +17 -10
- flowfile/web/static/assets/{designer-8da3ba3a.css → DesignerView-a6d0ee84.css} +614 -546
- flowfile/web/static/assets/{designer-9633482a.js → DesignerView-e6f5c0e8.js} +1107 -3170
- flowfile/web/static/assets/{documentation-ca400224.js → DocumentationView-2e78ef1b.js} +5 -5
- flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
- flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
- flowfile/web/static/assets/{ExploreData-5fa10ed8.js → ExploreData-7b54caca.js} +18 -9
- flowfile/web/static/assets/{ExternalSource-d39af878.js → ExternalSource-3fa399b2.js} +9 -7
- flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
- flowfile/web/static/assets/Filter-7494ea97.css +48 -0
- flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
- flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
- flowfile/web/static/assets/{Formula-6b04fb1d.js → Formula-aac42b1e.js} +13 -11
- flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
- flowfile/web/static/assets/{FuzzyMatch-999521f4.js → FuzzyMatch-cd9bbfca.js} +12 -10
- flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
- flowfile/web/static/assets/{GraphSolver-17dd2198.js → GraphSolver-c7e6780e.js} +13 -11
- flowfile/web/static/assets/{GroupBy-6b039e18.js → GroupBy-93c5d22b.js} +9 -7
- flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
- flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
- flowfile/web/static/assets/{Join-24d0f113.js → Join-a19b2de2.js} +13 -11
- flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
- flowfile/web/static/assets/LoginView-d325d632.css +172 -0
- flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
- flowfile/web/static/assets/{ManualInput-34639209.js → ManualInput-8d3374b2.js} +170 -116
- flowfile/web/static/assets/{MultiSelect-0e8724a3.js → MultiSelect-ad1b6243.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
- flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
- flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
- flowfile/web/static/assets/{NumericInput-3d63a470.js → NumericInput-7100234c.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
- flowfile/web/static/assets/{Output-283fe388.css → Output-35e97000.css} +6 -6
- flowfile/web/static/assets/{Output-edea9802.js → Output-f5efd2aa.js} +12 -9
- flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
- flowfile/web/static/assets/{Pivot-61d19301.js → Pivot-d981d23c.js} +11 -9
- flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
- flowfile/web/static/assets/{PivotValidation-f97fec5b.js → PivotValidation-39386e95.js} +3 -3
- flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
- flowfile/web/static/assets/{PivotValidation-de9f43fe.js → PivotValidation-63de1f73.js} +3 -3
- flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
- flowfile/web/static/assets/{PolarsCode-bc3c9984.js → PolarsCode-f9d69217.js} +18 -9
- flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
- flowfile/web/static/assets/PopOver-d96599db.css +33 -0
- flowfile/web/static/assets/{Read-e808b239.css → Read-36e7bd51.css} +12 -12
- flowfile/web/static/assets/{Read-64a3f259.js → Read-aec2e377.js} +14 -11
- flowfile/web/static/assets/{RecordCount-3d5039be.js → RecordCount-78ed6845.js} +6 -4
- flowfile/web/static/assets/{RecordId-597510e0.js → RecordId-2156e890.js} +8 -6
- flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
- flowfile/web/static/assets/{SQLQueryComponent-df51adbe.js → SQLQueryComponent-48c72f5b.js} +3 -3
- flowfile/web/static/assets/{Sample-4be0a507.js → Sample-1352ca74.js} +6 -4
- flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
- flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
- flowfile/web/static/assets/{SecretManager-4839be57.js → SecretsView-17df66ee.js} +35 -36
- flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
- flowfile/web/static/assets/{Select-9b72f201.js → Select-0aee4c54.js} +9 -7
- flowfile/web/static/assets/{SettingsSection-f0f75a42.js → SettingsSection-0784e157.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-e1e9c953.js → SettingsSection-cd341bb6.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-7ded385d.js → SettingsSection-f2002a6d.js} +3 -3
- flowfile/web/static/assets/{SingleSelect-6c777aac.js → SingleSelect-460cc0ea.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
- flowfile/web/static/assets/{SliderInput-7cb93e62.js → SliderInput-5d926864.js} +7 -4
- flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
- flowfile/web/static/assets/{Sort-6cbde21a.js → Sort-3cdc971b.js} +9 -7
- flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
- flowfile/web/static/assets/{TextInput-d9a40c11.js → TextInput-a2d0bfbd.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-5896c375.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
- flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
- flowfile/web/static/assets/{TextToRows-c4fcbf4d.js → TextToRows-918945f7.js} +11 -10
- flowfile/web/static/assets/{ToggleSwitch-4ef91d19.js → ToggleSwitch-f0ef5196.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
- flowfile/web/static/assets/{UnavailableFields-a03f512c.js → UnavailableFields-bdad6144.js} +4 -4
- flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
- flowfile/web/static/assets/{Union-bfe9b996.js → Union-e8ab8c86.js} +8 -6
- flowfile/web/static/assets/{Unique-5d023a27.js → Unique-8cd4f976.js} +13 -10
- flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
- flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
- flowfile/web/static/assets/{Unpivot-91cc5354.js → Unpivot-8da14095.js} +10 -8
- flowfile/web/static/assets/{UnpivotValidation-7ee2de44.js → UnpivotValidation-6f7d89ff.js} +3 -3
- flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
- flowfile/web/static/assets/{VueGraphicWalker-e51b9924.js → VueGraphicWalker-3fb312e1.js} +4 -4
- flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
- flowfile/web/static/assets/{api-cf1221f0.js → api-24483f0d.js} +1 -1
- flowfile/web/static/assets/{api-c1bad5ca.js → api-8b81fa73.js} +1 -1
- flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
- flowfile/web/static/assets/{dropDown-614b998d.js → dropDown-ac0fda9d.js} +3 -3
- flowfile/web/static/assets/{fullEditor-f7971590.js → fullEditor-5497a84a.js} +11 -10
- flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
- flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
- flowfile/web/static/assets/{genericNodeSettings-4fe5f36b.js → genericNodeSettings-99014e1d.js} +5 -5
- flowfile/web/static/assets/index-07dda503.js +38 -0
- flowfile/web/static/assets/index-3ba44389.js +2696 -0
- flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
- flowfile/web/static/assets/{index-5429bbf8.js → index-fb6493ae.js} +41626 -40867
- flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
- flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
- flowfile/web/static/assets/{outputCsv-076b85ab.js → outputCsv-8f8ba42d.js} +3 -3
- flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
- flowfile/web/static/assets/{outputExcel-0fd17dbe.js → outputExcel-393f4fef.js} +3 -3
- flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
- flowfile/web/static/assets/{outputParquet-b61e0847.js → outputParquet-07c81f65.js} +4 -4
- flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
- flowfile/web/static/assets/{readCsv-a8bb8b61.js → readCsv-07f6d9ad.js} +3 -3
- flowfile/web/static/assets/{readCsv-c767cb37.css → readCsv-3bfac4c3.css} +15 -15
- flowfile/web/static/assets/{readExcel-806d2826.css → readExcel-3db6b763.css} +13 -13
- flowfile/web/static/assets/{readExcel-67b4aee0.js → readExcel-ed69bc8f.js} +5 -5
- flowfile/web/static/assets/{readParquet-48c81530.css → readParquet-c5244ad5.css} +4 -4
- flowfile/web/static/assets/{readParquet-92ce1dbc.js → readParquet-e3ed4528.js} +3 -3
- flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
- flowfile/web/static/assets/{selectDynamic-92e25ee3.js → selectDynamic-80b92899.js} +5 -5
- flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
- flowfile/web/static/assets/{vue-codemirror.esm-41b0e0d7.js → vue-codemirror.esm-0965f39f.js} +31 -640
- flowfile/web/static/assets/{vue-content-loader.es-2c8e608f.js → vue-content-loader.es-c506ad97.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +2 -3
- flowfile-0.5.3.dist-info/RECORD +402 -0
- flowfile_core/__init__.py +13 -6
- flowfile_core/auth/jwt.py +51 -16
- flowfile_core/auth/models.py +32 -7
- flowfile_core/auth/password.py +89 -0
- flowfile_core/auth/secrets.py +8 -6
- flowfile_core/configs/__init__.py +9 -7
- flowfile_core/configs/flow_logger.py +15 -14
- flowfile_core/configs/node_store/__init__.py +72 -4
- flowfile_core/configs/node_store/nodes.py +155 -172
- flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
- flowfile_core/configs/settings.py +28 -15
- flowfile_core/database/connection.py +7 -6
- flowfile_core/database/init_db.py +96 -2
- flowfile_core/database/models.py +3 -1
- flowfile_core/fileExplorer/__init__.py +17 -0
- flowfile_core/fileExplorer/funcs.py +123 -57
- flowfile_core/fileExplorer/utils.py +10 -11
- flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
- flowfile_core/flowfile/analytics/analytics_processor.py +26 -24
- flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
- flowfile_core/flowfile/analytics/utils.py +1 -1
- flowfile_core/flowfile/code_generator/code_generator.py +358 -244
- flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
- flowfile_core/flowfile/database_connection_manager/models.py +1 -1
- flowfile_core/flowfile/extensions.py +17 -12
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +115 -83
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +481 -423
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +31 -20
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +14 -15
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
- flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +190 -127
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
- flowfile_core/flowfile/flow_data_engine/utils.py +99 -67
- flowfile_core/flowfile/flow_graph.py +918 -571
- flowfile_core/flowfile/flow_graph_utils.py +31 -49
- flowfile_core/flowfile/flow_node/flow_node.py +330 -233
- flowfile_core/flowfile/flow_node/models.py +53 -41
- flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
- flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
- flowfile_core/flowfile/handler.py +80 -30
- flowfile_core/flowfile/manage/compatibility_enhancements.py +209 -126
- flowfile_core/flowfile/manage/io_flowfile.py +54 -57
- flowfile_core/flowfile/node_designer/__init__.py +15 -13
- flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
- flowfile_core/flowfile/node_designer/custom_node.py +162 -36
- flowfile_core/flowfile/node_designer/ui_components.py +135 -34
- flowfile_core/flowfile/schema_callbacks.py +71 -51
- flowfile_core/flowfile/setting_generator/__init__.py +0 -1
- flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
- flowfile_core/flowfile/setting_generator/settings.py +64 -53
- flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
- flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
- flowfile_core/flowfile/util/calculate_layout.py +9 -13
- flowfile_core/flowfile/util/execution_orderer.py +25 -17
- flowfile_core/flowfile/util/node_skipper.py +4 -4
- flowfile_core/flowfile/utils.py +19 -21
- flowfile_core/main.py +26 -19
- flowfile_core/routes/auth.py +284 -11
- flowfile_core/routes/cloud_connections.py +25 -25
- flowfile_core/routes/logs.py +21 -29
- flowfile_core/routes/public.py +3 -3
- flowfile_core/routes/routes.py +70 -34
- flowfile_core/routes/secrets.py +25 -27
- flowfile_core/routes/user_defined_components.py +483 -4
- flowfile_core/run_lock.py +0 -1
- flowfile_core/schemas/__init__.py +4 -6
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
- flowfile_core/schemas/cloud_storage_schemas.py +59 -53
- flowfile_core/schemas/input_schema.py +231 -144
- flowfile_core/schemas/output_model.py +49 -34
- flowfile_core/schemas/schemas.py +116 -89
- flowfile_core/schemas/transform_schema.py +518 -263
- flowfile_core/schemas/yaml_types.py +21 -7
- flowfile_core/secret_manager/secret_manager.py +17 -13
- flowfile_core/types.py +29 -9
- flowfile_core/utils/arrow_reader.py +7 -6
- flowfile_core/utils/excel_file_manager.py +3 -3
- flowfile_core/utils/fileManager.py +7 -7
- flowfile_core/utils/fl_executor.py +8 -10
- flowfile_core/utils/utils.py +4 -4
- flowfile_core/utils/validate_setup.py +5 -4
- flowfile_frame/__init__.py +106 -51
- flowfile_frame/adapters.py +2 -9
- flowfile_frame/adding_expr.py +73 -32
- flowfile_frame/cloud_storage/frame_helpers.py +27 -23
- flowfile_frame/cloud_storage/secret_manager.py +12 -26
- flowfile_frame/config.py +2 -5
- flowfile_frame/expr.py +311 -218
- flowfile_frame/expr.pyi +160 -159
- flowfile_frame/expr_name.py +23 -23
- flowfile_frame/flow_frame.py +571 -476
- flowfile_frame/flow_frame.pyi +123 -104
- flowfile_frame/flow_frame_methods.py +227 -246
- flowfile_frame/group_frame.py +50 -20
- flowfile_frame/join.py +2 -2
- flowfile_frame/lazy.py +129 -87
- flowfile_frame/lazy_methods.py +83 -30
- flowfile_frame/list_name_space.py +55 -50
- flowfile_frame/selectors.py +148 -68
- flowfile_frame/series.py +9 -7
- flowfile_frame/utils.py +19 -21
- flowfile_worker/__init__.py +12 -7
- flowfile_worker/configs.py +11 -19
- flowfile_worker/create/__init__.py +14 -9
- flowfile_worker/create/funcs.py +114 -77
- flowfile_worker/create/models.py +46 -43
- flowfile_worker/create/pl_types.py +14 -15
- flowfile_worker/create/read_excel_tables.py +34 -41
- flowfile_worker/create/utils.py +22 -19
- flowfile_worker/external_sources/s3_source/main.py +18 -51
- flowfile_worker/external_sources/s3_source/models.py +34 -27
- flowfile_worker/external_sources/sql_source/main.py +8 -5
- flowfile_worker/external_sources/sql_source/models.py +13 -9
- flowfile_worker/flow_logger.py +10 -8
- flowfile_worker/funcs.py +214 -155
- flowfile_worker/main.py +11 -17
- flowfile_worker/models.py +35 -28
- flowfile_worker/process_manager.py +2 -3
- flowfile_worker/routes.py +121 -90
- flowfile_worker/secrets.py +9 -6
- flowfile_worker/spawner.py +80 -49
- flowfile_worker/utils.py +3 -2
- shared/__init__.py +2 -7
- shared/storage_config.py +25 -13
- test_utils/postgres/commands.py +3 -2
- test_utils/postgres/fixtures.py +9 -9
- test_utils/s3/commands.py +1 -1
- test_utils/s3/data_generator.py +3 -4
- test_utils/s3/demo_data_generator.py +4 -7
- test_utils/s3/fixtures.py +7 -5
- tools/migrate/__init__.py +1 -1
- tools/migrate/__main__.py +16 -29
- tools/migrate/legacy_schemas.py +251 -190
- tools/migrate/migrate.py +193 -181
- tools/migrate/tests/conftest.py +1 -3
- tools/migrate/tests/test_migrate.py +36 -41
- tools/migrate/tests/test_migration_e2e.py +28 -29
- tools/migrate/tests/test_node_migrations.py +50 -20
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
- flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
- flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
- flowfile/web/static/assets/Filter-9b6d08db.js +0 -164
- flowfile/web/static/assets/Filter-f62091b3.css +0 -20
- flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
- flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
- flowfile/web/static/assets/nodeInput-5d0d6b79.js +0 -41
- flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
- flowfile/web/static/assets/secretApi-68435402.js +0 -46
- flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
- flowfile-0.5.1.dist-info/RECORD +0 -388
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
flowfile_core/flowfile/code_generator/code_generator.py
@@ -1,15 +1,13 @@
-from typing import List, Dict, Optional, Set, Tuple
 import polars as pl
-
 from pl_fuzzy_frame_match.models import FuzzyMapping
 
-from flowfile_core.
+from flowfile_core.configs import logger
 from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, convert_pl_type_to_string
 from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import cast_str_to_polars_type
+from flowfile_core.flowfile.flow_graph import FlowGraph
 from flowfile_core.flowfile.flow_node.flow_node import FlowNode
 from flowfile_core.flowfile.util.execution_orderer import determine_execution_order
 from flowfile_core.schemas import input_schema, transform_schema
-from flowfile_core.configs import logger
 
 
 class FlowGraphToPolarsConverter:
@@ -19,18 +17,19 @@ class FlowGraphToPolarsConverter:
     This class takes a FlowGraph instance and generates standalone Python code
     that uses only Polars, without any Flowfile dependencies.
     """
+
     flow_graph: FlowGraph
-    node_var_mapping:
-    imports:
-    code_lines:
-    output_nodes:
-    last_node_var:
+    node_var_mapping: dict[int, str]
+    imports: set[str]
+    code_lines: list[str]
+    output_nodes: list[tuple[int, str]] = []
+    last_node_var: str | None = None
 
     def __init__(self, flow_graph: FlowGraph):
         self.flow_graph = flow_graph
-        self.node_var_mapping:
-        self.imports:
-        self.code_lines:
+        self.node_var_mapping: dict[int, str] = {}  # Maps node_id to variable name
+        self.imports: set[str] = {"import polars as pl"}
+        self.code_lines: list[str] = []
        self.output_nodes = []
         self.last_node_var = None
 
@@ -44,7 +43,7 @@ class FlowGraphToPolarsConverter:
         # Get execution order
         execution_order = determine_execution_order(
             all_nodes=[node for node in self.flow_graph.nodes if node.is_correct],
-            flow_starts=self.flow_graph._flow_starts + self.flow_graph.get_implicit_starter_nodes()
+            flow_starts=self.flow_graph._flow_starts + self.flow_graph.get_implicit_starter_nodes(),
         )
 
         # Generate code for each node in order
@@ -56,7 +55,7 @@ class FlowGraphToPolarsConverter:
 
     def handle_output_node(self, node: FlowNode, var_name: str) -> None:
         settings = node.setting_input
-        if hasattr(settings,
+        if hasattr(settings, "is_flow_output") and settings.is_flow_output:
             self.output_nodes.append((node.node_id, var_name))
 
     def _generate_node_code(self, node: FlowNode) -> None:
@@ -82,67 +81,59 @@ class FlowGraphToPolarsConverter:
             self._add_comment(f"# TODO: Implement handler for node type: {node_type}")
             raise Exception(f"No handler implemented for node type: {node_type}")
 
-    def _get_input_vars(self, node: FlowNode) ->
+    def _get_input_vars(self, node: FlowNode) -> dict[str, str]:
         """Get input variable names for a node."""
         input_vars = {}
 
         if node.node_inputs.main_inputs:
             if len(node.node_inputs.main_inputs) == 1:
-                input_vars[
-                    node.node_inputs.main_inputs[0].node_id, 'df'
-                )
+                input_vars["main"] = self.node_var_mapping.get(node.node_inputs.main_inputs[0].node_id, "df")
             else:
                 for i, input_node in enumerate(node.node_inputs.main_inputs):
-                    input_vars[f
-                        input_node.node_id, f'df_{i}'
-                    )
+                    input_vars[f"main_{i}"] = self.node_var_mapping.get(input_node.node_id, f"df_{i}")
 
         if node.node_inputs.left_input:
-            input_vars[
-                node.node_inputs.left_input.node_id, 'df_left'
-            )
+            input_vars["left"] = self.node_var_mapping.get(node.node_inputs.left_input.node_id, "df_left")
 
         if node.node_inputs.right_input:
-            input_vars[
-                node.node_inputs.right_input.node_id, 'df_right'
-            )
+            input_vars["right"] = self.node_var_mapping.get(node.node_inputs.right_input.node_id, "df_right")
 
         return input_vars
 
     def _handle_csv_read(self, file_settings: input_schema.ReceivedTable, var_name: str):
-        if file_settings.table_settings.encoding.lower() in (
+        if file_settings.table_settings.encoding.lower() in ("utf-8", "utf8"):
             encoding = "utf8-lossy"
             self._add_code(f"{var_name} = pl.scan_csv(")
             self._add_code(f' "{file_settings.abs_file_path}",')
             self._add_code(f' separator="{file_settings.table_settings.delimiter}",')
-            self._add_code(f
-            self._add_code(f
+            self._add_code(f" has_header={file_settings.table_settings.has_headers},")
+            self._add_code(f" ignore_errors={file_settings.table_settings.ignore_errors},")
             self._add_code(f' encoding="{encoding}",')
-            self._add_code(f
+            self._add_code(f" skip_rows={file_settings.table_settings.starting_from_line},")
             self._add_code(")")
         else:
             self._add_code(f"{var_name} = pl.read_csv(")
             self._add_code(f' "{file_settings.abs_file_path}",')
             self._add_code(f' separator="{file_settings.table_settings.delimiter}",')
-            self._add_code(f
-            self._add_code(f
+            self._add_code(f" has_header={file_settings.table_settings.has_headers},")
+            self._add_code(f" ignore_errors={file_settings.table_settings.ignore_errors},")
             if file_settings.table_settings.encoding:
                 self._add_code(f' encoding="{file_settings.table_settings.encoding}",')
-            self._add_code(f
+            self._add_code(f" skip_rows={file_settings.table_settings.starting_from_line},")
             self._add_code(").lazy()")
 
-    def _handle_cloud_storage_reader(
+    def _handle_cloud_storage_reader(
+        self, settings: input_schema.NodeCloudStorageReader, var_name: str, input_vars: dict[str, str]
+    ):
         cloud_read_settings = settings.cloud_storage_settings
-        self.imports.add(
-            "import flowfile as ff"
-        )
+        self.imports.add("import flowfile as ff")
         if cloud_read_settings.file_format == "csv":
             self._add_code(f"{var_name} = ff.scan_csv_from_cloud_storage(")
             self._add_code(f' "{cloud_read_settings.resource_path}",')
             self._add_code(f' connection_name="{cloud_read_settings.connection_name}",')
             self._add_code(f' scan_mode="{cloud_read_settings.scan_mode}",')
             self._add_code(f' delimiter="{cloud_read_settings.csv_delimiter}",')
-            self._add_code(f
+            self._add_code(f" has_header={cloud_read_settings.csv_has_header},")
             self._add_code(f' encoding="{cloud_read_settings.csv_encoding}",')
 
         elif cloud_read_settings.file_format == "parquet":
@@ -162,22 +153,22 @@ class FlowGraphToPolarsConverter:
             self._add_code(f' "{cloud_read_settings.resource_path}",')
             self._add_code(f' connection_name="{cloud_read_settings.connection_name}",')
             self._add_code(f' scan_mode="{cloud_read_settings.scan_mode}",')
-            self._add_code(f
+            self._add_code(f" version_id={cloud_read_settings.delta_version},")
         else:
             return
         self._add_code(").data")
 
-    def _handle_read(self, settings: input_schema.NodeRead, var_name: str, input_vars:
+    def _handle_read(self, settings: input_schema.NodeRead, var_name: str, input_vars: dict[str, str]) -> None:
         """Handle file reading nodes."""
         file_settings = settings.received_file
 
-        if file_settings.file_type ==
+        if file_settings.file_type == "csv":
             self._handle_csv_read(file_settings, var_name)
 
-        elif file_settings.file_type ==
+        elif file_settings.file_type == "parquet":
             self._add_code(f'{var_name} = pl.scan_parquet("{file_settings.abs_file_path}")')
 
-        elif file_settings.file_type in (
+        elif file_settings.file_type in ("xlsx", "excel"):
             self._add_code(f"{var_name} = pl.read_excel(")
             self._add_code(f' "{file_settings.abs_file_path}",')
             if file_settings.table_settings.sheet_name:
@@ -187,12 +178,18 @@ class FlowGraphToPolarsConverter:
         self._add_code("")
 
     @staticmethod
-    def _generate_pl_schema_with_typing(flowfile_schema:
-        polars_schema_str =
-
+    def _generate_pl_schema_with_typing(flowfile_schema: list[FlowfileColumn]) -> str:
+        polars_schema_str = (
+            "pl.Schema(["
+            + ", ".join(
+                f'("{flowfile_column.column_name}", pl.{flowfile_column.data_type})'
+                for flowfile_column in flowfile_schema
+            )
+            + "])"
+        )
         return polars_schema_str
 
-    def get_manual_schema_input(self, flowfile_schema:
+    def get_manual_schema_input(self, flowfile_schema: list[FlowfileColumn]) -> str:
         polars_schema_str = self._generate_pl_schema_with_typing(flowfile_schema)
         is_valid_pl_schema = self._validate_pl_schema(polars_schema_str)
         if is_valid_pl_schema:
@@ -210,19 +207,23 @@ class FlowGraphToPolarsConverter:
             logger.error(f"Invalid Polars schema: {e}")
             return False
 
-    def _handle_manual_input(
+    def _handle_manual_input(
+        self, settings: input_schema.NodeManualInput, var_name: str, input_vars: dict[str, str]
+    ) -> None:
         """Handle manual data input nodes."""
         data = settings.raw_data_format.data
-        flowfile_schema = list(
+        flowfile_schema = list(
+            FlowfileColumn.create_from_minimal_field_info(c) for c in settings.raw_data_format.columns
+        )
         schema = self.get_manual_schema_input(flowfile_schema)
         self._add_code(f"{var_name} = pl.LazyFrame({data}, schema={schema}, strict=False)")
         self._add_code("")
 
-    def _handle_filter(self, settings: input_schema.NodeFilter, var_name: str, input_vars:
+    def _handle_filter(self, settings: input_schema.NodeFilter, var_name: str, input_vars: dict[str, str]) -> None:
         """Handle filter nodes."""
-        input_df = input_vars.get(
+        input_df = input_vars.get("main", "df")
 
-        if settings.filter_input.
+        if settings.filter_input.is_advanced():
             # Parse the advanced filter expression
             self.imports.add(
                 "from polars_expr_transformer.process.polars_expr_transformer import simple_function_to_expr"
@@ -233,28 +234,33 @@ class FlowGraphToPolarsConverter:
         else:
             # Handle basic filter
             basic = settings.filter_input.basic_filter
-
-
+            if basic is not None:
+                filter_expr = self._create_basic_filter_expr(basic)
+                self._add_code(f"{var_name} = {input_df}.filter({filter_expr})")
+            else:
+                self._add_code(f"{var_name} = {input_df}  # No filter applied")
         self._add_code("")
 
-    def _handle_record_count(self, settings: input_schema.NodeRecordCount, var_name: str, input_vars:
-        input_df = input_vars.get(
+    def _handle_record_count(self, settings: input_schema.NodeRecordCount, var_name: str, input_vars: dict[str, str]):
+        input_df = input_vars.get("main", "df")
         self._add_code(f"{var_name} = {input_df}.select(pl.len().alias('number_of_records'))")
 
-    def _handle_graph_solver(self, settings: input_schema.NodeGraphSolver, var_name: str, input_vars:
-        input_df = input_vars.get(
+    def _handle_graph_solver(self, settings: input_schema.NodeGraphSolver, var_name: str, input_vars: dict[str, str]):
+        input_df = input_vars.get("main", "df")
         from_col_name = settings.graph_solver_input.col_from
         to_col_name = settings.graph_solver_input.col_to
         output_col_name = settings.graph_solver_input.output_column_name
-        self._add_code(
-
-
+        self._add_code(
+            f'{var_name} = {input_df}.with_columns(graph_solver(pl.col("{from_col_name}"), '
+            f'pl.col("{to_col_name}"))'
+            f'.alias("{output_col_name}"))'
+        )
         self._add_code("")
         self.imports.add("from polars_grouper import graph_solver")
 
-    def _handle_select(self, settings: input_schema.NodeSelect, var_name: str, input_vars:
+    def _handle_select(self, settings: input_schema.NodeSelect, var_name: str, input_vars: dict[str, str]) -> None:
         """Handle select/rename nodes."""
-        input_df = input_vars.get(
+        input_df = input_vars.get("main", "df")
         # Get columns to keep and renames
         select_exprs = []
         for select_input in settings.select_input:
@@ -266,7 +272,7 @@ class FlowGraphToPolarsConverter:
 
             if (select_input.data_type_change or select_input.is_altered) and select_input.data_type:
                 polars_dtype = self._get_polars_dtype(select_input.data_type)
-                expr = f
+                expr = f"{expr}.cast({polars_dtype})"
 
             select_exprs.append(expr)
 
@@ -279,7 +285,7 @@ class FlowGraphToPolarsConverter:
             self._add_code(f"{var_name} = {input_df}")
         self._add_code("")
 
-    def _handle_join(self, settings: input_schema.NodeJoin, var_name: str, input_vars:
+    def _handle_join(self, settings: input_schema.NodeJoin, var_name: str, input_vars: dict[str, str]) -> None:
         """Handle join nodes by routing to appropriate join type handler.
 
         This is the main entry point for processing join operations. It determines
@@ -293,8 +299,8 @@ class FlowGraphToPolarsConverter:
         Returns:
             None: Modifies internal state by adding generated code
         """
-        left_df = input_vars.get(
-        right_df = input_vars.get(
+        left_df = input_vars.get("main", input_vars.get("main_0", "df_left"))
+        right_df = input_vars.get("right", input_vars.get("main_1", "df_right"))
         # Ensure left and right DataFrames are distinct
         if left_df == right_df:
             right_df = "df_right"
@@ -305,8 +311,9 @@ class FlowGraphToPolarsConverter:
         else:
             self._handle_standard_join(settings, var_name, left_df, right_df)
 
-    def _handle_semi_anti_join(
-
+    def _handle_semi_anti_join(
+        self, settings: input_schema.NodeJoin, var_name: str, left_df: str, right_df: str
+    ) -> None:
         """Handle semi and anti joins which only return rows from the left DataFrame.
 
         Semi joins return rows from left DataFrame that have matches in right.
@@ -333,8 +340,9 @@ class FlowGraphToPolarsConverter:
         self._add_code(" )")
         self._add_code(")")
 
-    def _handle_standard_join(
-
+    def _handle_standard_join(
+        self, settings: input_schema.NodeJoin, var_name: str, left_df: str, right_df: str
+    ) -> None:
         """Handle standard joins (left, right, inner, outer) with full column management.
 
         Standard joins may include columns from both DataFrames and require careful
@@ -370,12 +378,11 @@ class FlowGraphToPolarsConverter:
         )
         # Execute the join
         self._execute_join_with_post_processing(
-            settings, var_name, left_df, right_df, left_on, right_on,
-            after_join_drop_cols, reverse_action
+            settings, var_name, left_df, right_df, left_on, right_on, after_join_drop_cols, reverse_action
         )
 
     @staticmethod
-    def _get_join_keys(settings: transform_schema.JoinInputManager) ->
+    def _get_join_keys(settings: transform_schema.JoinInputManager) -> tuple[list[str], list[str]]:
         """Extract join keys based on join type.
 
         Different join types require different handling of join keys:
@@ -397,8 +404,9 @@ class FlowGraphToPolarsConverter:
 
         return left_on, right_on
 
-    def _apply_pre_join_transformations(
-        str, str
+    def _apply_pre_join_transformations(
+        self, settings: transform_schema.JoinInputManager, left_df: str, right_df: str
+    ) -> tuple[str, str]:
         """Apply column renames and drops before the join operation.
 
         Pre-join transformations prepare DataFrames by:
@@ -419,8 +427,7 @@ class FlowGraphToPolarsConverter:
         right_renames = {
             column.old_name: column.new_name
             for column in settings.right_select.renames
-            if
-            column.old_name != column.new_name and not column.join_key or settings.how in ("outer", "right")
+            if column.old_name != column.new_name and not column.join_key or settings.how in ("outer", "right")
         }
 
         left_renames = {
@@ -430,13 +437,11 @@ class FlowGraphToPolarsConverter:
         }
 
         left_drop_columns = [
-            column.old_name for column in settings.left_select.renames
-            if not column.keep and not column.join_key
+            column.old_name for column in settings.left_select.renames if not column.keep and not column.join_key
         ]
 
         right_drop_columns = [
-            column.old_name for column in settings.right_select.renames
-            if not column.keep and not column.join_key
+            column.old_name for column in settings.right_select.renames if not column.keep and not column.join_key
         ]
 
         # Apply transformations
@@ -451,9 +456,14 @@ class FlowGraphToPolarsConverter:
 
         return left_df, right_df
 
-    def _handle_join_key_transformations(
-
-
+    def _handle_join_key_transformations(
+        self,
+        settings: transform_schema.JoinInputManager,
+        left_df: str,
+        right_df: str,
+        left_on: list[str],
+        right_on: list[str],
+    ) -> tuple[list[str], list[str], dict | None, list[str]]:
         """Route to appropriate join-specific key transformation handler.
 
         Different join types require different strategies for handling join keys
@@ -484,9 +494,9 @@ class FlowGraphToPolarsConverter:
         else:
             return left_on, right_on, None, []
 
-    def _handle_left_inner_join_keys(
-
-
+    def _handle_left_inner_join_keys(
+        self, settings: transform_schema.JoinInputManager, right_df: str, left_on: list[str], right_on: list[str]
+    ) -> tuple[list[str], list[str], dict, list[str]]:
         """Handle key transformations for left and inner joins.
 
         For left/inner joins:
@@ -510,27 +520,26 @@ class FlowGraphToPolarsConverter:
         left_join_keys_to_keep = [jk.new_name for jk in settings.left_select.join_key_selects if jk.keep]
         join_key_duplication_command = [
             f'pl.col("{rjk.old_name}").alias("__DROP__{rjk.new_name}__DROP__")'
-            for rjk in settings.right_select.join_key_selects
+            for rjk in settings.right_select.join_key_selects
+            if rjk.keep
         ]
 
         reverse_action = {
             f"__DROP__{rjk.new_name}__DROP__": rjk.new_name
-            for rjk in settings.right_select.join_key_selects
+            for rjk in settings.right_select.join_key_selects
+            if rjk.keep
         }
 
         if join_key_duplication_command:
             self._add_code(f"{right_df} = {right_df}.with_columns([{', '.join(join_key_duplication_command)}])")
 
-        after_join_drop_cols = [
-            k.new_name for k in settings.left_select.join_key_selects
-            if not k.keep
-        ]
+        after_join_drop_cols = [k.new_name for k in settings.left_select.join_key_selects if not k.keep]
 
         return left_on, right_on, reverse_action, after_join_drop_cols
 
-    def _handle_right_join_keys(
-
-
+    def _handle_right_join_keys(
+        self, settings: transform_schema.JoinInputManager, left_df: str, left_on: list[str], right_on: list[str]
+    ) -> tuple[list[str], list[str], None, list[str]]:
         """Handle key transformations for right joins.
 
         For right joins:
@@ -553,7 +562,8 @@ class FlowGraphToPolarsConverter:
         """
         join_key_duplication_command = [
             f'pl.col("{ljk.new_name}").alias("__jk_{ljk.new_name}")'
-            for ljk in settings.left_select.join_key_selects
+            for ljk in settings.left_select.join_key_selects
+            if ljk.keep
         ]
 
         # Update left_on keys
@@ -569,14 +579,15 @@ class FlowGraphToPolarsConverter:
         left_join_keys_keep = {jk.new_name for jk in settings.left_select.join_key_selects if jk.keep}
         after_join_drop_cols_right = [
             jk.new_name if jk.new_name not in left_join_keys_keep else jk.new_name + "_right"
-            for jk in settings.right_select.join_key_selects
+            for jk in settings.right_select.join_key_selects
+            if not jk.keep
         ]
         after_join_drop_cols = list(set(after_join_drop_cols_right))
         return left_on, right_on, None, after_join_drop_cols
 
-    def _handle_outer_join_keys(
-
-
+    def _handle_outer_join_keys(
+        self, settings: transform_schema.JoinInputManager, right_df: str, left_on: list[str], right_on: list[str]
+    ) -> tuple[list[str], list[str], dict, list[str]]:
         """Handle key transformations for outer joins.
 
         For outer joins:
@@ -600,14 +611,10 @@ class FlowGraphToPolarsConverter:
         left_join_keys = {jk.new_name for jk in settings.left_select.join_key_selects}
 
         join_keys_to_keep_and_rename = [
-            rjk for rjk in settings.right_select.join_key_selects
-            if rjk.keep and rjk.new_name in left_join_keys
+            rjk for rjk in settings.right_select.join_key_selects if rjk.keep and rjk.new_name in left_join_keys
         ]
 
-        join_key_rename_command = {
-            rjk.new_name: f"__jk_{rjk.new_name}"
-            for rjk in join_keys_to_keep_and_rename
-        }
+        join_key_rename_command = {rjk.new_name: f"__jk_{rjk.new_name}" for rjk in join_keys_to_keep_and_rename}
 
         # Update right_on keys
         for position, right_on_key in enumerate(right_on):
@@ -621,20 +628,27 @@ class FlowGraphToPolarsConverter:
         reverse_action = {f"__jk_{rjk.new_name}": rjk.new_name for rjk in join_keys_to_keep_and_rename}
 
         # Calculate columns to drop after join
-        after_join_drop_cols_left = [
-            jk.new_name for jk in settings.left_select.join_key_selects if not jk.keep
-        ]
+        after_join_drop_cols_left = [jk.new_name for jk in settings.left_select.join_key_selects if not jk.keep]
         after_join_drop_cols_right = [
             jk.new_name if jk.new_name not in left_join_keys else jk.new_name + "_right"
-            for jk in settings.right_select.join_key_selects
+            for jk in settings.right_select.join_key_selects
+            if not jk.keep
         ]
         after_join_drop_cols = after_join_drop_cols_left + after_join_drop_cols_right
 
         return left_on, right_on, reverse_action, after_join_drop_cols
 
-    def _execute_join_with_post_processing(
-
-
+    def _execute_join_with_post_processing(
+        self,
+        settings: input_schema.NodeJoin,
+        var_name: str,
+        left_df: str,
+        right_df: str,
+        left_on: list[str],
+        right_on: list[str],
+        after_join_drop_cols: list[str],
+        reverse_action: dict | None,
+    ) -> None:
         """Execute the join operation and apply post-processing steps.
 
         Generates the actual join code with any necessary post-processing:
@@ -665,7 +679,7 @@ class FlowGraphToPolarsConverter:
         self._add_code(" )")
 
         # Handle right join special case
-        if settings.join_input.how ==
+        if settings.join_input.how == "right":
             self._add_code(".collect()")  # Right join needs to be collected first cause of issue with rename
 
         # Apply post-join transformations
@@ -676,21 +690,21 @@ class FlowGraphToPolarsConverter:
             self._add_code(f".rename({reverse_action})")
 
         # Convert back to lazy for right joins
-        if settings.join_input.how ==
-            self._add_code(
+        if settings.join_input.how == "right":
+            self._add_code(".lazy()")
 
         self._add_code(")")
 
-    def _handle_group_by(self, settings: input_schema.NodeGroupBy, var_name: str, input_vars:
+    def _handle_group_by(self, settings: input_schema.NodeGroupBy, var_name: str, input_vars: dict[str, str]) -> None:
         """Handle group by nodes."""
-        input_df = input_vars.get(
+        input_df = input_vars.get("main", "df")
 
         # Separate groupby columns from aggregation columns
         group_cols = []
         agg_exprs = []
 
         for agg_col in settings.groupby_input.agg_cols:
-            if agg_col.agg ==
+            if agg_col.agg == "groupby":
                 group_cols.append(agg_col.old_name)
             else:
                 agg_func = self._get_agg_function(agg_col.agg)
@@ -703,9 +717,9 @@ class FlowGraphToPolarsConverter:
         self._add_code("])")
         self._add_code("")
 
-    def _handle_formula(self, settings: input_schema.NodeFormula, var_name: str, input_vars:
+    def _handle_formula(self, settings: input_schema.NodeFormula, var_name: str, input_vars: dict[str, str]) -> None:
         """Handle formula/expression nodes."""
-        input_df = input_vars.get(
+        input_df = input_vars.get("main", "df")
         self.imports.add("from polars_expr_transformer.process.polars_expr_transformer import simple_function_to_expr")
 
         # Convert SQL-like formula to Polars expression
@@ -717,7 +731,7 @@ class FlowGraphToPolarsConverter:
             output_type = convert_pl_type_to_string(cast_str_to_polars_type(settings.function.field.data_type))
             if output_type[:3] != "pl.":
                 output_type = "pl." + output_type
-            self._add_code(f
+            self._add_code(f" .cast({output_type})")
 
         self._add_code("])")
         self._add_code("")
@@ -725,11 +739,11 @@ class FlowGraphToPolarsConverter:
     def _handle_pivot_no_index(self, settings: input_schema.NodePivot, var_name: str, input_df: str, agg_func: str):
         pivot_input = settings.pivot_input
 
-        self._add_code(f
+        self._add_code(f"{var_name} = ({input_df}.collect()")
         self._add_code(' .with_columns(pl.lit(1).alias("__temp_index__"))')
-        self._add_code(
+        self._add_code(" .pivot(")
         self._add_code(f' values="{pivot_input.value_col}",')
-        self._add_code(
+        self._add_code(' index=["__temp_index__"],')
         self._add_code(f' columns="{pivot_input.pivot_column}",')
         self._add_code(f' aggregate_function="{agg_func}"')
         self._add_code(" )")
@@ -737,17 +751,16 @@ class FlowGraphToPolarsConverter:
         self._add_code(").lazy()")
         self._add_code("")
 
-    def _handle_pivot(self, settings: input_schema.NodePivot, var_name: str, input_vars:
+    def _handle_pivot(self, settings: input_schema.NodePivot, var_name: str, input_vars: dict[str, str]) -> None:
         """Handle pivot nodes."""
-        input_df = input_vars.get(
+        input_df = input_vars.get("main", "df")
         pivot_input = settings.pivot_input
         if len(pivot_input.aggregations) > 1:
-            logger.error("Multiple aggregations are not convertable to polars code. "
-                         "Taking the first value")
+            logger.error("Multiple aggregations are not convertable to polars code. " "Taking the first value")
         if len(pivot_input.aggregations) > 0:
             agg_func = pivot_input.aggregations[0]
         else:
-            agg_func =
+            agg_func = "first"
         if len(settings.pivot_input.index_columns) == 0:
             self._handle_pivot_no_index(settings, var_name, input_df, agg_func)
         else:
@@ -761,9 +774,9 @@ class FlowGraphToPolarsConverter:
         self._add_code(").lazy()")
         self._add_code("")
 
-    def _handle_unpivot(self, settings: input_schema.NodeUnpivot, var_name: str, input_vars:
+    def _handle_unpivot(self, settings: input_schema.NodeUnpivot, var_name: str, input_vars: dict[str, str]) -> None:
         """Handle unpivot nodes."""
-        input_df = input_vars.get(
+        input_df = input_vars.get("main", "df")
         unpivot_input = settings.unpivot_input
 
         self._add_code(f"{var_name} = {input_df}.unpivot(")
@@ -779,22 +792,22 @@ class FlowGraphToPolarsConverter:
         self._add_code(")")
         self._add_code("")
 
-    def _handle_union(self, settings: input_schema.NodeUnion, var_name: str, input_vars:
+    def _handle_union(self, settings: input_schema.NodeUnion, var_name: str, input_vars: dict[str, str]) -> None:
         """Handle union nodes."""
         # Get all input LazyFrame
         dfs = []
-        if
-            dfs.append(input_vars[
+        if "main" in input_vars:
+            dfs.append(input_vars["main"])
         else:
             # Multiple main inputs
             for key, df_var in input_vars.items():
-                if key.startswith(
+                if key.startswith("main"):
                     dfs.append(df_var)
 
-        if settings.union_input.mode ==
-            how =
+        if settings.union_input.mode == "relaxed":
+            how = "diagonal_relaxed"
         else:
-            how =
+            how = "diagonal"
 
         self._add_code(f"{var_name} = pl.concat([")
         for df in dfs:
@@ -802,76 +815,88 @@ class FlowGraphToPolarsConverter:
|
|
|
802
815
|
self._add_code(f"], how='{how}')")
|
|
803
816
|
self._add_code("")
|
|
804
817
|
|
|
805
|
-
def _handle_sort(self, settings: input_schema.NodeSort, var_name: str, input_vars:
|
|
818
|
+
def _handle_sort(self, settings: input_schema.NodeSort, var_name: str, input_vars: dict[str, str]) -> None:
|
|
806
819
|
"""Handle sort nodes."""
|
|
807
|
-
input_df = input_vars.get(
|
|
820
|
+
input_df = input_vars.get("main", "df")
|
|
808
821
|
|
|
809
822
|
sort_cols = []
|
|
810
823
|
descending = []
|
|
811
824
|
|
|
812
825
|
for sort_input in settings.sort_input:
|
|
813
826
|
sort_cols.append(f'"{sort_input.column}"')
|
|
814
|
-
descending.append(sort_input.how ==
|
|
827
|
+
descending.append(sort_input.how == "desc")
|
|
815
828
|
|
|
816
829
|
self._add_code(f"{var_name} = {input_df}.sort([{', '.join(sort_cols)}], descending={descending})")
|
|
817
830
|
self._add_code("")
|
|
818
831
|
|
|
819
|
-
def _handle_sample(self, settings: input_schema.NodeSample, var_name: str, input_vars:
|
|
832
|
+
def _handle_sample(self, settings: input_schema.NodeSample, var_name: str, input_vars: dict[str, str]) -> None:
|
|
820
833
|
"""Handle sample nodes."""
|
|
821
|
-
input_df = input_vars.get(
|
|
834
|
+
input_df = input_vars.get("main", "df")
|
|
822
835
|
self._add_code(f"{var_name} = {input_df}.head(n={settings.sample_size})")
|
|
823
836
|
self._add_code("")
|
|
824
837
|
|
     @staticmethod
-    def _transform_fuzzy_mappings_to_string(fuzzy_mappings:
-
+    def _transform_fuzzy_mappings_to_string(fuzzy_mappings: list[FuzzyMapping]) -> str:
         output_str = "["
         for i, fuzzy_mapping in enumerate(fuzzy_mappings):
-
-
-
-
-
+            output_str += (
+                f"FuzzyMapping(left_col='{fuzzy_mapping.left_col}',"
+                f" right_col='{fuzzy_mapping.right_col}', "
+                f"threshold_score={fuzzy_mapping.threshold_score}, "
+                f"fuzzy_type='{fuzzy_mapping.fuzzy_type}')"
+            )
             if i < len(fuzzy_mappings) - 1:
                 output_str += ",\n"
         output_str += "]"
         return output_str

-    def _handle_fuzzy_match(
+    def _handle_fuzzy_match(
+        self, settings: input_schema.NodeFuzzyMatch, var_name: str, input_vars: dict[str, str]
+    ) -> None:
         """Handle fuzzy match nodes."""
         self.imports.add("from pl_fuzzy_frame_match import FuzzyMapping, fuzzy_match_dfs")
         fuzzy_match_handler = transform_schema.FuzzyMatchInputManager(settings.join_input)
-        left_df = input_vars.get('main', input_vars.get('main_0', 'df_left'))
-        right_df = input_vars.get('right', input_vars.get('main_1', 'df_right'))
+        left_df = input_vars.get("main", input_vars.get("main_0", "df_left"))
+        right_df = input_vars.get("right", input_vars.get("main_1", "df_right"))

         if left_df == right_df:
             right_df = "df_right"
             self._add_code(f"{right_df} = {left_df}")

         if fuzzy_match_handler.left_select.has_drop_cols():
-            self._add_code(
+            self._add_code(
+                f"{left_df} = {left_df}.drop({[c.old_name for c in fuzzy_match_handler.left_select.non_jk_drop_columns]})"
+            )
         if fuzzy_match_handler.right_select.has_drop_cols():
-            self._add_code(
+            self._add_code(
+                f"{right_df} = {right_df}.drop({[c.old_name for c in fuzzy_match_handler.right_select.non_jk_drop_columns]})"
+            )

         fuzzy_join_mapping_settings = self._transform_fuzzy_mappings_to_string(fuzzy_match_handler.join_mapping)
-        self._add_code(
-
-
-
+        self._add_code(
+            f"{var_name} = fuzzy_match_dfs(\n"
+            f"    left_df={left_df}, right_df={right_df},\n"
+            f"    fuzzy_maps={fuzzy_join_mapping_settings}\n"
+            f"    ).lazy()"
+        )

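The fuzzy-match handler splices the serialized mappings into a `fuzzy_match_dfs` call. A sketch of the text it emits, with hypothetical mapping values; it is shown as a string rather than executed, since running it needs the `pl_fuzzy_frame_match` package and real input frames:

```python
# Shape of the code the handler emits (values such as the column names,
# threshold_score and fuzzy_type are hypothetical examples).
emitted = """
from pl_fuzzy_frame_match import FuzzyMapping, fuzzy_match_dfs

df_3 = fuzzy_match_dfs(
    left_df=df_1, right_df=df_2,
    fuzzy_maps=[FuzzyMapping(left_col='customer_name', right_col='client_name', threshold_score=80.0, fuzzy_type='levenshtein')]
    ).lazy()
"""
print(emitted)
```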
-    def _handle_unique(self, settings: input_schema.NodeUnique, var_name: str, input_vars:
+    def _handle_unique(self, settings: input_schema.NodeUnique, var_name: str, input_vars: dict[str, str]) -> None:
         """Handle unique/distinct nodes."""
-        input_df = input_vars.get('main', 'df')
+        input_df = input_vars.get("main", "df")

         if settings.unique_input.columns:
-            self._add_code(
+            self._add_code(
+                f"{var_name} = {input_df}.unique(subset={settings.unique_input.columns}, keep='{settings.unique_input.strategy}')"
+            )
         else:
             self._add_code(f"{var_name} = {input_df}.unique(keep='{settings.unique_input.strategy}')")
         self._add_code("")

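A runnable sketch of the `.unique(...)` call the handler emits when a column subset is configured (data and strategy hypothetical):

```python
import polars as pl

df_1 = pl.LazyFrame({"id": [1, 1, 2], "v": ["a", "b", "c"]})

# With a subset configured the node emits .unique(subset=..., keep=...);
# the keep argument mirrors the node's strategy setting.
df_2 = df_1.unique(subset=["id"], keep="first")
print(df_2.collect())
```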
-    def _handle_text_to_rows(
+    def _handle_text_to_rows(
+        self, settings: input_schema.NodeTextToRows, var_name: str, input_vars: dict[str, str]
+    ) -> None:
         """Handle text to rows (explode) nodes."""
-        input_df = input_vars.get('main', 'df')
+        input_df = input_vars.get("main", "df")
         text_input = settings.text_to_rows_input

         # First split the column
@@ -884,96 +909,108 @@ class FlowGraphToPolarsConverter:

         self._add_code(f"{var_name} = {input_df}.with_columns({split_expr}).explode('{explode_col}')")
         self._add_code("")
+
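Text-to-rows compiles to a split-then-explode pair. A runnable sketch with a hypothetical `,` delimiter:

```python
import polars as pl

df_1 = pl.LazyFrame({"id": [1, 2], "tags": ["a,b", "c"]})

# The node first splits the text column into a list column, then
# explodes it so every list element becomes its own row.
df_2 = df_1.with_columns(pl.col("tags").str.split(",")).explode("tags")
print(df_2.collect())
```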
     # .with_columns(
     #     (pl.cum_count(record_id_settings.output_column_name)
     #      .over(record_id_settings.group_by_columns) + record_id_settings.offset - 1)
     #     .alias(record_id_settings.output_column_name)
     # )
-    def _handle_record_id(self, settings: input_schema.NodeRecordId, var_name: str, input_vars:
+    def _handle_record_id(self, settings: input_schema.NodeRecordId, var_name: str, input_vars: dict[str, str]) -> None:
         """Handle record ID nodes."""
-        input_df = input_vars.get('main', 'df')
+        input_df = input_vars.get("main", "df")
         record_input = settings.record_id_input
         if record_input.group_by and record_input.group_by_columns:
-
             # Row number within groups
             self._add_code(f"{var_name} = ({input_df}")
             self._add_code(f" .with_columns(pl.lit(1).alias('{record_input.output_column_name}'))")
-            self._add_code(
-            self._add_code(
+            self._add_code(" .with_columns([")
+            self._add_code(
+                f" (pl.cum_count('{record_input.output_column_name}').over({record_input.group_by_columns}) + {record_input.offset} - 1)"
+            )
             self._add_code(f" .alias('{record_input.output_column_name}')")
             self._add_code("])")
-            self._add_code(
+            self._add_code(
+                f".select(['{record_input.output_column_name}'] + [col for col in {input_df}.columns if col != '{record_input.output_column_name}'])"
+            )
             self._add_code(")")
         else:
             # Simple row number
-            self._add_code(
+            self._add_code(
+                f"{var_name} = {input_df}.with_row_count(name='{record_input.output_column_name}', offset={record_input.offset})"
+            )
         self._add_code("")

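The two record-ID branches compile to a row count or a windowed cumulative count. A runnable sketch of both shapes (column names hypothetical; newer Polars prefers `with_row_index` over the emitted `with_row_count`):

```python
import polars as pl

df_1 = pl.LazyFrame({"grp": ["a", "a", "b"], "v": [10, 20, 30]})

# Ungrouped branch: a plain row counter with an offset.
simple = df_1.with_row_count(name="record_id", offset=1)

# Grouped branch: a cumulative count windowed over the group columns,
# yielding 1..n within each group.
grouped = df_1.with_columns(pl.col("v").cum_count().over("grp").alias("record_id"))

print(simple.collect(), grouped.collect())
```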
-    def _handle_cross_join(
+    def _handle_cross_join(
+        self, settings: input_schema.NodeCrossJoin, var_name: str, input_vars: dict[str, str]
+    ) -> None:
         """Handle cross join nodes."""
-        left_df = input_vars.get('main', input_vars.get('main_0', 'df_left'))
-        right_df = input_vars.get('right', input_vars.get('main_1', 'df_right'))
+        left_df = input_vars.get("main", input_vars.get("main_0", "df_left"))
+        right_df = input_vars.get("right", input_vars.get("main_1", "df_right"))

         self._add_code(f"{var_name} = {left_df}.join({right_df}, how='cross')")
         self._add_code("")

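The emitted cross join is a single call; a runnable sketch with hypothetical inputs:

```python
import polars as pl

df_left = pl.LazyFrame({"size": ["S", "M"]})
df_right = pl.LazyFrame({"color": ["red", "blue"]})

# Cartesian product of both inputs: 2 x 2 = 4 rows.
df_3 = df_left.join(df_right, how="cross")
print(df_3.collect())
```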
-    def _handle_cloud_storage_writer(
+    def _handle_cloud_storage_writer(
+        self, settings: input_schema.NodeCloudStorageWriter, var_name: str, input_vars: dict[str, str]
+    ) -> None:
         """Handle cloud storage writer nodes."""
-        input_df = input_vars.get('main', 'df')
+        input_df = input_vars.get("main", "df")
         # def write_csv_to_cloud_storage(self, path: str, connection_name: typing.Optional[str] = None, delimiter: str = ';', encoding: typing.Literal['utf8', 'utf8-lossy'] = 'utf8', description: Optional[str] = None) -> 'FlowFrame': ...

         output_settings = settings.cloud_storage_settings
         self.imports.add("import flowfile as ff")
         self._add_code(f"(ff.FlowFrame({input_df})")
         if output_settings.file_format == "csv":
-            self._add_code(
+            self._add_code(" .write_csv_to_cloud_storage(")
             self._add_code(f' path="{output_settings.resource_path}",')
             self._add_code(f' connection_name="{output_settings.connection_name}",')
             self._add_code(f' delimiter="{output_settings.csv_delimiter}",')
             self._add_code(f' encoding="{output_settings.csv_encoding}",')
             self._add_code(f' description="{settings.description}"')
         elif output_settings.file_format == "parquet":
-            self._add_code(
+            self._add_code(" .write_parquet_to_cloud_storage(")
             self._add_code(f' path="{output_settings.resource_path}",')
             self._add_code(f' connection_name="{output_settings.connection_name}",')
             self._add_code(f' description="{settings.description}"')
         elif output_settings.file_format == "json":
-            self._add_code(
+            self._add_code(" .write_json_to_cloud_storage(")
             self._add_code(f' path="{output_settings.resource_path}",')
             self._add_code(f' connection_name="{output_settings.connection_name}",')
             self._add_code(f' description="{settings.description}"')
         elif output_settings.file_format == "delta":
-            self._add_code(
+            self._add_code(" .write_delta(")
             self._add_code(f' path="{output_settings.resource_path}",')
             self._add_code(f' write_mode="{output_settings.write_mode}",')
             self._add_code(f' connection_name="{output_settings.connection_name}",')
             self._add_code(f' description="{settings.description}"')
-        self._add_code(
-        self._add_code(
+        self._add_code(" )")
+        self._add_code(")")

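The cloud-storage writer emits a chained `ff.FlowFrame(...)` write. A sketch of the generated text for the parquet branch (path, connection name and description are hypothetical), shown as a string since running it needs a configured Flowfile cloud connection:

```python
# Shape of the code emitted for a parquet cloud write.
emitted = """
import flowfile as ff

(ff.FlowFrame(df_1)
 .write_parquet_to_cloud_storage(
     path="s3://my-bucket/output.parquet",
     connection_name="my-s3-connection",
     description="Write pipeline output"
 )
)
"""
print(emitted)
```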
-    def _handle_output(self, settings: input_schema.NodeOutput, var_name: str, input_vars:
+    def _handle_output(self, settings: input_schema.NodeOutput, var_name: str, input_vars: dict[str, str]) -> None:
         """Handle output nodes."""
-        input_df = input_vars.get('main', 'df')
+        input_df = input_vars.get("main", "df")
         output_settings = settings.output_settings

-        if output_settings.file_type == 'csv':
-            self._add_code(f'{input_df}.sink_csv(')
+        if output_settings.file_type == "csv":
+            self._add_code(f"{input_df}.sink_csv(")
             self._add_code(f' "{output_settings.abs_file_path}",')
             self._add_code(f' separator="{output_settings.table_settings.delimiter}"')
-            self._add_code(')')
+            self._add_code(")")

-        elif output_settings.file_type == 'parquet':
+        elif output_settings.file_type == "parquet":
             self._add_code(f'{input_df}.sink_parquet("{output_settings.abs_file_path}")')

-        elif output_settings.file_type == 'excel':
-            self._add_code(f'{input_df}.collect().write_excel(')
+        elif output_settings.file_type == "excel":
+            self._add_code(f"{input_df}.collect().write_excel(")
             self._add_code(f' "{output_settings.abs_file_path}",')
             self._add_code(f' worksheet="{output_settings.table_settings.sheet_name}"')
-            self._add_code(')')
+            self._add_code(")")

         self._add_code("")

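Output nodes stream CSV and parquet through `sink_*`, while Excel must `collect()` first because `write_excel` only exists on an eager DataFrame (and needs the xlsxwriter package). A runnable sketch of the CSV branch, with a temp path and a hypothetical `;` delimiter:

```python
import os
import tempfile

import polars as pl

df_1 = pl.LazyFrame({"id": [1, 2], "v": ["a", "b"]})

# sink_csv streams the lazy query straight to disk.
out = os.path.join(tempfile.mkdtemp(), "out.csv")
df_1.sink_csv(out, separator=";")
print(open(out).read())
```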
-    def _handle_polars_code(
+    def _handle_polars_code(
+        self, settings: input_schema.NodePolarsCode, var_name: str, input_vars: dict[str, str]
+    ) -> None:
         """Handle custom Polars code nodes."""
         code = settings.polars_code_input.polars_code.strip()
         # Determine function parameters based on number of inputs
@@ -990,7 +1027,7 @@ class FlowGraphToPolarsConverter:
         arg_list = []
         i = 1
         for key in sorted(input_vars.keys()):
-            if key.startswith('main'):
+            if key.startswith("main"):
                 param_list.append(f"input_df_{i}: pl.LazyFrame")
                 arg_list.append(input_vars[key])
                 i += 1
@@ -1001,7 +1038,7 @@ class FlowGraphToPolarsConverter:
         is_expression = "output_df" not in code

         # Wrap the code in a function
-        self._add_code('# Custom Polars code')
+        self._add_code("# Custom Polars code")
         self._add_code(f"def _polars_code_{var_name.replace('df_', '')}({params}):")

         # Handle the code based on its structure
@@ -1010,18 +1047,18 @@ class FlowGraphToPolarsConverter:
             self._add_code(f"    return {code}")
         else:
             # It contains assignments
-            for line in code.split('\n'):
+            for line in code.split("\n"):
                 if line.strip():
                     self._add_code(f"    {line}")

             # If no explicit return, try to detect what to return
-            if 'return' not in code:
+            if "return" not in code:
                 # Try to find the last assignment
-                lines = [l.strip() for l in code.split('\n') if l.strip() and '=' in l]
+                lines = [l.strip() for l in code.split("\n") if l.strip() and "=" in l]
                 if lines:
                     last_assignment = lines[-1]
-                    if '=' in last_assignment:
-                        output_var = last_assignment.split('=')[0].strip()
+                    if "=" in last_assignment:
+                        output_var = last_assignment.split("=")[0].strip()
                         self._add_code(f"    return {output_var}")

         self._add_code("")
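The custom-code handler wraps the user's snippet in a function whose parameters are the upstream frames, falling back to returning the last assignment when there is no explicit return. A runnable sketch of the wrapper it would generate for a hypothetical single-input node:

```python
import polars as pl

# Generated wrapper: one pl.LazyFrame parameter per "main" input, and
# the snippet's output_df assignment becomes the return value.
def _polars_code_5(input_df_1: pl.LazyFrame):
    output_df = input_df_1.with_columns(pl.col("v") * 2)
    return output_df

df_5 = _polars_code_5(pl.LazyFrame({"v": [1, 2]}))
print(df_5.collect())
```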
@@ -1054,14 +1091,7 @@ class FlowGraphToPolarsConverter:
             col, op, val = match.groups()

             # Map operators
-            op_map = {
-                '=': '==',
-                '!=': '!=',
-                '>': '>',
-                '<': '<',
-                '>=': '>=',
-                '<=': '<='
-            }
+            op_map = {"=": "==", "!=": "!=", ">": ">", "<": "<", ">=": ">=", "<=": "<="}

             polars_op = op_map.get(op, op)

@@ -1075,45 +1105,129 @@ class FlowGraphToPolarsConverter:
         return re.sub(pattern, replace_expr, expr)

     def _create_basic_filter_expr(self, basic: transform_schema.BasicFilter) -> str:
-        """Create Polars expression from basic filter."""
+        """Create Polars expression from basic filter.
+
+        Generates proper Polars code for all supported filter operators.
+
+        Args:
+            basic: The BasicFilter configuration.
+
+        Returns:
+            A string containing valid Polars filter expression code.
+        """
+        from flowfile_core.schemas.transform_schema import FilterOperator
+
         col = f'pl.col("{basic.field}")'
+        value = basic.value
+        value2 = basic.value2
+
+        # Determine if value is numeric (for proper quoting)
+        is_numeric = value.replace(".", "", 1).replace("-", "", 1).isnumeric() if value else False
+
+        # Get the operator
+        try:
+            operator = basic.get_operator()
+        except (ValueError, AttributeError):
+            operator = FilterOperator.from_symbol(str(basic.operator))
+
+        # Generate expression based on operator
+        if operator == FilterOperator.EQUALS:
+            if is_numeric:
+                return f"{col} == {value}"
+            return f'{col} == "{value}"'
+
+        elif operator == FilterOperator.NOT_EQUALS:
+            if is_numeric:
+                return f"{col} != {value}"
+            return f'{col} != "{value}"'
+
+        elif operator == FilterOperator.GREATER_THAN:
+            if is_numeric:
+                return f"{col} > {value}"
+            return f'{col} > "{value}"'
+
+        elif operator == FilterOperator.GREATER_THAN_OR_EQUALS:
+            if is_numeric:
+                return f"{col} >= {value}"
+            return f'{col} >= "{value}"'
+
+        elif operator == FilterOperator.LESS_THAN:
+            if is_numeric:
+                return f"{col} < {value}"
+            return f'{col} < "{value}"'
+
+        elif operator == FilterOperator.LESS_THAN_OR_EQUALS:
+            if is_numeric:
+                return f"{col} <= {value}"
+            return f'{col} <= "{value}"'
+
+        elif operator == FilterOperator.CONTAINS:
+            return f'{col}.str.contains("{value}")'
+
+        elif operator == FilterOperator.NOT_CONTAINS:
+            return f'{col}.str.contains("{value}").not_()'
+
+        elif operator == FilterOperator.STARTS_WITH:
+            return f'{col}.str.starts_with("{value}")'
+
+        elif operator == FilterOperator.ENDS_WITH:
+            return f'{col}.str.ends_with("{value}")'
+
+        elif operator == FilterOperator.IS_NULL:
+            return f"{col}.is_null()"
+
+        elif operator == FilterOperator.IS_NOT_NULL:
+            return f"{col}.is_not_null()"
+
+        elif operator == FilterOperator.IN:
+            values = [v.strip() for v in value.split(",")]
+            if all(v.replace(".", "", 1).replace("-", "", 1).isnumeric() for v in values):
+                values_str = ", ".join(values)
+            else:
+                values_str = ", ".join(f'"{v}"' for v in values)
+            return f"{col}.is_in([{values_str}])"
+
+        elif operator == FilterOperator.NOT_IN:
+            values = [v.strip() for v in value.split(",")]
+            if all(v.replace(".", "", 1).replace("-", "", 1).isnumeric() for v in values):
+                values_str = ", ".join(values)
+            else:
+                values_str = ", ".join(f'"{v}"' for v in values)
+            return f"{col}.is_in([{values_str}]).not_()"
+
+        elif operator == FilterOperator.BETWEEN:
+            if value2 is None:
+                return f"{col} # BETWEEN requires two values"
+            if is_numeric and value2.replace(".", "", 1).replace("-", "", 1).isnumeric():
+                return f"({col} >= {value}) & ({col} <= {value2})"
+            return f'({col} >= "{value}") & ({col} <= "{value2}")'

-        if basic.filter_type == 'equals':
-            return f'{col} == "{basic.filter_value}"'
-        elif basic.filter_type == 'not_equals':
-            return f'{col} != "{basic.filter_value}"'
-        elif basic.filter_type == 'greater':
-            return f'{col} > {basic.filter_value}'
-        elif basic.filter_type == 'less':
-            return f'{col} < {basic.filter_value}'
-        elif basic.filter_type == 'in':
-            values = basic.filter_value.split(',')
-            return f"pl.col('{col}').is_in({values})"
+        # Fallback
         return col

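The rewritten builder returns expression strings, and quoting depends on whether the value parses as numeric. A runnable sketch pairing a few operators with the strings they would yield (field names and values hypothetical):

```python
import polars as pl

# Strings the builder produces for a few operator/value combinations:
#   numeric EQUALS  -> pl.col("age") == 21
#   string CONTAINS -> pl.col("name").str.contains("ann")
#   numeric BETWEEN -> (pl.col("age") >= 18) & (pl.col("age") <= 65)
df = pl.LazyFrame({"age": [17, 21, 70], "name": ["ann", "bob", "joanna"]})

# Evaluating the BETWEEN expression the builder would emit:
print(df.filter((pl.col("age") >= 18) & (pl.col("age") <= 65)).collect())
```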
     def _get_polars_dtype(self, dtype_str: str) -> str:
         """Convert Flowfile dtype string to Polars dtype."""
         dtype_map = {
-            'String': 'pl.Utf8',
-            'Integer': 'pl.Int64',
-            'Double': 'pl.Float64',
-            'Boolean': 'pl.Boolean',
-            'Date': 'pl.Date',
-            'Datetime': 'pl.Datetime',
-            'Float32': 'pl.Float32',
-            'Float64': 'pl.Float64',
-            'Int32': 'pl.Int32',
-            'Int64': 'pl.Int64',
-            'Utf8': 'pl.Utf8',
+            "String": "pl.Utf8",
+            "Integer": "pl.Int64",
+            "Double": "pl.Float64",
+            "Boolean": "pl.Boolean",
+            "Date": "pl.Date",
+            "Datetime": "pl.Datetime",
+            "Float32": "pl.Float32",
+            "Float64": "pl.Float64",
+            "Int32": "pl.Int32",
+            "Int64": "pl.Int64",
+            "Utf8": "pl.Utf8",
         }
-        return dtype_map.get(dtype_str, 'pl.Utf8')
+        return dtype_map.get(dtype_str, "pl.Utf8")

     def _get_agg_function(self, agg: str) -> str:
         """Get Polars aggregation function name."""
         agg_map = {
-            'avg': 'mean',
-            'average': 'mean',
-            'concat': 'str.concat',
+            "avg": "mean",
+            "average": "mean",
+            "concat": "str.concat",
         }
         return agg_map.get(agg, agg)

@@ -1126,12 +1240,12 @@ class FlowGraphToPolarsConverter:
         import re

         # Pattern for column names (simplified)
-        col_pattern = r'\b([a-zA-Z_][a-zA-Z0-9_]*)\b'
+        col_pattern = r"\b([a-zA-Z_][a-zA-Z0-9_]*)\b"

         def replace_col(match):
             col_name = match.group(1)
             # Skip SQL keywords
-            keywords = {'CASE', 'WHEN', 'THEN', 'ELSE', 'END', 'AND', 'OR', 'NOT', 'IN', 'AS'}
+            keywords = {"CASE", "WHEN", "THEN", "ELSE", "END", "AND", "OR", "NOT", "IN", "AS"}
             if col_name.upper() in keywords:
                 return col_name
             return f'pl.col("{col_name}")'
@@ -1139,13 +1253,13 @@ class FlowGraphToPolarsConverter:
         result = re.sub(col_pattern, replace_col, sql_expr)

         # Handle CASE WHEN
-        if 'CASE' in result:
+        if "CASE" in result:
             # This would need proper parsing
             result = "pl.when(...).then(...).otherwise(...)"

         return result

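The column rewriter is a plain regex substitution. A runnable sketch using the same pattern and keyword set as the diff (input expression hypothetical):

```python
import re

# Wrap bare identifiers in pl.col(...), leaving SQL keywords alone.
col_pattern = r"\b([a-zA-Z_][a-zA-Z0-9_]*)\b"
keywords = {"CASE", "WHEN", "THEN", "ELSE", "END", "AND", "OR", "NOT", "IN", "AS"}

def replace_col(match: re.Match) -> str:
    name = match.group(1)
    return name if name.upper() in keywords else f'pl.col("{name}")'

print(re.sub(col_pattern, replace_col, "price AND qty"))
# -> pl.col("price") AND pl.col("qty")
```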
-    def add_return_code(self, lines:
+    def add_return_code(self, lines: list[str]) -> None:
         if self.output_nodes:
             # Return marked output nodes
             if len(self.output_nodes) == 1:
@@ -1175,8 +1289,8 @@ class FlowGraphToPolarsConverter:
         # Add main function
         lines.append("def run_etl_pipeline():")
         lines.append(' """')
-        lines.append(f'    ETL Pipeline: {self.flow_graph.__name__}')
-        lines.append('    Generated from Flowfile')
+        lines.append(f"    ETL Pipeline: {self.flow_graph.__name__}")
+        lines.append("    Generated from Flowfile")
         lines.append(' """')
         lines.append(" ")