Flowfile 0.4.1-py3-none-any.whl → 0.5.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backends/main.py +25 -22
- build_backends/main_prd.py +10 -19
- flowfile/__init__.py +179 -73
- flowfile/__main__.py +10 -7
- flowfile/api.py +52 -59
- flowfile/web/__init__.py +14 -9
- flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
- flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
- flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
- flowfile/web/static/assets/{CloudConnectionManager-d3248f8d.js → CloudConnectionView-f13f202b.js} +11 -11
- flowfile/web/static/assets/{CloudStorageReader-d65bf041.js → CloudStorageReader-0023d4a5.js} +10 -8
- flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
- flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
- flowfile/web/static/assets/{CloudStorageWriter-e83be3ed.js → CloudStorageWriter-8e781e11.js} +10 -8
- flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
- flowfile/web/static/assets/{ColumnSelector-cce661cf.js → ColumnSelector-8ad68ea9.js} +3 -5
- flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-11a4652a.js → ContextMenu-31ee57f0.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-160afb08.js → ContextMenu-69a74055.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-cf18d2cc.js → ContextMenu-8e2051c6.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
- flowfile/web/static/assets/{CrossJoin-d395d38c.js → CrossJoin-03df6938.js} +12 -10
- flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
- flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
- flowfile/web/static/assets/{CustomNode-b812dc0b.js → CustomNode-8479239b.js} +36 -24
- flowfile/web/static/assets/{DatabaseConnectionSettings-7000bf2c.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
- flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
- flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
- flowfile/web/static/assets/{DatabaseReader-4f035d0c.js → DatabaseReader-c58b9552.js} +25 -15
- flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
- flowfile/web/static/assets/{DatabaseManager-9662ec5b.js → DatabaseView-d26a9140.js} +11 -11
- flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
- flowfile/web/static/assets/{DatabaseWriter-f65dcd54.js → DatabaseWriter-4d05ddc7.js} +17 -10
- flowfile/web/static/assets/{designer-e3c150ec.css → DesignerView-a6d0ee84.css} +629 -538
- flowfile/web/static/assets/{designer-f3656d8c.js → DesignerView-e6f5c0e8.js} +1214 -3209
- flowfile/web/static/assets/{documentation-52b241e7.js → DocumentationView-2e78ef1b.js} +5 -5
- flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
- flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
- flowfile/web/static/assets/{ExploreData-94c43dfc.js → ExploreData-7b54caca.js} +18 -9
- flowfile/web/static/assets/{ExternalSource-ac04b3cc.js → ExternalSource-3fa399b2.js} +9 -7
- flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
- flowfile/web/static/assets/Filter-7494ea97.css +48 -0
- flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
- flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
- flowfile/web/static/assets/{Formula-71472193.js → Formula-aac42b1e.js} +13 -11
- flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
- flowfile/web/static/assets/{FuzzyMatch-b317f631.js → FuzzyMatch-cd9bbfca.js} +12 -10
- flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
- flowfile/web/static/assets/{GraphSolver-754a234f.js → GraphSolver-c7e6780e.js} +13 -11
- flowfile/web/static/assets/{GroupBy-6c6f9802.js → GroupBy-93c5d22b.js} +9 -7
- flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
- flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
- flowfile/web/static/assets/{Join-a1b800be.js → Join-a19b2de2.js} +13 -11
- flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
- flowfile/web/static/assets/LoginView-d325d632.css +172 -0
- flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
- flowfile/web/static/assets/{ManualInput-a9640276.js → ManualInput-8d3374b2.js} +170 -116
- flowfile/web/static/assets/{MultiSelect-97213888.js → MultiSelect-ad1b6243.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-6ffe088a.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
- flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
- flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
- flowfile/web/static/assets/{NumericInput-e638088a.js → NumericInput-7100234c.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-90eb2cba.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
- flowfile/web/static/assets/{Output-ddc9079f.css → Output-35e97000.css} +6 -6
- flowfile/web/static/assets/{Output-76750610.js → Output-f5efd2aa.js} +60 -38
- flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
- flowfile/web/static/assets/{Pivot-7814803f.js → Pivot-d981d23c.js} +11 -9
- flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
- flowfile/web/static/assets/{PivotValidation-f92137d2.js → PivotValidation-39386e95.js} +3 -3
- flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
- flowfile/web/static/assets/{PivotValidation-76dd431a.js → PivotValidation-63de1f73.js} +3 -3
- flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
- flowfile/web/static/assets/{PolarsCode-889c3008.js → PolarsCode-f9d69217.js} +18 -9
- flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
- flowfile/web/static/assets/PopOver-d96599db.css +33 -0
- flowfile/web/static/assets/{Read-6b17491f.css → Read-36e7bd51.css} +12 -12
- flowfile/web/static/assets/{Read-637b72a7.js → Read-aec2e377.js} +83 -105
- flowfile/web/static/assets/{RecordCount-2b050c41.js → RecordCount-78ed6845.js} +6 -4
- flowfile/web/static/assets/{RecordId-81df7784.js → RecordId-2156e890.js} +8 -6
- flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
- flowfile/web/static/assets/{SQLQueryComponent-88dcfe53.js → SQLQueryComponent-48c72f5b.js} +3 -3
- flowfile/web/static/assets/{Sample-258ad2a9.js → Sample-1352ca74.js} +6 -4
- flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
- flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
- flowfile/web/static/assets/{SecretManager-2a2cb7e2.js → SecretsView-17df66ee.js} +35 -36
- flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
- flowfile/web/static/assets/{Select-850215fd.js → Select-0aee4c54.js} +9 -7
- flowfile/web/static/assets/{SettingsSection-55bae608.js → SettingsSection-0784e157.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-0e8d9123.js → SettingsSection-cd341bb6.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-29b4fa6b.js → SettingsSection-f2002a6d.js} +3 -3
- flowfile/web/static/assets/{SingleSelect-bebd408b.js → SingleSelect-460cc0ea.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-6093741c.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
- flowfile/web/static/assets/{SliderInput-6a05ab61.js → SliderInput-5d926864.js} +7 -4
- flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
- flowfile/web/static/assets/{Sort-10ab48ed.js → Sort-3cdc971b.js} +9 -7
- flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
- flowfile/web/static/assets/{TextInput-df9d6259.js → TextInput-a2d0bfbd.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-000e1178.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
- flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
- flowfile/web/static/assets/{TextToRows-6c2d93d8.js → TextToRows-918945f7.js} +11 -10
- flowfile/web/static/assets/{ToggleSwitch-0ff7ac52.js → ToggleSwitch-f0ef5196.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-c6dc3029.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
- flowfile/web/static/assets/{UnavailableFields-1bab97cb.js → UnavailableFields-bdad6144.js} +4 -4
- flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
- flowfile/web/static/assets/{Union-b563478a.js → Union-e8ab8c86.js} +8 -6
- flowfile/web/static/assets/{Unique-f90db5db.js → Unique-8cd4f976.js} +13 -22
- flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
- flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
- flowfile/web/static/assets/{Unpivot-bcb0025f.js → Unpivot-8da14095.js} +10 -8
- flowfile/web/static/assets/{UnpivotValidation-c4e73b04.js → UnpivotValidation-6f7d89ff.js} +3 -3
- flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
- flowfile/web/static/assets/{VueGraphicWalker-bb8535e2.js → VueGraphicWalker-3fb312e1.js} +4 -4
- flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
- flowfile/web/static/assets/{api-4c8e3822.js → api-24483f0d.js} +1 -1
- flowfile/web/static/assets/{api-2d6adc4f.js → api-8b81fa73.js} +1 -1
- flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
- flowfile/web/static/assets/{dropDown-1bca8a74.js → dropDown-ac0fda9d.js} +3 -3
- flowfile/web/static/assets/{fullEditor-2985687e.js → fullEditor-5497a84a.js} +11 -10
- flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
- flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
- flowfile/web/static/assets/{genericNodeSettings-0476ba4e.js → genericNodeSettings-99014e1d.js} +5 -5
- flowfile/web/static/assets/index-07dda503.js +38 -0
- flowfile/web/static/assets/index-3ba44389.js +2696 -0
- flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
- flowfile/web/static/assets/{index-246f201c.js → index-fb6493ae.js} +41626 -40869
- flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
- flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
- flowfile/web/static/assets/{outputCsv-d686eeaf.js → outputCsv-8f8ba42d.js} +3 -3
- flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
- flowfile/web/static/assets/{outputExcel-8809ea2f.js → outputExcel-393f4fef.js} +3 -3
- flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
- flowfile/web/static/assets/{outputParquet-53ba645a.js → outputParquet-07c81f65.js} +4 -4
- flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
- flowfile/web/static/assets/{readCsv-053bf97b.js → readCsv-07f6d9ad.js} +21 -20
- flowfile/web/static/assets/{readCsv-bca3ed53.css → readCsv-3bfac4c3.css} +15 -15
- flowfile/web/static/assets/{readExcel-e1b381ea.css → readExcel-3db6b763.css} +13 -13
- flowfile/web/static/assets/{readExcel-ad531eab.js → readExcel-ed69bc8f.js} +10 -12
- flowfile/web/static/assets/{readParquet-cee068e2.css → readParquet-c5244ad5.css} +4 -4
- flowfile/web/static/assets/{readParquet-58e899a1.js → readParquet-e3ed4528.js} +4 -7
- flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
- flowfile/web/static/assets/{selectDynamic-b38de2ba.js → selectDynamic-80b92899.js} +5 -5
- flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
- flowfile/web/static/assets/{vue-codemirror.esm-db9b8936.js → vue-codemirror.esm-0965f39f.js} +31 -637
- flowfile/web/static/assets/{vue-content-loader.es-b5f3ac30.js → vue-content-loader.es-c506ad97.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +4 -4
- flowfile-0.5.3.dist-info/RECORD +402 -0
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +1 -1
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +1 -0
- flowfile_core/__init__.py +13 -3
- flowfile_core/auth/jwt.py +51 -16
- flowfile_core/auth/models.py +32 -7
- flowfile_core/auth/password.py +89 -0
- flowfile_core/auth/secrets.py +8 -6
- flowfile_core/configs/__init__.py +9 -7
- flowfile_core/configs/flow_logger.py +15 -14
- flowfile_core/configs/node_store/__init__.py +72 -4
- flowfile_core/configs/node_store/nodes.py +155 -172
- flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
- flowfile_core/configs/settings.py +28 -15
- flowfile_core/database/connection.py +7 -6
- flowfile_core/database/init_db.py +96 -2
- flowfile_core/database/models.py +3 -1
- flowfile_core/fileExplorer/__init__.py +17 -0
- flowfile_core/fileExplorer/funcs.py +123 -57
- flowfile_core/fileExplorer/utils.py +10 -11
- flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
- flowfile_core/flowfile/analytics/analytics_processor.py +27 -24
- flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
- flowfile_core/flowfile/analytics/utils.py +1 -1
- flowfile_core/flowfile/code_generator/code_generator.py +391 -279
- flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
- flowfile_core/flowfile/database_connection_manager/models.py +1 -1
- flowfile_core/flowfile/extensions.py +17 -12
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +152 -103
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +526 -477
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +43 -32
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +15 -11
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
- flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +360 -191
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
- flowfile_core/flowfile/flow_data_engine/utils.py +101 -67
- flowfile_core/flowfile/flow_graph.py +1011 -561
- flowfile_core/flowfile/flow_graph_utils.py +31 -49
- flowfile_core/flowfile/flow_node/flow_node.py +332 -232
- flowfile_core/flowfile/flow_node/models.py +54 -41
- flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
- flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
- flowfile_core/flowfile/handler.py +82 -32
- flowfile_core/flowfile/manage/compatibility_enhancements.py +493 -47
- flowfile_core/flowfile/manage/io_flowfile.py +391 -0
- flowfile_core/flowfile/node_designer/__init__.py +15 -13
- flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
- flowfile_core/flowfile/node_designer/custom_node.py +162 -36
- flowfile_core/flowfile/node_designer/ui_components.py +136 -35
- flowfile_core/flowfile/schema_callbacks.py +77 -54
- flowfile_core/flowfile/setting_generator/__init__.py +0 -1
- flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
- flowfile_core/flowfile/setting_generator/settings.py +72 -55
- flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
- flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
- flowfile_core/flowfile/util/calculate_layout.py +9 -13
- flowfile_core/flowfile/util/execution_orderer.py +25 -17
- flowfile_core/flowfile/util/node_skipper.py +4 -4
- flowfile_core/flowfile/utils.py +19 -21
- flowfile_core/main.py +26 -19
- flowfile_core/routes/auth.py +284 -11
- flowfile_core/routes/cloud_connections.py +25 -25
- flowfile_core/routes/logs.py +21 -29
- flowfile_core/routes/public.py +3 -3
- flowfile_core/routes/routes.py +77 -43
- flowfile_core/routes/secrets.py +25 -27
- flowfile_core/routes/user_defined_components.py +483 -4
- flowfile_core/run_lock.py +0 -1
- flowfile_core/schemas/__init__.py +4 -6
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
- flowfile_core/schemas/cloud_storage_schemas.py +59 -55
- flowfile_core/schemas/input_schema.py +398 -154
- flowfile_core/schemas/output_model.py +50 -35
- flowfile_core/schemas/schemas.py +207 -67
- flowfile_core/schemas/transform_schema.py +1360 -435
- flowfile_core/schemas/yaml_types.py +117 -0
- flowfile_core/secret_manager/secret_manager.py +17 -13
- flowfile_core/{flowfile/node_designer/data_types.py → types.py} +33 -3
- flowfile_core/utils/arrow_reader.py +7 -6
- flowfile_core/utils/excel_file_manager.py +3 -3
- flowfile_core/utils/fileManager.py +7 -7
- flowfile_core/utils/fl_executor.py +8 -10
- flowfile_core/utils/utils.py +4 -4
- flowfile_core/utils/validate_setup.py +5 -4
- flowfile_frame/__init__.py +107 -50
- flowfile_frame/adapters.py +2 -9
- flowfile_frame/adding_expr.py +73 -32
- flowfile_frame/cloud_storage/frame_helpers.py +27 -23
- flowfile_frame/cloud_storage/secret_manager.py +12 -26
- flowfile_frame/config.py +2 -5
- flowfile_frame/expr.py +311 -218
- flowfile_frame/expr.pyi +160 -159
- flowfile_frame/expr_name.py +23 -23
- flowfile_frame/flow_frame.py +581 -489
- flowfile_frame/flow_frame.pyi +123 -104
- flowfile_frame/flow_frame_methods.py +236 -252
- flowfile_frame/group_frame.py +50 -20
- flowfile_frame/join.py +2 -2
- flowfile_frame/lazy.py +129 -87
- flowfile_frame/lazy_methods.py +83 -30
- flowfile_frame/list_name_space.py +55 -50
- flowfile_frame/selectors.py +148 -68
- flowfile_frame/series.py +9 -7
- flowfile_frame/utils.py +19 -21
- flowfile_worker/__init__.py +12 -4
- flowfile_worker/configs.py +11 -19
- flowfile_worker/create/__init__.py +14 -27
- flowfile_worker/create/funcs.py +143 -94
- flowfile_worker/create/models.py +139 -68
- flowfile_worker/create/pl_types.py +14 -15
- flowfile_worker/create/read_excel_tables.py +34 -41
- flowfile_worker/create/utils.py +22 -19
- flowfile_worker/external_sources/s3_source/main.py +18 -51
- flowfile_worker/external_sources/s3_source/models.py +34 -27
- flowfile_worker/external_sources/sql_source/main.py +8 -5
- flowfile_worker/external_sources/sql_source/models.py +13 -9
- flowfile_worker/flow_logger.py +10 -8
- flowfile_worker/funcs.py +214 -155
- flowfile_worker/main.py +11 -17
- flowfile_worker/models.py +35 -28
- flowfile_worker/process_manager.py +2 -3
- flowfile_worker/routes.py +121 -93
- flowfile_worker/secrets.py +9 -6
- flowfile_worker/spawner.py +80 -49
- flowfile_worker/utils.py +3 -2
- shared/__init__.py +2 -7
- shared/storage_config.py +25 -13
- test_utils/postgres/commands.py +3 -2
- test_utils/postgres/fixtures.py +9 -9
- test_utils/s3/commands.py +1 -1
- test_utils/s3/data_generator.py +3 -4
- test_utils/s3/demo_data_generator.py +4 -7
- test_utils/s3/fixtures.py +7 -5
- tools/migrate/README.md +56 -0
- tools/migrate/__init__.py +12 -0
- tools/migrate/__main__.py +118 -0
- tools/migrate/legacy_schemas.py +682 -0
- tools/migrate/migrate.py +610 -0
- tools/migrate/tests/__init__.py +0 -0
- tools/migrate/tests/conftest.py +21 -0
- tools/migrate/tests/test_migrate.py +622 -0
- tools/migrate/tests/test_migration_e2e.py +1009 -0
- tools/migrate/tests/test_node_migrations.py +843 -0
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
- flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
- flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
- flowfile/web/static/assets/Filter-812dcbca.js +0 -164
- flowfile/web/static/assets/Filter-f62091b3.css +0 -20
- flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
- flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
- flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
- flowfile/web/static/assets/secretApi-538058f3.js +0 -46
- flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
- flowfile-0.4.1.dist-info/RECORD +0 -376
- flowfile_core/flowfile/manage/open_flowfile.py +0 -143
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
- /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
|
@@ -1,15 +1,13 @@
|
|
|
1
|
-
from typing import List, Dict, Optional, Set, Tuple
|
|
2
1
|
import polars as pl
|
|
3
|
-
|
|
4
2
|
from pl_fuzzy_frame_match.models import FuzzyMapping
|
|
5
3
|
|
|
6
|
-
from flowfile_core.
|
|
4
|
+
from flowfile_core.configs import logger
|
|
7
5
|
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, convert_pl_type_to_string
|
|
8
6
|
from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import cast_str_to_polars_type
|
|
7
|
+
from flowfile_core.flowfile.flow_graph import FlowGraph
|
|
9
8
|
from flowfile_core.flowfile.flow_node.flow_node import FlowNode
|
|
10
9
|
from flowfile_core.flowfile.util.execution_orderer import determine_execution_order
|
|
11
10
|
from flowfile_core.schemas import input_schema, transform_schema
|
|
12
|
-
from flowfile_core.configs import logger
|
|
13
11
|
|
|
14
12
|
|
|
15
13
|
class FlowGraphToPolarsConverter:
|
|
@@ -19,18 +17,19 @@ class FlowGraphToPolarsConverter:
|
|
|
19
17
|
This class takes a FlowGraph instance and generates standalone Python code
|
|
20
18
|
that uses only Polars, without any Flowfile dependencies.
|
|
21
19
|
"""
|
|
20
|
+
|
|
22
21
|
flow_graph: FlowGraph
|
|
23
|
-
node_var_mapping:
|
|
24
|
-
imports:
|
|
25
|
-
code_lines:
|
|
26
|
-
output_nodes:
|
|
27
|
-
last_node_var:
|
|
22
|
+
node_var_mapping: dict[int, str]
|
|
23
|
+
imports: set[str]
|
|
24
|
+
code_lines: list[str]
|
|
25
|
+
output_nodes: list[tuple[int, str]] = []
|
|
26
|
+
last_node_var: str | None = None
|
|
28
27
|
|
|
29
28
|
def __init__(self, flow_graph: FlowGraph):
|
|
30
29
|
self.flow_graph = flow_graph
|
|
31
|
-
self.node_var_mapping:
|
|
32
|
-
self.imports:
|
|
33
|
-
self.code_lines:
|
|
30
|
+
self.node_var_mapping: dict[int, str] = {} # Maps node_id to variable name
|
|
31
|
+
self.imports: set[str] = {"import polars as pl"}
|
|
32
|
+
self.code_lines: list[str] = []
|
|
34
33
|
self.output_nodes = []
|
|
35
34
|
self.last_node_var = None
|
|
36
35
|
|
|
@@ -44,7 +43,7 @@ class FlowGraphToPolarsConverter:
|
|
|
44
43
|
# Get execution order
|
|
45
44
|
execution_order = determine_execution_order(
|
|
46
45
|
all_nodes=[node for node in self.flow_graph.nodes if node.is_correct],
|
|
47
|
-
flow_starts=self.flow_graph._flow_starts + self.flow_graph.get_implicit_starter_nodes()
|
|
46
|
+
flow_starts=self.flow_graph._flow_starts + self.flow_graph.get_implicit_starter_nodes(),
|
|
48
47
|
)
|
|
49
48
|
|
|
50
49
|
# Generate code for each node in order
|
|
@@ -56,14 +55,13 @@ class FlowGraphToPolarsConverter:
|
|
|
56
55
|
|
|
57
56
|
def handle_output_node(self, node: FlowNode, var_name: str) -> None:
|
|
58
57
|
settings = node.setting_input
|
|
59
|
-
if hasattr(settings,
|
|
58
|
+
if hasattr(settings, "is_flow_output") and settings.is_flow_output:
|
|
60
59
|
self.output_nodes.append((node.node_id, var_name))
|
|
61
60
|
|
|
62
61
|
def _generate_node_code(self, node: FlowNode) -> None:
|
|
63
62
|
"""Generate Polars code for a specific node."""
|
|
64
63
|
node_type = node.node_type
|
|
65
64
|
settings = node.setting_input
|
|
66
|
-
# Skip placeholder nodes
|
|
67
65
|
if isinstance(settings, input_schema.NodePromise):
|
|
68
66
|
self._add_comment(f"# Skipping uninitialized node: {node.node_id}")
|
|
69
67
|
return
|
|
@@ -71,7 +69,7 @@ class FlowGraphToPolarsConverter:
|
|
|
71
69
|
var_name = f"df_{node.node_id}"
|
|
72
70
|
self.node_var_mapping[node.node_id] = var_name
|
|
73
71
|
self.handle_output_node(node, var_name)
|
|
74
|
-
if node.node_template.output>0:
|
|
72
|
+
if node.node_template.output > 0:
|
|
75
73
|
self.last_node_var = var_name
|
|
76
74
|
# Get input variable names
|
|
77
75
|
input_vars = self._get_input_vars(node)
|
|
@@ -83,67 +81,59 @@ class FlowGraphToPolarsConverter:
|
|
|
83
81
|
self._add_comment(f"# TODO: Implement handler for node type: {node_type}")
|
|
84
82
|
raise Exception(f"No handler implemented for node type: {node_type}")
|
|
85
83
|
|
|
86
|
-
def _get_input_vars(self, node: FlowNode) ->
|
|
84
|
+
def _get_input_vars(self, node: FlowNode) -> dict[str, str]:
|
|
87
85
|
"""Get input variable names for a node."""
|
|
88
86
|
input_vars = {}
|
|
89
87
|
|
|
90
88
|
if node.node_inputs.main_inputs:
|
|
91
89
|
if len(node.node_inputs.main_inputs) == 1:
|
|
92
|
-
input_vars[
|
|
93
|
-
node.node_inputs.main_inputs[0].node_id, 'df'
|
|
94
|
-
)
|
|
90
|
+
input_vars["main"] = self.node_var_mapping.get(node.node_inputs.main_inputs[0].node_id, "df")
|
|
95
91
|
else:
|
|
96
92
|
for i, input_node in enumerate(node.node_inputs.main_inputs):
|
|
97
|
-
input_vars[f
|
|
98
|
-
input_node.node_id, f'df_{i}'
|
|
99
|
-
)
|
|
93
|
+
input_vars[f"main_{i}"] = self.node_var_mapping.get(input_node.node_id, f"df_{i}")
|
|
100
94
|
|
|
101
95
|
if node.node_inputs.left_input:
|
|
102
|
-
input_vars[
|
|
103
|
-
node.node_inputs.left_input.node_id, 'df_left'
|
|
104
|
-
)
|
|
96
|
+
input_vars["left"] = self.node_var_mapping.get(node.node_inputs.left_input.node_id, "df_left")
|
|
105
97
|
|
|
106
98
|
if node.node_inputs.right_input:
|
|
107
|
-
input_vars[
|
|
108
|
-
node.node_inputs.right_input.node_id, 'df_right'
|
|
109
|
-
)
|
|
99
|
+
input_vars["right"] = self.node_var_mapping.get(node.node_inputs.right_input.node_id, "df_right")
|
|
110
100
|
|
|
111
101
|
return input_vars
|
|
112
102
|
|
|
113
103
|
def _handle_csv_read(self, file_settings: input_schema.ReceivedTable, var_name: str):
|
|
114
|
-
if file_settings.encoding.lower() in (
|
|
104
|
+
if file_settings.table_settings.encoding.lower() in ("utf-8", "utf8"):
|
|
115
105
|
encoding = "utf8-lossy"
|
|
116
106
|
self._add_code(f"{var_name} = pl.scan_csv(")
|
|
117
107
|
self._add_code(f' "{file_settings.abs_file_path}",')
|
|
118
|
-
self._add_code(f' separator="{file_settings.delimiter}",')
|
|
119
|
-
self._add_code(f
|
|
120
|
-
self._add_code(f
|
|
108
|
+
self._add_code(f' separator="{file_settings.table_settings.delimiter}",')
|
|
109
|
+
self._add_code(f" has_header={file_settings.table_settings.has_headers},")
|
|
110
|
+
self._add_code(f" ignore_errors={file_settings.table_settings.ignore_errors},")
|
|
121
111
|
self._add_code(f' encoding="{encoding}",')
|
|
122
|
-
self._add_code(f
|
|
112
|
+
self._add_code(f" skip_rows={file_settings.table_settings.starting_from_line},")
|
|
123
113
|
self._add_code(")")
|
|
124
114
|
else:
|
|
125
115
|
self._add_code(f"{var_name} = pl.read_csv(")
|
|
126
116
|
self._add_code(f' "{file_settings.abs_file_path}",')
|
|
127
|
-
self._add_code(f' separator="{file_settings.delimiter}",')
|
|
128
|
-
self._add_code(f
|
|
129
|
-
self._add_code(f
|
|
130
|
-
if file_settings.encoding:
|
|
131
|
-
self._add_code(f' encoding="{file_settings.encoding}",')
|
|
132
|
-
self._add_code(f
|
|
117
|
+
self._add_code(f' separator="{file_settings.table_settings.delimiter}",')
|
|
118
|
+
self._add_code(f" has_header={file_settings.table_settings.has_headers},")
|
|
119
|
+
self._add_code(f" ignore_errors={file_settings.table_settings.ignore_errors},")
|
|
120
|
+
if file_settings.table_settings.encoding:
|
|
121
|
+
self._add_code(f' encoding="{file_settings.table_settings.encoding}",')
|
|
122
|
+
self._add_code(f" skip_rows={file_settings.table_settings.starting_from_line},")
|
|
133
123
|
self._add_code(").lazy()")
|
|
134
124
|
|
|
135
|
-
def _handle_cloud_storage_reader(
|
|
125
|
+
def _handle_cloud_storage_reader(
|
|
126
|
+
self, settings: input_schema.NodeCloudStorageReader, var_name: str, input_vars: dict[str, str]
|
|
127
|
+
):
|
|
136
128
|
cloud_read_settings = settings.cloud_storage_settings
|
|
137
|
-
self.imports.add(
|
|
138
|
-
"import flowfile as ff"
|
|
139
|
-
)
|
|
129
|
+
self.imports.add("import flowfile as ff")
|
|
140
130
|
if cloud_read_settings.file_format == "csv":
|
|
141
131
|
self._add_code(f"{var_name} = ff.scan_csv_from_cloud_storage(")
|
|
142
132
|
self._add_code(f' "{cloud_read_settings.resource_path}",')
|
|
143
133
|
self._add_code(f' connection_name="{cloud_read_settings.connection_name}",')
|
|
144
134
|
self._add_code(f' scan_mode="{cloud_read_settings.scan_mode}",')
|
|
145
135
|
self._add_code(f' delimiter="{cloud_read_settings.csv_delimiter}",')
|
|
146
|
-
self._add_code(f
|
|
136
|
+
self._add_code(f" has_header={cloud_read_settings.csv_has_header},")
|
|
147
137
|
self._add_code(f' encoding="{cloud_read_settings.csv_encoding}",')
|
|
148
138
|
|
|
149
139
|
elif cloud_read_settings.file_format == "parquet":
|
|
@@ -163,37 +153,43 @@ class FlowGraphToPolarsConverter:
|
|
|
163
153
|
self._add_code(f' "{cloud_read_settings.resource_path}",')
|
|
164
154
|
self._add_code(f' connection_name="{cloud_read_settings.connection_name}",')
|
|
165
155
|
self._add_code(f' scan_mode="{cloud_read_settings.scan_mode}",')
|
|
166
|
-
self._add_code(f
|
|
156
|
+
self._add_code(f" version_id={cloud_read_settings.delta_version},")
|
|
167
157
|
else:
|
|
168
158
|
return
|
|
169
159
|
self._add_code(").data")
|
|
170
160
|
|
|
171
|
-
def _handle_read(self, settings: input_schema.NodeRead, var_name: str, input_vars:
|
|
161
|
+
def _handle_read(self, settings: input_schema.NodeRead, var_name: str, input_vars: dict[str, str]) -> None:
|
|
172
162
|
"""Handle file reading nodes."""
|
|
173
163
|
file_settings = settings.received_file
|
|
174
164
|
|
|
175
|
-
if file_settings.file_type ==
|
|
165
|
+
if file_settings.file_type == "csv":
|
|
176
166
|
self._handle_csv_read(file_settings, var_name)
|
|
177
167
|
|
|
178
|
-
elif file_settings.file_type ==
|
|
168
|
+
elif file_settings.file_type == "parquet":
|
|
179
169
|
self._add_code(f'{var_name} = pl.scan_parquet("{file_settings.abs_file_path}")')
|
|
180
170
|
|
|
181
|
-
elif file_settings.file_type in (
|
|
171
|
+
elif file_settings.file_type in ("xlsx", "excel"):
|
|
182
172
|
self._add_code(f"{var_name} = pl.read_excel(")
|
|
183
173
|
self._add_code(f' "{file_settings.abs_file_path}",')
|
|
184
|
-
if file_settings.sheet_name:
|
|
185
|
-
self._add_code(f' sheet_name="{file_settings.sheet_name}",')
|
|
174
|
+
if file_settings.table_settings.sheet_name:
|
|
175
|
+
self._add_code(f' sheet_name="{file_settings.table_settings.sheet_name}",')
|
|
186
176
|
self._add_code(").lazy()")
|
|
187
177
|
|
|
188
178
|
self._add_code("")
|
|
189
179
|
|
|
190
180
|
@staticmethod
|
|
191
|
-
def _generate_pl_schema_with_typing(flowfile_schema:
|
|
192
|
-
polars_schema_str =
|
|
193
|
-
|
|
181
|
+
def _generate_pl_schema_with_typing(flowfile_schema: list[FlowfileColumn]) -> str:
|
|
182
|
+
polars_schema_str = (
|
|
183
|
+
"pl.Schema(["
|
|
184
|
+
+ ", ".join(
|
|
185
|
+
f'("{flowfile_column.column_name}", pl.{flowfile_column.data_type})'
|
|
186
|
+
for flowfile_column in flowfile_schema
|
|
187
|
+
)
|
|
188
|
+
+ "])"
|
|
189
|
+
)
|
|
194
190
|
return polars_schema_str
|
|
195
191
|
|
|
196
|
-
def get_manual_schema_input(self, flowfile_schema:
|
|
192
|
+
def get_manual_schema_input(self, flowfile_schema: list[FlowfileColumn]) -> str:
|
|
197
193
|
polars_schema_str = self._generate_pl_schema_with_typing(flowfile_schema)
|
|
198
194
|
is_valid_pl_schema = self._validate_pl_schema(polars_schema_str)
|
|
199
195
|
if is_valid_pl_schema:
|
|
@@ -211,19 +207,23 @@ class FlowGraphToPolarsConverter:
|
|
|
211
207
|
logger.error(f"Invalid Polars schema: {e}")
|
|
212
208
|
return False
|
|
213
209
|
|
|
214
|
-
def _handle_manual_input(
|
|
210
|
+
def _handle_manual_input(
|
|
211
|
+
self, settings: input_schema.NodeManualInput, var_name: str, input_vars: dict[str, str]
|
|
212
|
+
) -> None:
|
|
215
213
|
"""Handle manual data input nodes."""
|
|
216
214
|
data = settings.raw_data_format.data
|
|
217
|
-
flowfile_schema = list(
|
|
215
|
+
flowfile_schema = list(
|
|
216
|
+
FlowfileColumn.create_from_minimal_field_info(c) for c in settings.raw_data_format.columns
|
|
217
|
+
)
|
|
218
218
|
schema = self.get_manual_schema_input(flowfile_schema)
|
|
219
219
|
self._add_code(f"{var_name} = pl.LazyFrame({data}, schema={schema}, strict=False)")
|
|
220
220
|
self._add_code("")
|
|
221
221
|
|
|
222
|
-
def _handle_filter(self, settings: input_schema.NodeFilter, var_name: str, input_vars:
|
|
222
|
+
def _handle_filter(self, settings: input_schema.NodeFilter, var_name: str, input_vars: dict[str, str]) -> None:
|
|
223
223
|
"""Handle filter nodes."""
|
|
224
|
-
input_df = input_vars.get(
|
|
224
|
+
input_df = input_vars.get("main", "df")
|
|
225
225
|
|
|
226
|
-
if settings.filter_input.
|
|
226
|
+
if settings.filter_input.is_advanced():
|
|
227
227
|
# Parse the advanced filter expression
|
|
228
228
|
self.imports.add(
|
|
229
229
|
"from polars_expr_transformer.process.polars_expr_transformer import simple_function_to_expr"
|
|
@@ -234,28 +234,33 @@ class FlowGraphToPolarsConverter:
|
|
|
234
234
|
else:
|
|
235
235
|
# Handle basic filter
|
|
236
236
|
basic = settings.filter_input.basic_filter
|
|
237
|
-
|
|
238
|
-
|
|
237
|
+
if basic is not None:
|
|
238
|
+
filter_expr = self._create_basic_filter_expr(basic)
|
|
239
|
+
self._add_code(f"{var_name} = {input_df}.filter({filter_expr})")
|
|
240
|
+
else:
|
|
241
|
+
self._add_code(f"{var_name} = {input_df} # No filter applied")
|
|
239
242
|
self._add_code("")
|
|
240
243
|
|
|
241
|
-
def _handle_record_count(self, settings: input_schema.NodeRecordCount, var_name: str, input_vars:
|
|
242
|
-
input_df = input_vars.get(
|
|
244
|
+
def _handle_record_count(self, settings: input_schema.NodeRecordCount, var_name: str, input_vars: dict[str, str]):
|
|
245
|
+
input_df = input_vars.get("main", "df")
|
|
243
246
|
self._add_code(f"{var_name} = {input_df}.select(pl.len().alias('number_of_records'))")
|
|
244
247
|
|
|
245
|
-
def _handle_graph_solver(self, settings: input_schema.NodeGraphSolver, var_name: str, input_vars:
|
|
246
|
-
input_df = input_vars.get(
|
|
248
|
+
def _handle_graph_solver(self, settings: input_schema.NodeGraphSolver, var_name: str, input_vars: dict[str, str]):
|
|
249
|
+
input_df = input_vars.get("main", "df")
|
|
247
250
|
from_col_name = settings.graph_solver_input.col_from
|
|
248
251
|
to_col_name = settings.graph_solver_input.col_to
|
|
249
252
|
output_col_name = settings.graph_solver_input.output_column_name
|
|
250
|
-
self._add_code(
|
|
251
|
-
|
|
252
|
-
|
|
253
|
+
self._add_code(
|
|
254
|
+
f'{var_name} = {input_df}.with_columns(graph_solver(pl.col("{from_col_name}"), '
|
|
255
|
+
f'pl.col("{to_col_name}"))'
|
|
256
|
+
f'.alias("{output_col_name}"))'
|
|
257
|
+
)
|
|
253
258
|
self._add_code("")
|
|
254
259
|
self.imports.add("from polars_grouper import graph_solver")
|
|
255
260
|
|
|
256
|
-
def _handle_select(self, settings: input_schema.NodeSelect, var_name: str, input_vars:
|
|
261
|
+
def _handle_select(self, settings: input_schema.NodeSelect, var_name: str, input_vars: dict[str, str]) -> None:
|
|
257
262
|
"""Handle select/rename nodes."""
|
|
258
|
-
input_df = input_vars.get(
|
|
263
|
+
input_df = input_vars.get("main", "df")
|
|
259
264
|
# Get columns to keep and renames
|
|
260
265
|
select_exprs = []
|
|
261
266
|
for select_input in settings.select_input:
|
|
@@ -267,7 +272,7 @@ class FlowGraphToPolarsConverter:
|
|
|
267
272
|
|
|
268
273
|
if (select_input.data_type_change or select_input.is_altered) and select_input.data_type:
|
|
269
274
|
polars_dtype = self._get_polars_dtype(select_input.data_type)
|
|
270
|
-
expr = f
|
|
275
|
+
expr = f"{expr}.cast({polars_dtype})"
|
|
271
276
|
|
|
272
277
|
select_exprs.append(expr)
|
|
273
278
|
|
|
@@ -280,7 +285,7 @@ class FlowGraphToPolarsConverter:
|
|
|
280
285
|
self._add_code(f"{var_name} = {input_df}")
|
|
281
286
|
self._add_code("")
|
|
282
287
|
|
|
283
|
-
def _handle_join(self, settings: input_schema.NodeJoin, var_name: str, input_vars:
|
|
288
|
+
def _handle_join(self, settings: input_schema.NodeJoin, var_name: str, input_vars: dict[str, str]) -> None:
|
|
284
289
|
"""Handle join nodes by routing to appropriate join type handler.
|
|
285
290
|
|
|
286
291
|
This is the main entry point for processing join operations. It determines
|
|
@@ -294,9 +299,8 @@ class FlowGraphToPolarsConverter:
|
|
|
294
299
|
Returns:
|
|
295
300
|
None: Modifies internal state by adding generated code
|
|
296
301
|
"""
|
|
297
|
-
left_df = input_vars.get(
|
|
298
|
-
right_df = input_vars.get(
|
|
299
|
-
|
|
302
|
+
left_df = input_vars.get("main", input_vars.get("main_0", "df_left"))
|
|
303
|
+
right_df = input_vars.get("right", input_vars.get("main_1", "df_right"))
|
|
300
304
|
# Ensure left and right DataFrames are distinct
|
|
301
305
|
if left_df == right_df:
|
|
302
306
|
right_df = "df_right"
|
|
@@ -307,8 +311,9 @@ class FlowGraphToPolarsConverter:
|
|
|
307
311
|
else:
|
|
308
312
|
self._handle_standard_join(settings, var_name, left_df, right_df)
|
|
309
313
|
|
|
310
|
-
def _handle_semi_anti_join(
|
|
311
|
-
|
|
314
|
+
def _handle_semi_anti_join(
|
|
315
|
+
self, settings: input_schema.NodeJoin, var_name: str, left_df: str, right_df: str
|
|
316
|
+
) -> None:
|
|
312
317
|
"""Handle semi and anti joins which only return rows from the left DataFrame.
|
|
313
318
|
|
|
314
319
|
Semi joins return rows from left DataFrame that have matches in right.
|
|
@@ -335,8 +340,9 @@ class FlowGraphToPolarsConverter:
|
|
|
335
340
|
self._add_code(" )")
|
|
336
341
|
self._add_code(")")
|
|
337
342
|
|
|
338
|
-
def _handle_standard_join(
|
|
339
|
-
|
|
343
|
+
def _handle_standard_join(
|
|
344
|
+
self, settings: input_schema.NodeJoin, var_name: str, left_df: str, right_df: str
|
|
345
|
+
) -> None:
|
|
340
346
|
"""Handle standard joins (left, right, inner, outer) with full column management.
|
|
341
347
|
|
|
342
348
|
Standard joins may include columns from both DataFrames and require careful
|
|
@@ -359,26 +365,24 @@ class FlowGraphToPolarsConverter:
|
|
|
359
365
|
Returns:
|
|
360
366
|
None: Modifies internal state by adding generated code
|
|
361
367
|
"""
|
|
362
|
-
settings.join_input
|
|
363
|
-
|
|
368
|
+
join_input_manager = transform_schema.JoinInputManager(settings.join_input)
|
|
369
|
+
join_input_manager.auto_rename()
|
|
364
370
|
# Get join keys
|
|
365
|
-
left_on, right_on = self._get_join_keys(
|
|
371
|
+
left_on, right_on = self._get_join_keys(join_input_manager)
|
|
366
372
|
|
|
367
373
|
# Apply pre-join transformations
|
|
368
|
-
left_df, right_df = self._apply_pre_join_transformations(
|
|
369
|
-
|
|
374
|
+
left_df, right_df = self._apply_pre_join_transformations(join_input_manager, left_df, right_df)
|
|
370
375
|
# Handle join-specific key transformations
|
|
371
376
|
left_on, right_on, reverse_action, after_join_drop_cols = self._handle_join_key_transformations(
|
|
372
|
-
|
|
377
|
+
join_input_manager, left_df, right_df, left_on, right_on
|
|
373
378
|
)
|
|
374
|
-
|
|
375
379
|
# Execute the join
|
|
376
380
|
self._execute_join_with_post_processing(
|
|
377
|
-
settings, var_name, left_df, right_df, left_on, right_on,
|
|
378
|
-
after_join_drop_cols, reverse_action
|
|
381
|
+
settings, var_name, left_df, right_df, left_on, right_on, after_join_drop_cols, reverse_action
|
|
379
382
|
)
|
|
380
383
|
|
|
381
|
-
|
|
384
|
+
@staticmethod
|
|
385
|
+
def _get_join_keys(settings: transform_schema.JoinInputManager) -> tuple[list[str], list[str]]:
|
|
382
386
|
"""Extract join keys based on join type.
|
|
383
387
|
|
|
384
388
|
Different join types require different handling of join keys:
|
|
@@ -391,17 +395,18 @@ class FlowGraphToPolarsConverter:
|
|
|
391
395
|
Returns:
|
|
392
396
|
Tuple[List[str], List[str]]: Lists of (left_on, right_on) column names
|
|
393
397
|
"""
|
|
394
|
-
left_on = [jm.left_col for jm in settings.
|
|
398
|
+
left_on = [jm.left_col for jm in settings.get_names_for_table_rename()]
|
|
395
399
|
|
|
396
|
-
if settings.
|
|
397
|
-
right_on = [jm.right_col for jm in settings.
|
|
400
|
+
if settings.how in ("outer", "right"):
|
|
401
|
+
right_on = [jm.right_col for jm in settings.get_names_for_table_rename()]
|
|
398
402
|
else:
|
|
399
|
-
right_on = [jm.right_col for jm in settings.
|
|
403
|
+
right_on = [jm.right_col for jm in settings.join_mapping]
|
|
400
404
|
|
|
401
405
|
return left_on, right_on
|
|
402
406
|
|
|
403
|
-
def _apply_pre_join_transformations(
|
|
404
|
-
str, str
|
|
407
|
+
def _apply_pre_join_transformations(
|
|
408
|
+
self, settings: transform_schema.JoinInputManager, left_df: str, right_df: str
|
|
409
|
+
) -> tuple[str, str]:
|
|
405
410
|
"""Apply column renames and drops before the join operation.
|
|
406
411
|
|
|
407
412
|
Pre-join transformations prepare DataFrames by:
|
|
@@ -421,25 +426,22 @@ class FlowGraphToPolarsConverter:
|
|
|
421
426
|
# Calculate renames and drops
|
|
422
427
|
right_renames = {
|
|
423
428
|
column.old_name: column.new_name
|
|
424
|
-
for column in settings.
|
|
425
|
-
if
|
|
426
|
-
column.old_name != column.new_name and not column.join_key or settings.join_input.how in ("outer", "right")
|
|
429
|
+
for column in settings.right_select.renames
|
|
430
|
+
if column.old_name != column.new_name and not column.join_key or settings.how in ("outer", "right")
|
|
427
431
|
}
|
|
428
432
|
|
|
429
433
|
left_renames = {
|
|
430
434
|
column.old_name: column.new_name
|
|
431
|
-
for column in settings.
|
|
435
|
+
for column in settings.left_select.renames
|
|
432
436
|
if column.old_name != column.new_name
|
|
433
437
|
}
|
|
434
438
|
|
|
435
439
|
left_drop_columns = [
|
|
436
|
-
column.old_name for column in settings.
|
|
437
|
-
if not column.keep and not column.join_key
|
|
440
|
+
column.old_name for column in settings.left_select.renames if not column.keep and not column.join_key
|
|
438
441
|
]
|
|
439
442
|
|
|
440
443
|
right_drop_columns = [
|
|
441
|
-
column.old_name for column in settings.
|
|
442
|
-
if not column.keep and not column.join_key
|
|
444
|
+
column.old_name for column in settings.right_select.renames if not column.keep and not column.join_key
|
|
443
445
|
]
|
|
444
446
|
|
|
445
447
|
# Apply transformations
|
|
@@ -454,9 +456,14 @@ class FlowGraphToPolarsConverter:
|
|
|
454
456
|
|
|
455
457
|
return left_df, right_df
|
|
456
458
|
|
|
457
|
-
def _handle_join_key_transformations(
|
|
458
|
-
|
|
459
|
-
|
|
459
|
+
def _handle_join_key_transformations(
|
|
460
|
+
self,
|
|
461
|
+
settings: transform_schema.JoinInputManager,
|
|
462
|
+
left_df: str,
|
|
463
|
+
right_df: str,
|
|
464
|
+
left_on: list[str],
|
|
465
|
+
right_on: list[str],
|
|
466
|
+
) -> tuple[list[str], list[str], dict | None, list[str]]:
|
|
460
467
|
"""Route to appropriate join-specific key transformation handler.
|
|
461
468
|
|
|
462
469
|
Different join types require different strategies for handling join keys
|
|
@@ -476,7 +483,7 @@ class FlowGraphToPolarsConverter:
|
|
|
476
483
|
- reverse_action: Dictionary for renaming columns after join (or None)
|
|
477
484
|
- after_join_drop_cols: List of columns to drop after join
|
|
478
485
|
"""
|
|
479
|
-
join_type = settings.
|
|
486
|
+
join_type = settings.how
|
|
480
487
|
|
|
481
488
|
if join_type in ("left", "inner"):
|
|
482
489
|
return self._handle_left_inner_join_keys(settings, right_df, left_on, right_on)
|
|
@@ -487,9 +494,9 @@ class FlowGraphToPolarsConverter:
|
|
|
487
494
|
else:
|
|
488
495
|
return left_on, right_on, None, []
|
|
489
496
|
|
|
490
|
-
def _handle_left_inner_join_keys(
|
|
491
|
-
|
|
492
|
-
|
|
497
|
+
def _handle_left_inner_join_keys(
|
|
498
|
+
self, settings: transform_schema.JoinInputManager, right_df: str, left_on: list[str], right_on: list[str]
|
|
499
|
+
) -> tuple[list[str], list[str], dict, list[str]]:
|
|
493
500
|
"""Handle key transformations for left and inner joins.
|
|
494
501
|
|
|
495
502
|
For left/inner joins:
|
|
@@ -510,31 +517,29 @@ class FlowGraphToPolarsConverter:
|
|
|
510
517
|
- reverse_action: Mapping to rename __DROP__ columns after join
|
|
511
518
|
- after_join_drop_cols: Left join keys marked for dropping
|
|
512
519
|
"""
|
|
513
|
-
left_join_keys_to_keep = [jk.new_name for jk in settings.
|
|
514
|
-
|
|
520
|
+
left_join_keys_to_keep = [jk.new_name for jk in settings.left_select.join_key_selects if jk.keep]
|
|
515
521
|
join_key_duplication_command = [
|
|
516
522
|
f'pl.col("{rjk.old_name}").alias("__DROP__{rjk.new_name}__DROP__")'
|
|
517
|
-
for rjk in settings.
|
|
523
|
+
for rjk in settings.right_select.join_key_selects
|
|
524
|
+
if rjk.keep
|
|
518
525
|
]
|
|
519
526
|
|
|
520
527
|
reverse_action = {
|
|
521
528
|
f"__DROP__{rjk.new_name}__DROP__": rjk.new_name
|
|
522
|
-
for rjk in settings.
|
|
529
|
+
for rjk in settings.right_select.join_key_selects
|
|
530
|
+
if rjk.keep
|
|
523
531
|
}
|
|
524
532
|
|
|
525
533
|
if join_key_duplication_command:
|
|
526
534
|
self._add_code(f"{right_df} = {right_df}.with_columns([{', '.join(join_key_duplication_command)}])")
|
|
527
535
|
|
|
528
|
-
after_join_drop_cols = [
|
|
529
|
-
k.new_name for k in settings.join_input.left_select.join_key_selects
|
|
530
|
-
if not k.keep
|
|
531
|
-
]
|
|
536
|
+
after_join_drop_cols = [k.new_name for k in settings.left_select.join_key_selects if not k.keep]
|
|
532
537
|
|
|
533
538
|
return left_on, right_on, reverse_action, after_join_drop_cols
|
|
534
539
|
|
|
535
|
-
def _handle_right_join_keys(
|
|
536
|
-
|
|
537
|
-
|
|
540
|
+
def _handle_right_join_keys(
|
|
541
|
+
self, settings: transform_schema.JoinInputManager, left_df: str, left_on: list[str], right_on: list[str]
|
|
542
|
+
) -> tuple[list[str], list[str], None, list[str]]:
|
|
538
543
|
"""Handle key transformations for right joins.
|
|
539
544
|
|
|
540
545
|
For right joins:
|
|
@@ -557,12 +562,13 @@ class FlowGraphToPolarsConverter:
|
|
|
557
562
|
"""
|
|
558
563
|
join_key_duplication_command = [
|
|
559
564
|
f'pl.col("{ljk.new_name}").alias("__jk_{ljk.new_name}")'
|
|
560
|
-
for ljk in settings.
|
|
565
|
+
for ljk in settings.left_select.join_key_selects
|
|
566
|
+
if ljk.keep
|
|
561
567
|
]
|
|
562
568
|
|
|
563
569
|
# Update left_on keys
|
|
564
570
|
for position, left_on_key in enumerate(left_on):
|
|
565
|
-
left_on_select = settings.
|
|
571
|
+
left_on_select = settings.left_select.get_select_input_on_new_name(left_on_key)
|
|
566
572
|
if left_on_select and left_on_select.keep:
|
|
567
573
|
left_on[position] = f"__jk_{left_on_select.new_name}"
|
|
568
574
|
|
|
@@ -570,18 +576,18 @@ class FlowGraphToPolarsConverter:
|
|
|
570
576
|
self._add_code(f"{left_df} = {left_df}.with_columns([{', '.join(join_key_duplication_command)}])")
|
|
571
577
|
|
|
572
578
|
# Calculate columns to drop after join
|
|
573
|
-
left_join_keys_keep = {jk.new_name for jk in settings.
|
|
579
|
+
left_join_keys_keep = {jk.new_name for jk in settings.left_select.join_key_selects if jk.keep}
|
|
574
580
|
after_join_drop_cols_right = [
|
|
575
581
|
jk.new_name if jk.new_name not in left_join_keys_keep else jk.new_name + "_right"
|
|
576
|
-
for jk in settings.
|
|
582
|
+
for jk in settings.right_select.join_key_selects
|
|
583
|
+
if not jk.keep
|
|
577
584
|
]
|
|
578
585
|
after_join_drop_cols = list(set(after_join_drop_cols_right))
|
|
579
|
-
|
|
580
586
|
return left_on, right_on, None, after_join_drop_cols
|
|
581
587
|
|
|
582
|
-
def _handle_outer_join_keys(
|
|
583
|
-
|
|
584
|
-
|
|
588
|
+
def _handle_outer_join_keys(
|
|
589
|
+
self, settings: transform_schema.JoinInputManager, right_df: str, left_on: list[str], right_on: list[str]
|
|
590
|
+
) -> tuple[list[str], list[str], dict, list[str]]:
|
|
585
591
|
"""Handle key transformations for outer joins.
|
|
586
592
|
|
|
587
593
|
For outer joins:
|
|
@@ -602,21 +608,17 @@ class FlowGraphToPolarsConverter:
|
|
|
602
608
|
- reverse_action: Mapping to remove __jk_ prefix after join
|
|
603
609
|
- after_join_drop_cols: Combined list of columns to drop from both sides
|
|
604
610
|
"""
|
|
605
|
-
left_join_keys = {jk.new_name for jk in settings.
|
|
611
|
+
left_join_keys = {jk.new_name for jk in settings.left_select.join_key_selects}
|
|
606
612
|
|
|
607
613
|
join_keys_to_keep_and_rename = [
|
|
608
|
-
rjk for rjk in settings.
|
|
609
|
-
if rjk.keep and rjk.new_name in left_join_keys
|
|
614
|
+
rjk for rjk in settings.right_select.join_key_selects if rjk.keep and rjk.new_name in left_join_keys
|
|
610
615
|
]
|
|
611
616
|
|
|
612
|
-
join_key_rename_command = {
|
|
613
|
-
rjk.new_name: f"__jk_{rjk.new_name}"
|
|
614
|
-
for rjk in join_keys_to_keep_and_rename
|
|
615
|
-
}
|
|
617
|
+
join_key_rename_command = {rjk.new_name: f"__jk_{rjk.new_name}" for rjk in join_keys_to_keep_and_rename}
|
|
616
618
|
|
|
617
619
|
# Update right_on keys
|
|
618
620
|
for position, right_on_key in enumerate(right_on):
|
|
619
|
-
right_on_select = settings.
|
|
621
|
+
right_on_select = settings.right_select.get_select_input_on_new_name(right_on_key)
|
|
620
622
|
if right_on_select and right_on_select.keep and right_on_select.new_name in left_join_keys:
|
|
621
623
|
right_on[position] = f"__jk_{right_on_select.new_name}"
|
|
622
624
|
|
|
@@ -626,20 +628,27 @@ class FlowGraphToPolarsConverter:
|
|
|
626
628
|
reverse_action = {f"__jk_{rjk.new_name}": rjk.new_name for rjk in join_keys_to_keep_and_rename}
|
|
627
629
|
|
|
628
630
|
# Calculate columns to drop after join
|
|
629
|
-
after_join_drop_cols_left = [
|
|
630
|
-
jk.new_name for jk in settings.join_input.left_select.join_key_selects if not jk.keep
|
|
631
|
-
]
|
|
631
|
+
after_join_drop_cols_left = [jk.new_name for jk in settings.left_select.join_key_selects if not jk.keep]
|
|
632
632
|
after_join_drop_cols_right = [
|
|
633
633
|
jk.new_name if jk.new_name not in left_join_keys else jk.new_name + "_right"
|
|
634
|
-
for jk in settings.
|
|
634
|
+
for jk in settings.right_select.join_key_selects
|
|
635
|
+
if not jk.keep
|
|
635
636
|
]
|
|
636
637
|
after_join_drop_cols = after_join_drop_cols_left + after_join_drop_cols_right
|
|
637
638
|
|
|
638
639
|
return left_on, right_on, reverse_action, after_join_drop_cols
|
|
639
640
|
|
|
640
|
-
def _execute_join_with_post_processing(
|
|
641
|
-
|
|
642
|
-
|
|
641
|
+
def _execute_join_with_post_processing(
|
|
642
|
+
self,
|
|
643
|
+
settings: input_schema.NodeJoin,
|
|
644
|
+
var_name: str,
|
|
645
|
+
left_df: str,
|
|
646
|
+
right_df: str,
|
|
647
|
+
left_on: list[str],
|
|
648
|
+
right_on: list[str],
|
|
649
|
+
after_join_drop_cols: list[str],
|
|
650
|
+
reverse_action: dict | None,
|
|
651
|
+
) -> None:
|
|
643
652
|
"""Execute the join operation and apply post-processing steps.
|
|
644
653
|
|
|
645
654
|
Generates the actual join code with any necessary post-processing:
|
|
@@ -670,7 +679,7 @@ class FlowGraphToPolarsConverter:
|
|
|
670
679
|
self._add_code(" )")
|
|
671
680
|
|
|
672
681
|
# Handle right join special case
|
|
673
|
-
if settings.join_input.how ==
|
|
682
|
+
if settings.join_input.how == "right":
|
|
674
683
|
self._add_code(".collect()") # Right join needs to be collected first cause of issue with rename
|
|
675
684
|
|
|
676
685
|
# Apply post-join transformations
|
|
@@ -681,21 +690,21 @@ class FlowGraphToPolarsConverter:
|
|
|
681
690
|
self._add_code(f".rename({reverse_action})")
|
|
682
691
|
|
|
683
692
|
# Convert back to lazy for right joins
|
|
684
|
-
if settings.join_input.how ==
|
|
685
|
-
self._add_code(
|
|
693
|
+
if settings.join_input.how == "right":
|
|
694
|
+
self._add_code(".lazy()")
|
|
686
695
|
|
|
687
696
|
self._add_code(")")
|
|
688
697
|
|
|
689
|
-
def _handle_group_by(self, settings: input_schema.NodeGroupBy, var_name: str, input_vars:
|
|
698
|
+
def _handle_group_by(self, settings: input_schema.NodeGroupBy, var_name: str, input_vars: dict[str, str]) -> None:
|
|
690
699
|
"""Handle group by nodes."""
|
|
691
|
-
input_df = input_vars.get(
|
|
700
|
+
input_df = input_vars.get("main", "df")
|
|
692
701
|
|
|
693
702
|
# Separate groupby columns from aggregation columns
|
|
694
703
|
group_cols = []
|
|
695
704
|
agg_exprs = []
|
|
696
705
|
|
|
697
706
|
for agg_col in settings.groupby_input.agg_cols:
|
|
698
|
-
if agg_col.agg ==
|
|
707
|
+
if agg_col.agg == "groupby":
|
|
699
708
|
group_cols.append(agg_col.old_name)
|
|
700
709
|
else:
|
|
701
710
|
agg_func = self._get_agg_function(agg_col.agg)
|
|
@@ -708,9 +717,9 @@ class FlowGraphToPolarsConverter:
|
|
|
708
717
|
self._add_code("])")
|
|
709
718
|
self._add_code("")
|
|
710
719
|
|
|
711
|
-
def _handle_formula(self, settings: input_schema.NodeFormula, var_name: str, input_vars:
|
|
720
|
+
def _handle_formula(self, settings: input_schema.NodeFormula, var_name: str, input_vars: dict[str, str]) -> None:
|
|
712
721
|
"""Handle formula/expression nodes."""
|
|
713
|
-
input_df = input_vars.get(
|
|
722
|
+
input_df = input_vars.get("main", "df")
|
|
714
723
|
self.imports.add("from polars_expr_transformer.process.polars_expr_transformer import simple_function_to_expr")
|
|
715
724
|
|
|
716
725
|
# Convert SQL-like formula to Polars expression
|
|
@@ -718,11 +727,11 @@ class FlowGraphToPolarsConverter:
|
|
|
718
727
|
col_name = settings.function.field.name
|
|
719
728
|
self._add_code(f"{var_name} = {input_df}.with_columns([")
|
|
720
729
|
self._add_code(f'simple_function_to_expr({repr(formula)}).alias("{col_name}")')
|
|
721
|
-
if settings.function.field.data_type not in (None,
|
|
730
|
+
if settings.function.field.data_type not in (None, transform_schema.AUTO_DATA_TYPE):
|
|
722
731
|
output_type = convert_pl_type_to_string(cast_str_to_polars_type(settings.function.field.data_type))
|
|
723
732
|
if output_type[:3] != "pl.":
|
|
724
733
|
output_type = "pl." + output_type
|
|
725
|
-
self._add_code(f
|
|
734
|
+
self._add_code(f" .cast({output_type})")
|
|
726
735
|
|
|
727
736
|
self._add_code("])")
|
|
728
737
|
self._add_code("")
|
|
```diff
@@ -730,11 +739,11 @@ class FlowGraphToPolarsConverter:
     def _handle_pivot_no_index(self, settings: input_schema.NodePivot, var_name: str, input_df: str, agg_func: str):
         pivot_input = settings.pivot_input
 
-        self._add_code(f
+        self._add_code(f"{var_name} = ({input_df}.collect()")
         self._add_code(' .with_columns(pl.lit(1).alias("__temp_index__"))')
-        self._add_code(
+        self._add_code(" .pivot(")
         self._add_code(f' values="{pivot_input.value_col}",')
-        self._add_code(
+        self._add_code(' index=["__temp_index__"],')
         self._add_code(f' columns="{pivot_input.pivot_column}",')
         self._add_code(f' aggregate_function="{agg_func}"')
         self._add_code(" )")
```
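Aside: Polars' pivot requires an index, so the handler injects a constant `__temp_index__` column first. A runnable sketch of the same trick with invented data; note the emitted code uses the older `columns=` keyword, which Polars >= 1.0 renamed to `on=`:

```python
import polars as pl

lf = pl.LazyFrame({"k": ["a", "b", "a"], "v": [1, 2, 3]})

# Same trick as the emitted code: inject a constant index, pivot on it,
# then drop it and go back to lazy mode.
out = (
    lf.collect()
    .with_columns(pl.lit(1).alias("__temp_index__"))
    .pivot(values="v", index="__temp_index__", on="k", aggregate_function="sum")
    .drop("__temp_index__")
    .lazy()
)
print(out.collect())
```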
```diff
@@ -742,17 +751,16 @@ class FlowGraphToPolarsConverter:
         self._add_code(").lazy()")
         self._add_code("")
 
-    def _handle_pivot(self, settings: input_schema.NodePivot, var_name: str, input_vars:
+    def _handle_pivot(self, settings: input_schema.NodePivot, var_name: str, input_vars: dict[str, str]) -> None:
         """Handle pivot nodes."""
-        input_df = input_vars.get(
+        input_df = input_vars.get("main", "df")
         pivot_input = settings.pivot_input
         if len(pivot_input.aggregations) > 1:
-            logger.error("Multiple aggregations are not convertable to polars code. "
-                         "Taking the first value")
+            logger.error("Multiple aggregations are not convertable to polars code. " "Taking the first value")
         if len(pivot_input.aggregations) > 0:
             agg_func = pivot_input.aggregations[0]
         else:
-            agg_func =
+            agg_func = "first"
         if len(settings.pivot_input.index_columns) == 0:
             self._handle_pivot_no_index(settings, var_name, input_df, agg_func)
         else:
```
```diff
@@ -766,9 +774,9 @@ class FlowGraphToPolarsConverter:
         self._add_code(").lazy()")
         self._add_code("")
 
-    def _handle_unpivot(self, settings: input_schema.NodeUnpivot, var_name: str, input_vars:
+    def _handle_unpivot(self, settings: input_schema.NodeUnpivot, var_name: str, input_vars: dict[str, str]) -> None:
         """Handle unpivot nodes."""
-        input_df = input_vars.get(
+        input_df = input_vars.get("main", "df")
         unpivot_input = settings.unpivot_input
 
         self._add_code(f"{var_name} = {input_df}.unpivot(")
```
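Aside: the unpivot handler maps straight onto `LazyFrame.unpivot` (the Polars >= 1.0 rename of `melt`). A minimal sketch with invented columns:

```python
import polars as pl

lf = pl.LazyFrame({"id": [1, 2], "a": [10, 20], "b": [30, 40]})

# index columns are kept; on columns are stacked into variable/value pairs.
print(lf.unpivot(index=["id"], on=["a", "b"]).collect())
```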
```diff
@@ -784,22 +792,22 @@ class FlowGraphToPolarsConverter:
         self._add_code(")")
         self._add_code("")
 
-    def _handle_union(self, settings: input_schema.NodeUnion, var_name: str, input_vars:
+    def _handle_union(self, settings: input_schema.NodeUnion, var_name: str, input_vars: dict[str, str]) -> None:
         """Handle union nodes."""
         # Get all input LazyFrame
         dfs = []
-        if
-            dfs.append(input_vars[
+        if "main" in input_vars:
+            dfs.append(input_vars["main"])
         else:
             # Multiple main inputs
             for key, df_var in input_vars.items():
-                if key.startswith(
+                if key.startswith("main"):
                     dfs.append(df_var)
 
-        if settings.union_input.mode ==
-            how =
+        if settings.union_input.mode == "relaxed":
+            how = "diagonal_relaxed"
         else:
-            how =
+            how = "diagonal"
 
         self._add_code(f"{var_name} = pl.concat([")
         for df in dfs:
```
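Aside: the two union modes map onto Polars' concat strategies. A sketch of what the `relaxed` mode buys, with invented frames:

```python
import polars as pl

a = pl.LazyFrame({"x": [1], "y": ["p"]})
b = pl.LazyFrame({"x": [2], "z": [3.5]})

# diagonal: union of columns, missing values become null.
# diagonal_relaxed: additionally widens mismatched dtypes to a supertype.
print(pl.concat([a, b], how="diagonal_relaxed").collect())
```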
```diff
@@ -807,73 +815,88 @@ class FlowGraphToPolarsConverter:
         self._add_code(f"], how='{how}')")
         self._add_code("")
 
-    def _handle_sort(self, settings: input_schema.NodeSort, var_name: str, input_vars:
+    def _handle_sort(self, settings: input_schema.NodeSort, var_name: str, input_vars: dict[str, str]) -> None:
         """Handle sort nodes."""
-        input_df = input_vars.get(
+        input_df = input_vars.get("main", "df")
 
         sort_cols = []
         descending = []
 
         for sort_input in settings.sort_input:
             sort_cols.append(f'"{sort_input.column}"')
-            descending.append(sort_input.how ==
+            descending.append(sort_input.how == "desc")
 
         self._add_code(f"{var_name} = {input_df}.sort([{', '.join(sort_cols)}], descending={descending})")
         self._add_code("")
 
-    def _handle_sample(self, settings: input_schema.NodeSample, var_name: str, input_vars:
+    def _handle_sample(self, settings: input_schema.NodeSample, var_name: str, input_vars: dict[str, str]) -> None:
         """Handle sample nodes."""
-        input_df = input_vars.get(
+        input_df = input_vars.get("main", "df")
         self._add_code(f"{var_name} = {input_df}.head(n={settings.sample_size})")
         self._add_code("")
 
     @staticmethod
-    def _transform_fuzzy_mappings_to_string(fuzzy_mappings:
+    def _transform_fuzzy_mappings_to_string(fuzzy_mappings: list[FuzzyMapping]) -> str:
         output_str = "["
         for i, fuzzy_mapping in enumerate(fuzzy_mappings):
-
-
-
-
-
+            output_str += (
+                f"FuzzyMapping(left_col='{fuzzy_mapping.left_col}',"
+                f" right_col='{fuzzy_mapping.right_col}', "
+                f"threshold_score={fuzzy_mapping.threshold_score}, "
+                f"fuzzy_type='{fuzzy_mapping.fuzzy_type}')"
+            )
             if i < len(fuzzy_mappings) - 1:
                 output_str += ",\n"
         output_str += "]"
         return output_str
 
-    def _handle_fuzzy_match(
+    def _handle_fuzzy_match(
+        self, settings: input_schema.NodeFuzzyMatch, var_name: str, input_vars: dict[str, str]
+    ) -> None:
         """Handle fuzzy match nodes."""
         self.imports.add("from pl_fuzzy_frame_match import FuzzyMapping, fuzzy_match_dfs")
-
-
+        fuzzy_match_handler = transform_schema.FuzzyMatchInputManager(settings.join_input)
+        left_df = input_vars.get("main", input_vars.get("main_0", "df_left"))
+        right_df = input_vars.get("right", input_vars.get("main_1", "df_right"))
+
         if left_df == right_df:
             right_df = "df_right"
             self._add_code(f"{right_df} = {left_df}")
 
-        if
-            self._add_code(
-
-
+        if fuzzy_match_handler.left_select.has_drop_cols():
+            self._add_code(
+                f"{left_df} = {left_df}.drop({[c.old_name for c in fuzzy_match_handler.left_select.non_jk_drop_columns]})"
+            )
+        if fuzzy_match_handler.right_select.has_drop_cols():
+            self._add_code(
+                f"{right_df} = {right_df}.drop({[c.old_name for c in fuzzy_match_handler.right_select.non_jk_drop_columns]})"
+            )
 
-        fuzzy_join_mapping_settings = self._transform_fuzzy_mappings_to_string(
-        self._add_code(
-
-
-
+        fuzzy_join_mapping_settings = self._transform_fuzzy_mappings_to_string(fuzzy_match_handler.join_mapping)
+        self._add_code(
+            f"{var_name} = fuzzy_match_dfs(\n"
+            f" left_df={left_df}, right_df={right_df},\n"
+            f" fuzzy_maps={fuzzy_join_mapping_settings}\n"
+            f" ).lazy()"
+        )
 
-    def _handle_unique(self, settings: input_schema.NodeUnique, var_name: str, input_vars:
+    def _handle_unique(self, settings: input_schema.NodeUnique, var_name: str, input_vars: dict[str, str]) -> None:
         """Handle unique/distinct nodes."""
-        input_df = input_vars.get(
+        input_df = input_vars.get("main", "df")
 
         if settings.unique_input.columns:
-            self._add_code(
+            self._add_code(
+                f"{var_name} = {input_df}.unique(subset={settings.unique_input.columns}, keep='{settings.unique_input.strategy}')"
+            )
         else:
             self._add_code(f"{var_name} = {input_df}.unique(keep='{settings.unique_input.strategy}')")
         self._add_code("")
 
-    def _handle_text_to_rows(
+    def _handle_text_to_rows(
+        self, settings: input_schema.NodeTextToRows, var_name: str, input_vars: dict[str, str]
+    ) -> None:
         """Handle text to rows (explode) nodes."""
-        input_df = input_vars.get(
+        input_df = input_vars.get("main", "df")
         text_input = settings.text_to_rows_input
 
         # First split the column
```
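Aside: the sort handler builds parallel lists of column names and descending flags, which is exactly Polars' sort signature. A minimal sketch with invented data:

```python
import polars as pl

lf = pl.LazyFrame({"name": ["b", "a", "c"], "score": [1, 3, 2]})

# Parallel column and descending lists, as the handler assembles them
# into the emitted .sort(...) call.
print(lf.sort(["score", "name"], descending=[True, False]).collect())
```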
```diff
@@ -886,96 +909,108 @@ class FlowGraphToPolarsConverter:
 
         self._add_code(f"{var_name} = {input_df}.with_columns({split_expr}).explode('{explode_col}')")
         self._add_code("")
+
     # .with_columns(
     #     (pl.cum_count(record_id_settings.output_column_name)
     #     .over(record_id_settings.group_by_columns) + record_id_settings.offset - 1)
     #     .alias(record_id_settings.output_column_name)
     # )
-    def _handle_record_id(self, settings: input_schema.NodeRecordId, var_name: str, input_vars:
+    def _handle_record_id(self, settings: input_schema.NodeRecordId, var_name: str, input_vars: dict[str, str]) -> None:
         """Handle record ID nodes."""
-        input_df = input_vars.get(
+        input_df = input_vars.get("main", "df")
         record_input = settings.record_id_input
         if record_input.group_by and record_input.group_by_columns:
-
             # Row number within groups
             self._add_code(f"{var_name} = ({input_df}")
             self._add_code(f" .with_columns(pl.lit(1).alias('{record_input.output_column_name}'))")
-            self._add_code(
-            self._add_code(
+            self._add_code(" .with_columns([")
+            self._add_code(
+                f" (pl.cum_count('{record_input.output_column_name}').over({record_input.group_by_columns}) + {record_input.offset} - 1)"
+            )
             self._add_code(f" .alias('{record_input.output_column_name}')")
             self._add_code("])")
-            self._add_code(
+            self._add_code(
+                f".select(['{record_input.output_column_name}'] + [col for col in {input_df}.columns if col != '{record_input.output_column_name}'])"
+            )
             self._add_code(")")
         else:
             # Simple row number
-            self._add_code(
+            self._add_code(
+                f"{var_name} = {input_df}.with_row_count(name='{record_input.output_column_name}', offset={record_input.offset})"
+            )
         self._add_code("")
 
-    def _handle_cross_join(
+    def _handle_cross_join(
+        self, settings: input_schema.NodeCrossJoin, var_name: str, input_vars: dict[str, str]
+    ) -> None:
         """Handle cross join nodes."""
-        left_df = input_vars.get(
-        right_df = input_vars.get(
+        left_df = input_vars.get("main", input_vars.get("main_0", "df_left"))
+        right_df = input_vars.get("right", input_vars.get("main_1", "df_right"))
 
         self._add_code(f"{var_name} = {left_df}.join({right_df}, how='cross')")
         self._add_code("")
 
-    def _handle_cloud_storage_writer(
+    def _handle_cloud_storage_writer(
+        self, settings: input_schema.NodeCloudStorageWriter, var_name: str, input_vars: dict[str, str]
+    ) -> None:
         """Handle cloud storage writer nodes."""
-        input_df = input_vars.get(
+        input_df = input_vars.get("main", "df")
         # def write_csv_to_cloud_storage(self, path: str, connection_name: typing.Optional[str] = None, delimiter: str = ';', encoding: typing.Literal['utf8', 'utf8-lossy'] = 'utf8', description: Optional[str] = None) -> 'FlowFrame': ...
 
         output_settings = settings.cloud_storage_settings
         self.imports.add("import flowfile as ff")
         self._add_code(f"(ff.FlowFrame({input_df})")
         if output_settings.file_format == "csv":
-            self._add_code(
+            self._add_code(" .write_csv_to_cloud_storage(")
             self._add_code(f' path="{output_settings.resource_path}",')
             self._add_code(f' connection_name="{output_settings.connection_name}",')
             self._add_code(f' delimiter="{output_settings.csv_delimiter}",')
             self._add_code(f' encoding="{output_settings.csv_encoding}",')
             self._add_code(f' description="{settings.description}"')
         elif output_settings.file_format == "parquet":
-            self._add_code(
+            self._add_code(" .write_parquet_to_cloud_storage(")
             self._add_code(f' path="{output_settings.resource_path}",')
             self._add_code(f' connection_name="{output_settings.connection_name}",')
             self._add_code(f' description="{settings.description}"')
         elif output_settings.file_format == "json":
-            self._add_code(
+            self._add_code(" .write_json_to_cloud_storage(")
             self._add_code(f' path="{output_settings.resource_path}",')
             self._add_code(f' connection_name="{output_settings.connection_name}",')
             self._add_code(f' description="{settings.description}"')
         elif output_settings.file_format == "delta":
-            self._add_code(
+            self._add_code(" .write_delta(")
             self._add_code(f' path="{output_settings.resource_path}",')
             self._add_code(f' write_mode="{output_settings.write_mode}",')
             self._add_code(f' connection_name="{output_settings.connection_name}",')
             self._add_code(f' description="{settings.description}"')
-        self._add_code(
-        self._add_code(
+        self._add_code(" )")
+        self._add_code(")")
 
-    def _handle_output(self, settings: input_schema.NodeOutput, var_name: str, input_vars:
+    def _handle_output(self, settings: input_schema.NodeOutput, var_name: str, input_vars: dict[str, str]) -> None:
         """Handle output nodes."""
-        input_df = input_vars.get(
+        input_df = input_vars.get("main", "df")
         output_settings = settings.output_settings
 
-        if output_settings.file_type ==
-            self._add_code(f
+        if output_settings.file_type == "csv":
+            self._add_code(f"{input_df}.sink_csv(")
             self._add_code(f' "{output_settings.abs_file_path}",')
-            self._add_code(f' separator="{output_settings.
-            self._add_code(
+            self._add_code(f' separator="{output_settings.table_settings.delimiter}"')
+            self._add_code(")")
 
-        elif output_settings.file_type ==
+        elif output_settings.file_type == "parquet":
             self._add_code(f'{input_df}.sink_parquet("{output_settings.abs_file_path}")')
 
-        elif output_settings.file_type ==
-            self._add_code(f
+        elif output_settings.file_type == "excel":
+            self._add_code(f"{input_df}.collect().write_excel(")
             self._add_code(f' "{output_settings.abs_file_path}",')
-            self._add_code(f' worksheet="{output_settings.
-            self._add_code(
+            self._add_code(f' worksheet="{output_settings.table_settings.sheet_name}"')
+            self._add_code(")")
 
         self._add_code("")
 
-    def _handle_polars_code(
+    def _handle_polars_code(
+        self, settings: input_schema.NodePolarsCode, var_name: str, input_vars: dict[str, str]
+    ) -> None:
         """Handle custom Polars code nodes."""
         code = settings.polars_code_input.polars_code.strip()
         # Determine function parameters based on number of inputs
```
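Aside: the two record-id branches correspond to a plain row index and a grouped running count. A sketch of both, using `with_row_index` (the non-deprecated spelling of the emitted `with_row_count`) and a `cum_count().over()` expression; data and names invented:

```python
import polars as pl

lf = pl.LazyFrame({"city": ["x", "x", "y"]})

# Plain branch: a global row index with a configurable offset.
print(lf.with_row_index(name="record_id", offset=1).collect())

# Grouped branch: a running count per group, analogous to the emitted
# pl.cum_count(...).over(...) expression.
print(lf.with_columns(pl.col("city").cum_count().over("city").alias("record_id")).collect())
```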
```diff
@@ -992,7 +1027,7 @@ class FlowGraphToPolarsConverter:
         arg_list = []
         i = 1
         for key in sorted(input_vars.keys()):
-            if key.startswith(
+            if key.startswith("main"):
                 param_list.append(f"input_df_{i}: pl.LazyFrame")
                 arg_list.append(input_vars[key])
                 i += 1
```
```diff
@@ -1003,7 +1038,7 @@ class FlowGraphToPolarsConverter:
         is_expression = "output_df" not in code
 
         # Wrap the code in a function
-        self._add_code(
+        self._add_code("# Custom Polars code")
         self._add_code(f"def _polars_code_{var_name.replace('df_', '')}({params}):")
 
         # Handle the code based on its structure
```
```diff
@@ -1012,18 +1047,18 @@ class FlowGraphToPolarsConverter:
             self._add_code(f"    return {code}")
         else:
             # It contains assignments
-            for line in code.split(
+            for line in code.split("\n"):
                 if line.strip():
                     self._add_code(f"    {line}")
 
             # If no explicit return, try to detect what to return
-            if
+            if "return" not in code:
                 # Try to find the last assignment
-                lines = [l.strip() for l in code.split(
+                lines = [l.strip() for l in code.split("\n") if l.strip() and "=" in l]
                 if lines:
                     last_assignment = lines[-1]
-                    if
-                        output_var = last_assignment.split(
+                    if "=" in last_assignment:
+                        output_var = last_assignment.split("=")[0].strip()
                         self._add_code(f"    return {output_var}")
 
         self._add_code("")
```
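Aside: for custom Polars code the converter wraps the snippet in a function and, when the snippet has no explicit `return`, returns the target of its last assignment. An illustrative rendering of the wrapper output; the snippet and names are hypothetical:

```python
# Illustrative wrapper output for a snippet without an explicit return:
# the last assignment target ("output_df") is detected and returned.
wrapped = "\n".join([
    "# Custom Polars code",
    "def _polars_code_3(input_df_1: pl.LazyFrame):",
    '    output_df = input_df_1.filter(pl.col("x") > 1)',
    "    return output_df",
])
print(wrapped)
```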
```diff
@@ -1056,14 +1091,7 @@ class FlowGraphToPolarsConverter:
         col, op, val = match.groups()
 
         # Map operators
-        op_map = {
-            '=': '==',
-            '!=': '!=',
-            '>': '>',
-            '<': '<',
-            '>=': '>=',
-            '<=': '<='
-        }
+        op_map = {"=": "==", "!=": "!=", ">": ">", "<": "<", ">=": ">=", "<=": "<="}
 
         polars_op = op_map.get(op, op)
 
```
```diff
@@ -1077,45 +1105,129 @@ class FlowGraphToPolarsConverter:
         return re.sub(pattern, replace_expr, expr)
 
     def _create_basic_filter_expr(self, basic: transform_schema.BasicFilter) -> str:
-        """Create Polars expression from basic filter.
+        """Create Polars expression from basic filter.
+
+        Generates proper Polars code for all supported filter operators.
+
+        Args:
+            basic: The BasicFilter configuration.
+
+        Returns:
+            A string containing valid Polars filter expression code.
+        """
+        from flowfile_core.schemas.transform_schema import FilterOperator
+
         col = f'pl.col("{basic.field}")'
+        value = basic.value
+        value2 = basic.value2
+
+        # Determine if value is numeric (for proper quoting)
+        is_numeric = value.replace(".", "", 1).replace("-", "", 1).isnumeric() if value else False
+
+        # Get the operator
+        try:
+            operator = basic.get_operator()
+        except (ValueError, AttributeError):
+            operator = FilterOperator.from_symbol(str(basic.operator))
+
+        # Generate expression based on operator
+        if operator == FilterOperator.EQUALS:
+            if is_numeric:
+                return f"{col} == {value}"
+            return f'{col} == "{value}"'
+
+        elif operator == FilterOperator.NOT_EQUALS:
+            if is_numeric:
+                return f"{col} != {value}"
+            return f'{col} != "{value}"'
+
+        elif operator == FilterOperator.GREATER_THAN:
+            if is_numeric:
+                return f"{col} > {value}"
+            return f'{col} > "{value}"'
+
+        elif operator == FilterOperator.GREATER_THAN_OR_EQUALS:
+            if is_numeric:
+                return f"{col} >= {value}"
+            return f'{col} >= "{value}"'
+
+        elif operator == FilterOperator.LESS_THAN:
+            if is_numeric:
+                return f"{col} < {value}"
+            return f'{col} < "{value}"'
+
+        elif operator == FilterOperator.LESS_THAN_OR_EQUALS:
+            if is_numeric:
+                return f"{col} <= {value}"
+            return f'{col} <= "{value}"'
+
+        elif operator == FilterOperator.CONTAINS:
+            return f'{col}.str.contains("{value}")'
+
+        elif operator == FilterOperator.NOT_CONTAINS:
+            return f'{col}.str.contains("{value}").not_()'
+
+        elif operator == FilterOperator.STARTS_WITH:
+            return f'{col}.str.starts_with("{value}")'
+
+        elif operator == FilterOperator.ENDS_WITH:
+            return f'{col}.str.ends_with("{value}")'
+
+        elif operator == FilterOperator.IS_NULL:
+            return f"{col}.is_null()"
+
+        elif operator == FilterOperator.IS_NOT_NULL:
+            return f"{col}.is_not_null()"
+
+        elif operator == FilterOperator.IN:
+            values = [v.strip() for v in value.split(",")]
+            if all(v.replace(".", "", 1).replace("-", "", 1).isnumeric() for v in values):
+                values_str = ", ".join(values)
+            else:
+                values_str = ", ".join(f'"{v}"' for v in values)
+            return f"{col}.is_in([{values_str}])"
+
+        elif operator == FilterOperator.NOT_IN:
+            values = [v.strip() for v in value.split(",")]
+            if all(v.replace(".", "", 1).replace("-", "", 1).isnumeric() for v in values):
+                values_str = ", ".join(values)
+            else:
+                values_str = ", ".join(f'"{v}"' for v in values)
+            return f"{col}.is_in([{values_str}]).not_()"
+
+        elif operator == FilterOperator.BETWEEN:
+            if value2 is None:
+                return f"{col} # BETWEEN requires two values"
+            if is_numeric and value2.replace(".", "", 1).replace("-", "", 1).isnumeric():
+                return f"({col} >= {value}) & ({col} <= {value2})"
+            return f'({col} >= "{value}") & ({col} <= "{value2}")'
 
-
-        return f'{col} == "{basic.filter_value}"'
-        elif basic.filter_type == 'not_equals':
-            return f'{col} != "{basic.filter_value}"'
-        elif basic.filter_type == 'greater':
-            return f'{col} > {basic.filter_value}'
-        elif basic.filter_type == 'less':
-            return f'{col} < {basic.filter_value}'
-        elif basic.filter_type == 'in':
-            values = basic.filter_value.split(',')
-            return f"pl.col('{col}').is_in({values})"
+        # Fallback
         return col
 
     def _get_polars_dtype(self, dtype_str: str) -> str:
         """Convert Flowfile dtype string to Polars dtype."""
         dtype_map = {
-
-
-
-
-
-
-
-
-
-
-
+            "String": "pl.Utf8",
+            "Integer": "pl.Int64",
+            "Double": "pl.Float64",
+            "Boolean": "pl.Boolean",
+            "Date": "pl.Date",
+            "Datetime": "pl.Datetime",
+            "Float32": "pl.Float32",
+            "Float64": "pl.Float64",
+            "Int32": "pl.Int32",
+            "Int64": "pl.Int64",
+            "Utf8": "pl.Utf8",
         }
-        return dtype_map.get(dtype_str,
+        return dtype_map.get(dtype_str, "pl.Utf8")
 
     def _get_agg_function(self, agg: str) -> str:
         """Get Polars aggregation function name."""
         agg_map = {
-
-
-
+            "avg": "mean",
+            "average": "mean",
+            "concat": "str.concat",
         }
         return agg_map.get(agg, agg)
 
```
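Aside: the numeric-literal test above decides whether a filter value is emitted unquoted. A stand-alone copy of that check to show its behavior:

```python
# Mirrors the converter's test: strip at most one "." and one "-",
# then ask isnumeric().
def looks_numeric(value: str) -> bool:
    return value.replace(".", "", 1).replace("-", "", 1).isnumeric() if value else False

for v in ["42", "-3.5", "abc", "1.2.3", ""]:
    print(repr(v), looks_numeric(v))
# "42" and "-3.5" compare as numbers (e.g. pl.col("age") >= 42); the
# rest become quoted string comparisons.
```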
```diff
@@ -1128,12 +1240,12 @@ class FlowGraphToPolarsConverter:
         import re
 
         # Pattern for column names (simplified)
-        col_pattern = r
+        col_pattern = r"\b([a-zA-Z_][a-zA-Z0-9_]*)\b"
 
         def replace_col(match):
             col_name = match.group(1)
             # Skip SQL keywords
-            keywords = {
+            keywords = {"CASE", "WHEN", "THEN", "ELSE", "END", "AND", "OR", "NOT", "IN", "AS"}
             if col_name.upper() in keywords:
                 return col_name
             return f'pl.col("{col_name}")'
```
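Aside: the simplified column pattern can be exercised directly; bare identifiers become `pl.col(...)` while SQL keywords survive untouched:

```python
import re

# Reproduction of the rewrite shown above, with an invented input.
col_pattern = r"\b([a-zA-Z_][a-zA-Z0-9_]*)\b"
keywords = {"CASE", "WHEN", "THEN", "ELSE", "END", "AND", "OR", "NOT", "IN", "AS"}

def replace_col(match: re.Match) -> str:
    name = match.group(1)
    return name if name.upper() in keywords else f'pl.col("{name}")'

print(re.sub(col_pattern, replace_col, "price AND qty"))
# -> pl.col("price") AND pl.col("qty")
```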
```diff
@@ -1141,13 +1253,13 @@ class FlowGraphToPolarsConverter:
         result = re.sub(col_pattern, replace_col, sql_expr)
 
         # Handle CASE WHEN
-        if
+        if "CASE" in result:
             # This would need proper parsing
             result = "pl.when(...).then(...).otherwise(...)"
 
         return result
 
-    def add_return_code(self, lines:
+    def add_return_code(self, lines: list[str]) -> None:
         if self.output_nodes:
             # Return marked output nodes
             if len(self.output_nodes) == 1:
```
```diff
@@ -1177,8 +1289,8 @@ class FlowGraphToPolarsConverter:
         # Add main function
         lines.append("def run_etl_pipeline():")
         lines.append(' """')
-        lines.append(f
-        lines.append(
+        lines.append(f"    ETL Pipeline: {self.flow_graph.__name__}")
+        lines.append("    Generated from Flowfile")
         lines.append(' """')
         lines.append("    ")
 
```