Flowfile 0.4.1-py3-none-any.whl → 0.5.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backends/main.py +25 -22
- build_backends/main_prd.py +10 -19
- flowfile/__init__.py +179 -73
- flowfile/__main__.py +10 -7
- flowfile/api.py +52 -59
- flowfile/web/__init__.py +14 -9
- flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
- flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
- flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
- flowfile/web/static/assets/{CloudConnectionManager-d3248f8d.js → CloudConnectionView-f13f202b.js} +11 -11
- flowfile/web/static/assets/{CloudStorageReader-d65bf041.js → CloudStorageReader-0023d4a5.js} +10 -8
- flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
- flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
- flowfile/web/static/assets/{CloudStorageWriter-e83be3ed.js → CloudStorageWriter-8e781e11.js} +10 -8
- flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
- flowfile/web/static/assets/{ColumnSelector-cce661cf.js → ColumnSelector-8ad68ea9.js} +3 -5
- flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-11a4652a.js → ContextMenu-31ee57f0.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-160afb08.js → ContextMenu-69a74055.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-cf18d2cc.js → ContextMenu-8e2051c6.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
- flowfile/web/static/assets/{CrossJoin-d395d38c.js → CrossJoin-03df6938.js} +12 -10
- flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
- flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
- flowfile/web/static/assets/{CustomNode-b812dc0b.js → CustomNode-8479239b.js} +36 -24
- flowfile/web/static/assets/{DatabaseConnectionSettings-7000bf2c.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
- flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
- flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
- flowfile/web/static/assets/{DatabaseReader-4f035d0c.js → DatabaseReader-c58b9552.js} +25 -15
- flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
- flowfile/web/static/assets/{DatabaseManager-9662ec5b.js → DatabaseView-d26a9140.js} +11 -11
- flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
- flowfile/web/static/assets/{DatabaseWriter-f65dcd54.js → DatabaseWriter-4d05ddc7.js} +17 -10
- flowfile/web/static/assets/{designer-e3c150ec.css → DesignerView-a6d0ee84.css} +629 -538
- flowfile/web/static/assets/{designer-f3656d8c.js → DesignerView-e6f5c0e8.js} +1214 -3209
- flowfile/web/static/assets/{documentation-52b241e7.js → DocumentationView-2e78ef1b.js} +5 -5
- flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
- flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
- flowfile/web/static/assets/{ExploreData-94c43dfc.js → ExploreData-7b54caca.js} +18 -9
- flowfile/web/static/assets/{ExternalSource-ac04b3cc.js → ExternalSource-3fa399b2.js} +9 -7
- flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
- flowfile/web/static/assets/Filter-7494ea97.css +48 -0
- flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
- flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
- flowfile/web/static/assets/{Formula-71472193.js → Formula-aac42b1e.js} +13 -11
- flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
- flowfile/web/static/assets/{FuzzyMatch-b317f631.js → FuzzyMatch-cd9bbfca.js} +12 -10
- flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
- flowfile/web/static/assets/{GraphSolver-754a234f.js → GraphSolver-c7e6780e.js} +13 -11
- flowfile/web/static/assets/{GroupBy-6c6f9802.js → GroupBy-93c5d22b.js} +9 -7
- flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
- flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
- flowfile/web/static/assets/{Join-a1b800be.js → Join-a19b2de2.js} +13 -11
- flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
- flowfile/web/static/assets/LoginView-d325d632.css +172 -0
- flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
- flowfile/web/static/assets/{ManualInput-a9640276.js → ManualInput-8d3374b2.js} +170 -116
- flowfile/web/static/assets/{MultiSelect-97213888.js → MultiSelect-ad1b6243.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-6ffe088a.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
- flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
- flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
- flowfile/web/static/assets/{NumericInput-e638088a.js → NumericInput-7100234c.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-90eb2cba.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
- flowfile/web/static/assets/{Output-ddc9079f.css → Output-35e97000.css} +6 -6
- flowfile/web/static/assets/{Output-76750610.js → Output-f5efd2aa.js} +60 -38
- flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
- flowfile/web/static/assets/{Pivot-7814803f.js → Pivot-d981d23c.js} +11 -9
- flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
- flowfile/web/static/assets/{PivotValidation-f92137d2.js → PivotValidation-39386e95.js} +3 -3
- flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
- flowfile/web/static/assets/{PivotValidation-76dd431a.js → PivotValidation-63de1f73.js} +3 -3
- flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
- flowfile/web/static/assets/{PolarsCode-889c3008.js → PolarsCode-f9d69217.js} +18 -9
- flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
- flowfile/web/static/assets/PopOver-d96599db.css +33 -0
- flowfile/web/static/assets/{Read-6b17491f.css → Read-36e7bd51.css} +12 -12
- flowfile/web/static/assets/{Read-637b72a7.js → Read-aec2e377.js} +83 -105
- flowfile/web/static/assets/{RecordCount-2b050c41.js → RecordCount-78ed6845.js} +6 -4
- flowfile/web/static/assets/{RecordId-81df7784.js → RecordId-2156e890.js} +8 -6
- flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
- flowfile/web/static/assets/{SQLQueryComponent-88dcfe53.js → SQLQueryComponent-48c72f5b.js} +3 -3
- flowfile/web/static/assets/{Sample-258ad2a9.js → Sample-1352ca74.js} +6 -4
- flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
- flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
- flowfile/web/static/assets/{SecretManager-2a2cb7e2.js → SecretsView-17df66ee.js} +35 -36
- flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
- flowfile/web/static/assets/{Select-850215fd.js → Select-0aee4c54.js} +9 -7
- flowfile/web/static/assets/{SettingsSection-55bae608.js → SettingsSection-0784e157.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-0e8d9123.js → SettingsSection-cd341bb6.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-29b4fa6b.js → SettingsSection-f2002a6d.js} +3 -3
- flowfile/web/static/assets/{SingleSelect-bebd408b.js → SingleSelect-460cc0ea.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-6093741c.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
- flowfile/web/static/assets/{SliderInput-6a05ab61.js → SliderInput-5d926864.js} +7 -4
- flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
- flowfile/web/static/assets/{Sort-10ab48ed.js → Sort-3cdc971b.js} +9 -7
- flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
- flowfile/web/static/assets/{TextInput-df9d6259.js → TextInput-a2d0bfbd.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-000e1178.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
- flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
- flowfile/web/static/assets/{TextToRows-6c2d93d8.js → TextToRows-918945f7.js} +11 -10
- flowfile/web/static/assets/{ToggleSwitch-0ff7ac52.js → ToggleSwitch-f0ef5196.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-c6dc3029.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
- flowfile/web/static/assets/{UnavailableFields-1bab97cb.js → UnavailableFields-bdad6144.js} +4 -4
- flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
- flowfile/web/static/assets/{Union-b563478a.js → Union-e8ab8c86.js} +8 -6
- flowfile/web/static/assets/{Unique-f90db5db.js → Unique-8cd4f976.js} +13 -22
- flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
- flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
- flowfile/web/static/assets/{Unpivot-bcb0025f.js → Unpivot-8da14095.js} +10 -8
- flowfile/web/static/assets/{UnpivotValidation-c4e73b04.js → UnpivotValidation-6f7d89ff.js} +3 -3
- flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
- flowfile/web/static/assets/{VueGraphicWalker-bb8535e2.js → VueGraphicWalker-3fb312e1.js} +4 -4
- flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
- flowfile/web/static/assets/{api-4c8e3822.js → api-24483f0d.js} +1 -1
- flowfile/web/static/assets/{api-2d6adc4f.js → api-8b81fa73.js} +1 -1
- flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
- flowfile/web/static/assets/{dropDown-1bca8a74.js → dropDown-ac0fda9d.js} +3 -3
- flowfile/web/static/assets/{fullEditor-2985687e.js → fullEditor-5497a84a.js} +11 -10
- flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
- flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
- flowfile/web/static/assets/{genericNodeSettings-0476ba4e.js → genericNodeSettings-99014e1d.js} +5 -5
- flowfile/web/static/assets/index-07dda503.js +38 -0
- flowfile/web/static/assets/index-3ba44389.js +2696 -0
- flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
- flowfile/web/static/assets/{index-246f201c.js → index-fb6493ae.js} +41626 -40869
- flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
- flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
- flowfile/web/static/assets/{outputCsv-d686eeaf.js → outputCsv-8f8ba42d.js} +3 -3
- flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
- flowfile/web/static/assets/{outputExcel-8809ea2f.js → outputExcel-393f4fef.js} +3 -3
- flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
- flowfile/web/static/assets/{outputParquet-53ba645a.js → outputParquet-07c81f65.js} +4 -4
- flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
- flowfile/web/static/assets/{readCsv-053bf97b.js → readCsv-07f6d9ad.js} +21 -20
- flowfile/web/static/assets/{readCsv-bca3ed53.css → readCsv-3bfac4c3.css} +15 -15
- flowfile/web/static/assets/{readExcel-e1b381ea.css → readExcel-3db6b763.css} +13 -13
- flowfile/web/static/assets/{readExcel-ad531eab.js → readExcel-ed69bc8f.js} +10 -12
- flowfile/web/static/assets/{readParquet-cee068e2.css → readParquet-c5244ad5.css} +4 -4
- flowfile/web/static/assets/{readParquet-58e899a1.js → readParquet-e3ed4528.js} +4 -7
- flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
- flowfile/web/static/assets/{selectDynamic-b38de2ba.js → selectDynamic-80b92899.js} +5 -5
- flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
- flowfile/web/static/assets/{vue-codemirror.esm-db9b8936.js → vue-codemirror.esm-0965f39f.js} +31 -637
- flowfile/web/static/assets/{vue-content-loader.es-b5f3ac30.js → vue-content-loader.es-c506ad97.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +4 -4
- flowfile-0.5.3.dist-info/RECORD +402 -0
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +1 -1
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +1 -0
- flowfile_core/__init__.py +13 -3
- flowfile_core/auth/jwt.py +51 -16
- flowfile_core/auth/models.py +32 -7
- flowfile_core/auth/password.py +89 -0
- flowfile_core/auth/secrets.py +8 -6
- flowfile_core/configs/__init__.py +9 -7
- flowfile_core/configs/flow_logger.py +15 -14
- flowfile_core/configs/node_store/__init__.py +72 -4
- flowfile_core/configs/node_store/nodes.py +155 -172
- flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
- flowfile_core/configs/settings.py +28 -15
- flowfile_core/database/connection.py +7 -6
- flowfile_core/database/init_db.py +96 -2
- flowfile_core/database/models.py +3 -1
- flowfile_core/fileExplorer/__init__.py +17 -0
- flowfile_core/fileExplorer/funcs.py +123 -57
- flowfile_core/fileExplorer/utils.py +10 -11
- flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
- flowfile_core/flowfile/analytics/analytics_processor.py +27 -24
- flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
- flowfile_core/flowfile/analytics/utils.py +1 -1
- flowfile_core/flowfile/code_generator/code_generator.py +391 -279
- flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
- flowfile_core/flowfile/database_connection_manager/models.py +1 -1
- flowfile_core/flowfile/extensions.py +17 -12
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +152 -103
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +526 -477
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +43 -32
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +15 -11
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
- flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +360 -191
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
- flowfile_core/flowfile/flow_data_engine/utils.py +101 -67
- flowfile_core/flowfile/flow_graph.py +1011 -561
- flowfile_core/flowfile/flow_graph_utils.py +31 -49
- flowfile_core/flowfile/flow_node/flow_node.py +332 -232
- flowfile_core/flowfile/flow_node/models.py +54 -41
- flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
- flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
- flowfile_core/flowfile/handler.py +82 -32
- flowfile_core/flowfile/manage/compatibility_enhancements.py +493 -47
- flowfile_core/flowfile/manage/io_flowfile.py +391 -0
- flowfile_core/flowfile/node_designer/__init__.py +15 -13
- flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
- flowfile_core/flowfile/node_designer/custom_node.py +162 -36
- flowfile_core/flowfile/node_designer/ui_components.py +136 -35
- flowfile_core/flowfile/schema_callbacks.py +77 -54
- flowfile_core/flowfile/setting_generator/__init__.py +0 -1
- flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
- flowfile_core/flowfile/setting_generator/settings.py +72 -55
- flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
- flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
- flowfile_core/flowfile/util/calculate_layout.py +9 -13
- flowfile_core/flowfile/util/execution_orderer.py +25 -17
- flowfile_core/flowfile/util/node_skipper.py +4 -4
- flowfile_core/flowfile/utils.py +19 -21
- flowfile_core/main.py +26 -19
- flowfile_core/routes/auth.py +284 -11
- flowfile_core/routes/cloud_connections.py +25 -25
- flowfile_core/routes/logs.py +21 -29
- flowfile_core/routes/public.py +3 -3
- flowfile_core/routes/routes.py +77 -43
- flowfile_core/routes/secrets.py +25 -27
- flowfile_core/routes/user_defined_components.py +483 -4
- flowfile_core/run_lock.py +0 -1
- flowfile_core/schemas/__init__.py +4 -6
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
- flowfile_core/schemas/cloud_storage_schemas.py +59 -55
- flowfile_core/schemas/input_schema.py +398 -154
- flowfile_core/schemas/output_model.py +50 -35
- flowfile_core/schemas/schemas.py +207 -67
- flowfile_core/schemas/transform_schema.py +1360 -435
- flowfile_core/schemas/yaml_types.py +117 -0
- flowfile_core/secret_manager/secret_manager.py +17 -13
- flowfile_core/{flowfile/node_designer/data_types.py → types.py} +33 -3
- flowfile_core/utils/arrow_reader.py +7 -6
- flowfile_core/utils/excel_file_manager.py +3 -3
- flowfile_core/utils/fileManager.py +7 -7
- flowfile_core/utils/fl_executor.py +8 -10
- flowfile_core/utils/utils.py +4 -4
- flowfile_core/utils/validate_setup.py +5 -4
- flowfile_frame/__init__.py +107 -50
- flowfile_frame/adapters.py +2 -9
- flowfile_frame/adding_expr.py +73 -32
- flowfile_frame/cloud_storage/frame_helpers.py +27 -23
- flowfile_frame/cloud_storage/secret_manager.py +12 -26
- flowfile_frame/config.py +2 -5
- flowfile_frame/expr.py +311 -218
- flowfile_frame/expr.pyi +160 -159
- flowfile_frame/expr_name.py +23 -23
- flowfile_frame/flow_frame.py +581 -489
- flowfile_frame/flow_frame.pyi +123 -104
- flowfile_frame/flow_frame_methods.py +236 -252
- flowfile_frame/group_frame.py +50 -20
- flowfile_frame/join.py +2 -2
- flowfile_frame/lazy.py +129 -87
- flowfile_frame/lazy_methods.py +83 -30
- flowfile_frame/list_name_space.py +55 -50
- flowfile_frame/selectors.py +148 -68
- flowfile_frame/series.py +9 -7
- flowfile_frame/utils.py +19 -21
- flowfile_worker/__init__.py +12 -4
- flowfile_worker/configs.py +11 -19
- flowfile_worker/create/__init__.py +14 -27
- flowfile_worker/create/funcs.py +143 -94
- flowfile_worker/create/models.py +139 -68
- flowfile_worker/create/pl_types.py +14 -15
- flowfile_worker/create/read_excel_tables.py +34 -41
- flowfile_worker/create/utils.py +22 -19
- flowfile_worker/external_sources/s3_source/main.py +18 -51
- flowfile_worker/external_sources/s3_source/models.py +34 -27
- flowfile_worker/external_sources/sql_source/main.py +8 -5
- flowfile_worker/external_sources/sql_source/models.py +13 -9
- flowfile_worker/flow_logger.py +10 -8
- flowfile_worker/funcs.py +214 -155
- flowfile_worker/main.py +11 -17
- flowfile_worker/models.py +35 -28
- flowfile_worker/process_manager.py +2 -3
- flowfile_worker/routes.py +121 -93
- flowfile_worker/secrets.py +9 -6
- flowfile_worker/spawner.py +80 -49
- flowfile_worker/utils.py +3 -2
- shared/__init__.py +2 -7
- shared/storage_config.py +25 -13
- test_utils/postgres/commands.py +3 -2
- test_utils/postgres/fixtures.py +9 -9
- test_utils/s3/commands.py +1 -1
- test_utils/s3/data_generator.py +3 -4
- test_utils/s3/demo_data_generator.py +4 -7
- test_utils/s3/fixtures.py +7 -5
- tools/migrate/README.md +56 -0
- tools/migrate/__init__.py +12 -0
- tools/migrate/__main__.py +118 -0
- tools/migrate/legacy_schemas.py +682 -0
- tools/migrate/migrate.py +610 -0
- tools/migrate/tests/__init__.py +0 -0
- tools/migrate/tests/conftest.py +21 -0
- tools/migrate/tests/test_migrate.py +622 -0
- tools/migrate/tests/test_migration_e2e.py +1009 -0
- tools/migrate/tests/test_node_migrations.py +843 -0
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
- flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
- flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
- flowfile/web/static/assets/Filter-812dcbca.js +0 -164
- flowfile/web/static/assets/Filter-f62091b3.css +0 -20
- flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
- flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
- flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
- flowfile/web/static/assets/secretApi-538058f3.js +0 -46
- flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
- flowfile-0.4.1.dist-info/RECORD +0 -376
- flowfile_core/flowfile/manage/open_flowfile.py +0 -143
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
- /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
flowfile_frame/flow_frame.py
CHANGED
|
@@ -1,34 +1,36 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import inspect
|
|
2
4
|
import os
|
|
3
|
-
from typing import Any, Iterable, List, Literal, Optional, Tuple, Union, Dict, Callable, get_args, get_origin
|
|
4
|
-
|
|
5
5
|
import re
|
|
6
|
+
from collections.abc import Iterable, Iterator, Mapping
|
|
7
|
+
from typing import Any, Literal, Optional, Union, get_args, get_origin
|
|
6
8
|
|
|
7
9
|
import polars as pl
|
|
8
|
-
from
|
|
9
|
-
|
|
10
|
-
from polars._typing import (CsvEncoding, FrameInitTypes, SchemaDefinition, SchemaDict, Orientation)
|
|
11
|
-
from collections.abc import Iterator
|
|
12
|
-
|
|
13
|
-
from pl_fuzzy_frame_match import FuzzyMapping, fuzzy_match_dfs
|
|
10
|
+
from pl_fuzzy_frame_match import FuzzyMapping
|
|
11
|
+
from polars._typing import CsvEncoding, FrameInitTypes, Orientation, SchemaDefinition, SchemaDict
|
|
14
12
|
|
|
13
|
+
from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine
|
|
15
14
|
from flowfile_core.flowfile.flow_graph import FlowGraph, add_connection
|
|
16
15
|
from flowfile_core.flowfile.flow_graph_utils import combine_flow_graphs_with_mapping
|
|
17
|
-
from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine
|
|
18
16
|
from flowfile_core.flowfile.flow_node.flow_node import FlowNode
|
|
19
17
|
from flowfile_core.schemas import input_schema, transform_schema
|
|
20
|
-
|
|
21
|
-
from flowfile_frame.expr import Expr, Column, lit, col
|
|
22
|
-
from flowfile_frame.selectors import Selector
|
|
23
|
-
from flowfile_frame.group_frame import GroupByFrame
|
|
24
|
-
from flowfile_frame.utils import (_parse_inputs_as_iterable, create_flow_graph, stringify_values,
|
|
25
|
-
ensure_inputs_as_iterable, generate_node_id, data as node_id_data)
|
|
26
|
-
from flowfile_frame.join import _normalize_columns_to_list, _create_join_mappings
|
|
27
|
-
from flowfile_frame.utils import _check_if_convertible_to_code
|
|
28
|
-
from flowfile_frame.config import logger
|
|
29
18
|
from flowfile_frame.cloud_storage.frame_helpers import add_write_ff_to_cloud_storage
|
|
30
|
-
from
|
|
31
|
-
|
|
19
|
+
from flowfile_frame.config import logger
|
|
20
|
+
from flowfile_frame.expr import Column, Expr, col, lit
|
|
21
|
+
from flowfile_frame.group_frame import GroupByFrame
|
|
22
|
+
from flowfile_frame.join import _create_join_mappings, _normalize_columns_to_list
|
|
23
|
+
from flowfile_frame.lazy_methods import add_lazyframe_methods
|
|
24
|
+
from flowfile_frame.selectors import Selector
|
|
25
|
+
from flowfile_frame.utils import (
|
|
26
|
+
_check_if_convertible_to_code,
|
|
27
|
+
_parse_inputs_as_iterable,
|
|
28
|
+
create_flow_graph,
|
|
29
|
+
ensure_inputs_as_iterable,
|
|
30
|
+
generate_node_id,
|
|
31
|
+
stringify_values,
|
|
32
|
+
)
|
|
33
|
+
from flowfile_frame.utils import data as node_id_data
|
|
32
34
|
|
|
33
35
|
|
|
34
36
|
def can_be_expr(param: inspect.Parameter) -> bool:
|
|
@@ -83,7 +85,7 @@ def _extract_expr_parts(expr_obj) -> tuple[str, str]:
|
|
|
83
85
|
raw_definitions = []
|
|
84
86
|
|
|
85
87
|
# Add function sources if any
|
|
86
|
-
if hasattr(expr_obj,
|
|
88
|
+
if hasattr(expr_obj, "_function_sources") and expr_obj._function_sources:
|
|
87
89
|
# Remove duplicates while preserving order
|
|
88
90
|
unique_sources = []
|
|
89
91
|
seen = set()
|
|
@@ -101,8 +103,9 @@ def _extract_expr_parts(expr_obj) -> tuple[str, str]:
|
|
|
101
103
|
return pure_expr_str, raw_defs_str
|
|
102
104
|
|
|
103
105
|
|
|
104
|
-
def _check_ok_for_serialization(
|
|
105
|
-
|
|
106
|
+
def _check_ok_for_serialization(
|
|
107
|
+
method_name: str = None, polars_expr: pl.Expr | None = None, group_expr: pl.Expr | None = None
|
|
108
|
+
) -> None:
|
|
106
109
|
if method_name is None:
|
|
107
110
|
raise NotImplementedError("Cannot create a polars lambda expression without the method")
|
|
108
111
|
if polars_expr is None:
|
|
@@ -110,7 +113,7 @@ def _check_ok_for_serialization(method_name: str = None, polars_expr: pl.Expr |
|
|
|
110
113
|
method_ref = getattr(pl.LazyFrame, method_name)
|
|
111
114
|
if method_ref is None:
|
|
112
115
|
raise ModuleNotFoundError(f"Could not find the method {method_name} in polars lazyframe")
|
|
113
|
-
if method_name ==
|
|
116
|
+
if method_name == "group_by":
|
|
114
117
|
if group_expr is None:
|
|
115
118
|
raise NotImplementedError("Cannot create a polars lambda expression without the groupby expression")
|
|
116
119
|
if not all(isinstance(ge, pl.Expr) for ge in group_expr):
|
|
@@ -120,6 +123,7 @@ def _check_ok_for_serialization(method_name: str = None, polars_expr: pl.Expr |
|
|
|
120
123
|
@add_lazyframe_methods
|
|
121
124
|
class FlowFrame:
|
|
122
125
|
"""Main class that wraps FlowDataEngine and maintains the ETL graph."""
|
|
126
|
+
|
|
123
127
|
flow_graph: FlowGraph
|
|
124
128
|
data: pl.LazyFrame
|
|
125
129
|
|
|
@@ -197,8 +201,10 @@ class FlowFrame:
|
|
|
197
201
|
raise ValueError(f"Could not dconvert data to a polars DataFrame: {e}")
|
|
198
202
|
# Create a FlowDataEngine to get data in the right format for manual input
|
|
199
203
|
flow_table = FlowDataEngine(raw_data=pl_data)
|
|
200
|
-
raw_data_format = input_schema.RawData(
|
|
201
|
-
|
|
204
|
+
raw_data_format = input_schema.RawData(
|
|
205
|
+
data=list(flow_table.to_dict().values()),
|
|
206
|
+
columns=[c.get_minimal_field_info() for c in flow_table.schema],
|
|
207
|
+
)
|
|
202
208
|
# Create a manual input node
|
|
203
209
|
input_node = input_schema.NodeManualInput(
|
|
204
210
|
flow_id=flow_id,
|
|
@@ -220,19 +226,19 @@ class FlowFrame:
|
|
|
220
226
|
)
|
|
221
227
|
|
|
222
228
|
def __new__(
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
229
|
+
cls,
|
|
230
|
+
data: pl.LazyFrame | FrameInitTypes = None,
|
|
231
|
+
schema: SchemaDefinition | None = None,
|
|
232
|
+
*,
|
|
233
|
+
schema_overrides: SchemaDict | None = None,
|
|
234
|
+
strict: bool = True,
|
|
235
|
+
orient: Orientation | None = None,
|
|
236
|
+
infer_schema_length: int | None = 100,
|
|
237
|
+
nan_to_null: bool = False,
|
|
238
|
+
flow_graph: FlowGraph | None = None,
|
|
239
|
+
node_id: int | None = None,
|
|
240
|
+
parent_node_id: int | None = None,
|
|
241
|
+
**kwargs, # Accept and ignore any other kwargs for API compatibility
|
|
236
242
|
) -> "FlowFrame":
|
|
237
243
|
"""
|
|
238
244
|
Unified constructor for FlowFrame.
|
|
@@ -252,11 +258,18 @@ class FlowFrame:
|
|
|
252
258
|
instance.parent_node_id = parent_node_id
|
|
253
259
|
return instance
|
|
254
260
|
elif flow_graph is not None and not isinstance(data, pl.LazyFrame):
|
|
255
|
-
instance = cls.create_from_any_type(
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
261
|
+
instance = cls.create_from_any_type(
|
|
262
|
+
data=data,
|
|
263
|
+
schema=schema,
|
|
264
|
+
schema_overrides=schema_overrides,
|
|
265
|
+
strict=strict,
|
|
266
|
+
orient=orient,
|
|
267
|
+
infer_schema_length=infer_schema_length,
|
|
268
|
+
nan_to_null=nan_to_null,
|
|
269
|
+
flow_graph=flow_graph,
|
|
270
|
+
node_id=node_id,
|
|
271
|
+
parent_node_id=parent_node_id,
|
|
272
|
+
)
|
|
260
273
|
return instance
|
|
261
274
|
|
|
262
275
|
source_graph = create_flow_graph()
|
|
@@ -265,37 +278,41 @@ class FlowFrame:
|
|
|
265
278
|
if data is None:
|
|
266
279
|
data = pl.LazyFrame()
|
|
267
280
|
if not isinstance(data, pl.LazyFrame):
|
|
268
|
-
|
|
269
281
|
description = "Data imported from Python object"
|
|
270
282
|
try:
|
|
271
283
|
pl_df = pl.DataFrame(
|
|
272
|
-
data,
|
|
273
|
-
|
|
274
|
-
|
|
284
|
+
data,
|
|
285
|
+
schema=schema,
|
|
286
|
+
schema_overrides=schema_overrides,
|
|
287
|
+
strict=strict,
|
|
288
|
+
orient=orient,
|
|
289
|
+
infer_schema_length=infer_schema_length,
|
|
290
|
+
nan_to_null=nan_to_null,
|
|
275
291
|
)
|
|
276
292
|
pl_data = pl_df.lazy()
|
|
277
293
|
except Exception as e:
|
|
278
294
|
raise ValueError(f"Could not convert data to a Polars DataFrame: {e}")
|
|
279
295
|
|
|
280
296
|
flow_table = FlowDataEngine(raw_data=pl_data)
|
|
281
|
-
raw_data_format = input_schema.RawData(
|
|
282
|
-
|
|
297
|
+
raw_data_format = input_schema.RawData(
|
|
298
|
+
data=list(flow_table.to_dict().values()),
|
|
299
|
+
columns=[c.get_minimal_field_info() for c in flow_table.schema],
|
|
300
|
+
)
|
|
283
301
|
input_node = input_schema.NodeManualInput(
|
|
284
|
-
flow_id=source_graph.flow_id,
|
|
285
|
-
|
|
286
|
-
|
|
302
|
+
flow_id=source_graph.flow_id,
|
|
303
|
+
node_id=source_node_id,
|
|
304
|
+
raw_data_format=raw_data_format,
|
|
305
|
+
pos_x=100,
|
|
306
|
+
pos_y=100,
|
|
307
|
+
is_setup=True,
|
|
308
|
+
description=description,
|
|
287
309
|
)
|
|
288
310
|
source_graph.add_manual_input(input_node)
|
|
289
311
|
else:
|
|
290
312
|
source_graph.add_dependency_on_polars_lazy_frame(data, source_node_id)
|
|
291
313
|
|
|
292
314
|
final_data = source_graph.get_node(source_node_id).get_resulting_data().data_frame
|
|
293
|
-
return cls(
|
|
294
|
-
data=final_data,
|
|
295
|
-
flow_graph=source_graph,
|
|
296
|
-
node_id=source_node_id,
|
|
297
|
-
parent_node_id=parent_node_id
|
|
298
|
-
)
|
|
315
|
+
return cls(data=final_data, flow_graph=source_graph, node_id=source_node_id, parent_node_id=parent_node_id)
|
|
299
316
|
|
|
300
317
|
def __init__(self, *args, **kwargs):
|
|
301
318
|
"""
|
|
@@ -328,20 +345,20 @@ class FlowFrame:
|
|
|
328
345
|
parent_node_id=self.node_id,
|
|
329
346
|
)
|
|
330
347
|
except AttributeError:
|
|
331
|
-
raise ValueError(
|
|
348
|
+
raise ValueError("Could not execute the function")
|
|
332
349
|
|
|
333
350
|
@staticmethod
|
|
334
351
|
def _generate_sort_polars_code(
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
352
|
+
pure_sort_expr_strs: list[str],
|
|
353
|
+
descending_values: list[bool],
|
|
354
|
+
nulls_last_values: list[bool],
|
|
355
|
+
multithreaded: bool,
|
|
356
|
+
maintain_order: bool,
|
|
340
357
|
) -> str:
|
|
341
358
|
"""
|
|
342
359
|
Generates the `input_df.sort(...)` Polars code string using pure expression strings.
|
|
343
360
|
"""
|
|
344
|
-
kwargs_for_code:
|
|
361
|
+
kwargs_for_code: dict[str, Any] = {}
|
|
345
362
|
if any(descending_values):
|
|
346
363
|
kwargs_for_code["descending"] = descending_values[0] if len(descending_values) == 1 else descending_values
|
|
347
364
|
if any(nulls_last_values):
|
|
@@ -353,19 +370,20 @@ class FlowFrame:
|
|
|
353
370
|
|
|
354
371
|
kwargs_str_for_code = ", ".join(f"{k}={repr(v)}" for k, v in kwargs_for_code.items())
|
|
355
372
|
|
|
356
|
-
by_arg_for_code =
|
|
357
|
-
pure_sort_expr_strs) == 1 else f"[{', '.join(pure_sort_expr_strs)}]"
|
|
373
|
+
by_arg_for_code = (
|
|
374
|
+
pure_sort_expr_strs[0] if len(pure_sort_expr_strs) == 1 else f"[{', '.join(pure_sort_expr_strs)}]"
|
|
375
|
+
)
|
|
358
376
|
return f"input_df.sort({by_arg_for_code}{', ' + kwargs_str_for_code if kwargs_str_for_code else ''})"
|
|
359
377
|
|
|
360
378
|
def sort(
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
379
|
+
self,
|
|
380
|
+
by: list[Expr | str] | Expr | str,
|
|
381
|
+
*more_by: Expr | str,
|
|
382
|
+
descending: bool | list[bool] = False,
|
|
383
|
+
nulls_last: bool | list[bool] = False,
|
|
384
|
+
multithreaded: bool = True,
|
|
385
|
+
maintain_order: bool = False,
|
|
386
|
+
description: str | None = None,
|
|
369
387
|
) -> "FlowFrame":
|
|
370
388
|
"""
|
|
371
389
|
Sort the dataframe by the given columns.
|
|
@@ -377,10 +395,10 @@ class FlowFrame:
|
|
|
377
395
|
if more_by:
|
|
378
396
|
sort_expressions_input.extend(list(_parse_inputs_as_iterable(more_by)))
|
|
379
397
|
|
|
380
|
-
all_processed_expr_objects:
|
|
381
|
-
pure_polars_expr_strings_for_sort:
|
|
382
|
-
collected_raw_definitions:
|
|
383
|
-
column_names_for_native_node:
|
|
398
|
+
all_processed_expr_objects: list[Expr] = []
|
|
399
|
+
pure_polars_expr_strings_for_sort: list[str] = []
|
|
400
|
+
collected_raw_definitions: list[str] = []
|
|
401
|
+
column_names_for_native_node: list[str] = []
|
|
384
402
|
|
|
385
403
|
use_polars_code_path = False
|
|
386
404
|
|
|
@@ -429,10 +447,12 @@ class FlowFrame:
|
|
|
429
447
|
if not is_simple_col_for_native: # If it wasn't a simple string or unaltered Column
|
|
430
448
|
use_polars_code_path = True
|
|
431
449
|
|
|
432
|
-
desc_values =
|
|
433
|
-
all_processed_expr_objects)
|
|
434
|
-
|
|
435
|
-
|
|
450
|
+
desc_values = (
|
|
451
|
+
list(descending) if isinstance(descending, list) else [descending] * len(all_processed_expr_objects)
|
|
452
|
+
)
|
|
453
|
+
null_last_values = (
|
|
454
|
+
list(nulls_last) if isinstance(nulls_last, list) else [nulls_last] * len(all_processed_expr_objects)
|
|
455
|
+
)
|
|
436
456
|
|
|
437
457
|
if len(desc_values) != len(all_processed_expr_objects):
|
|
438
458
|
raise ValueError("Length of 'descending' does not match the number of sort expressions.")
|
|
@@ -448,23 +468,31 @@ class FlowFrame:
|
|
|
448
468
|
if collected_raw_definitions:
|
|
449
469
|
unique_raw_definitions = list(dict.fromkeys(collected_raw_definitions)) # Order-preserving unique
|
|
450
470
|
definitions_section = "\n\n".join(unique_raw_definitions)
|
|
451
|
-
final_code_for_node =
|
|
452
|
-
|
|
453
|
-
|
|
471
|
+
final_code_for_node = (
|
|
472
|
+
definitions_section + "\\#─────SPLIT─────\n\n" + f"output_df = {polars_operation_code}"
|
|
473
|
+
)
|
|
454
474
|
else:
|
|
455
475
|
final_code_for_node = polars_operation_code
|
|
456
476
|
|
|
457
|
-
pl_expressions_for_fallback = [
|
|
458
|
-
|
|
477
|
+
pl_expressions_for_fallback = [
|
|
478
|
+
e.expr for e in all_processed_expr_objects if hasattr(e, "expr") and e.expr is not None
|
|
479
|
+
]
|
|
459
480
|
kwargs_for_fallback = {
|
|
460
481
|
"descending": desc_values[0] if len(desc_values) == 1 else desc_values,
|
|
461
482
|
"nulls_last": null_last_values[0] if len(null_last_values) == 1 else null_last_values,
|
|
462
|
-
"multithreaded": multithreaded,
|
|
483
|
+
"multithreaded": multithreaded,
|
|
484
|
+
"maintain_order": maintain_order,
|
|
485
|
+
}
|
|
463
486
|
|
|
464
|
-
self._add_polars_code(
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
487
|
+
self._add_polars_code(
|
|
488
|
+
new_node_id,
|
|
489
|
+
final_code_for_node,
|
|
490
|
+
description,
|
|
491
|
+
method_name="sort",
|
|
492
|
+
convertable_to_code=_check_if_convertible_to_code(all_processed_expr_objects),
|
|
493
|
+
polars_expr=pl_expressions_for_fallback,
|
|
494
|
+
kwargs_expr=kwargs_for_fallback,
|
|
495
|
+
)
|
|
468
496
|
else:
|
|
469
497
|
sort_inputs_for_node = []
|
|
470
498
|
for i, col_name_for_native in enumerate(column_names_for_native_node):
|
|
@@ -473,30 +501,44 @@ class FlowFrame:
|
|
|
473
501
|
# type: ignore
|
|
474
502
|
)
|
|
475
503
|
sort_settings = input_schema.NodeSort(
|
|
476
|
-
flow_id=self.flow_graph.flow_id,
|
|
477
|
-
|
|
478
|
-
|
|
504
|
+
flow_id=self.flow_graph.flow_id,
|
|
505
|
+
node_id=new_node_id,
|
|
506
|
+
sort_input=sort_inputs_for_node, # type: ignore
|
|
507
|
+
pos_x=200,
|
|
508
|
+
pos_y=150,
|
|
509
|
+
is_setup=True,
|
|
510
|
+
depending_on_id=self.node_id,
|
|
511
|
+
description=description or f"Sort by {', '.join(column_names_for_native_node)}",
|
|
512
|
+
)
|
|
479
513
|
self.flow_graph.add_sort(sort_settings)
|
|
480
514
|
|
|
481
515
|
return self._create_child_frame(new_node_id)
|
|
482
516
|
|
|
483
|
-
def _add_polars_code(
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
517
|
+
def _add_polars_code(
|
|
518
|
+
self,
|
|
519
|
+
new_node_id: int,
|
|
520
|
+
code: str,
|
|
521
|
+
description: str = None,
|
|
522
|
+
depending_on_ids: list[str] | None = None,
|
|
523
|
+
convertable_to_code: bool = True,
|
|
524
|
+
method_name: str = None,
|
|
525
|
+
polars_expr: Expr | list[Expr] | None = None,
|
|
526
|
+
group_expr: Expr | list[Expr] | None = None,
|
|
527
|
+
kwargs_expr: dict | None = None,
|
|
528
|
+
group_kwargs: dict | None = None,
|
|
529
|
+
):
|
|
489
530
|
polars_code_for_node: str
|
|
490
531
|
if not convertable_to_code or _contains_lambda_pattern(code):
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
532
|
+
effective_method_name = (
|
|
533
|
+
get_method_name_from_code(code) if method_name is None and "input_df." in code else method_name
|
|
534
|
+
)
|
|
494
535
|
|
|
495
536
|
pl_expr_list = ensure_inputs_as_iterable(polars_expr) if polars_expr is not None else []
|
|
496
537
|
group_expr_list = ensure_inputs_as_iterable(group_expr) if group_expr is not None else []
|
|
497
538
|
|
|
498
|
-
_check_ok_for_serialization(
|
|
499
|
-
|
|
539
|
+
_check_ok_for_serialization(
|
|
540
|
+
polars_expr=pl_expr_list, method_name=effective_method_name, group_expr=group_expr_list
|
|
541
|
+
)
|
|
500
542
|
|
|
501
543
|
current_kwargs_expr = kwargs_expr if kwargs_expr is not None else {}
|
|
502
544
|
result_lazyframe_or_expr: Any
|
|
@@ -508,22 +550,27 @@ class FlowFrame:
|
|
|
508
550
|
target_obj = getattr(self.data, effective_method_name)(*group_expr_list, **group_kwargs)
|
|
509
551
|
if not pl_expr_list:
|
|
510
552
|
raise ValueError(
|
|
511
|
-
"Aggregation expressions (polars_expr) are required for group_by().agg() in serialization fallback."
|
|
553
|
+
"Aggregation expressions (polars_expr) are required for group_by().agg() in serialization fallback."
|
|
554
|
+
)
|
|
512
555
|
result_lazyframe_or_expr = target_obj.agg(*pl_expr_list, **current_kwargs_expr)
|
|
513
556
|
elif effective_method_name:
|
|
514
|
-
result_lazyframe_or_expr = getattr(self.data, effective_method_name)(
|
|
515
|
-
|
|
557
|
+
result_lazyframe_or_expr = getattr(self.data, effective_method_name)(
|
|
558
|
+
*pl_expr_list, **current_kwargs_expr
|
|
559
|
+
)
|
|
516
560
|
else:
|
|
517
561
|
raise ValueError(
|
|
518
|
-
"Cannot execute Polars operation: method_name is missing and could not be inferred for serialization fallback."
|
|
562
|
+
"Cannot execute Polars operation: method_name is missing and could not be inferred for serialization fallback."
|
|
563
|
+
)
|
|
519
564
|
try:
|
|
520
565
|
if isinstance(result_lazyframe_or_expr, pl.LazyFrame):
|
|
521
|
-
serialized_value_for_code = result_lazyframe_or_expr.serialize(format=
|
|
522
|
-
polars_code_for_node = "\n".join(
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
566
|
+
serialized_value_for_code = result_lazyframe_or_expr.serialize(format="json")
|
|
567
|
+
polars_code_for_node = "\n".join(
|
|
568
|
+
[
|
|
569
|
+
f"serialized_value = r'''{serialized_value_for_code}'''",
|
|
570
|
+
"buffer = BytesIO(serialized_value.encode('utf-8'))",
|
|
571
|
+
"output_df = pl.LazyFrame.deserialize(buffer, format='json')",
|
|
572
|
+
]
|
|
573
|
+
)
|
|
527
574
|
logger.warning(
|
|
528
575
|
f"Transformation '{effective_method_name}' uses non-serializable elements. "
|
|
529
576
|
"Falling back to serializing the resulting Polars LazyFrame object."
|
|
@@ -556,18 +603,18 @@ class FlowFrame:
|
|
|
556
603
|
self.flow_graph.add_polars_code(polars_code_settings)
|
|
557
604
|
|
|
558
605
|
def join(
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
606
|
+
self,
|
|
607
|
+
other,
|
|
608
|
+
on: list[str | Column] | str | Column = None,
|
|
609
|
+
how: str = "inner",
|
|
610
|
+
left_on: list[str | Column] | str | Column = None,
|
|
611
|
+
right_on: list[str | Column] | str | Column = None,
|
|
612
|
+
suffix: str = "_right",
|
|
613
|
+
validate: str = None,
|
|
614
|
+
nulls_equal: bool = False,
|
|
615
|
+
coalesce: bool = None,
|
|
616
|
+
maintain_order: Literal[None, "left", "right", "left_right", "right_left"] = None,
|
|
617
|
+
description: str = None,
|
|
571
618
|
) -> "FlowFrame":
|
|
572
619
|
"""
|
|
573
620
|
Add a join operation to the Logical Plan.
|
|
@@ -613,9 +660,7 @@ class FlowFrame:
|
|
|
613
660
|
New FlowFrame with join operation applied.
|
|
614
661
|
"""
|
|
615
662
|
# Step 1: Determine if we need to use Polars code
|
|
616
|
-
use_polars_code = self._should_use_polars_code_for_join(
|
|
617
|
-
maintain_order, coalesce, nulls_equal, validate, suffix
|
|
618
|
-
)
|
|
663
|
+
use_polars_code = self._should_use_polars_code_for_join(maintain_order, coalesce, nulls_equal, validate, suffix)
|
|
619
664
|
# Step 2: Ensure both FlowFrames are in the same graph
|
|
620
665
|
self._ensure_same_graph(other)
|
|
621
666
|
|
|
@@ -623,12 +668,9 @@ class FlowFrame:
|
|
|
623
668
|
new_node_id = generate_node_id()
|
|
624
669
|
|
|
625
670
|
# Step 4: Parse and validate join columns
|
|
626
|
-
left_columns, right_columns = self._parse_join_columns(
|
|
627
|
-
on, left_on, right_on, how
|
|
628
|
-
)
|
|
629
|
-
|
|
671
|
+
left_columns, right_columns = self._parse_join_columns(on, left_on, right_on, how)
|
|
630
672
|
# Step 5: Validate column lists have same length (except for cross join)
|
|
631
|
-
if how !=
|
|
673
|
+
if how != "cross" and left_columns is not None and right_columns is not None:
|
|
632
674
|
if len(left_columns) != len(right_columns):
|
|
633
675
|
raise ValueError(
|
|
634
676
|
f"Length mismatch: left columns ({len(left_columns)}) != right columns ({len(right_columns)})"
|
|
@@ -636,42 +678,46 @@ class FlowFrame:
|
|
|
636
678
|
|
|
637
679
|
# Step 6: Create join mappings if not using Polars code
|
|
638
680
|
join_mappings = None
|
|
639
|
-
if not use_polars_code and how !=
|
|
640
|
-
join_mappings, use_polars_code = _create_join_mappings(
|
|
641
|
-
left_columns or [], right_columns or []
|
|
642
|
-
)
|
|
681
|
+
if not use_polars_code and how != "cross":
|
|
682
|
+
join_mappings, use_polars_code = _create_join_mappings(left_columns or [], right_columns or [])
|
|
643
683
|
|
|
644
684
|
# Step 7: Execute join based on approach
|
|
645
|
-
if use_polars_code or suffix !=
|
|
685
|
+
if use_polars_code or suffix != "_right":
|
|
646
686
|
return self._execute_polars_code_join(
|
|
647
|
-
other,
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
687
|
+
other,
|
|
688
|
+
new_node_id,
|
|
689
|
+
on,
|
|
690
|
+
left_on,
|
|
691
|
+
right_on,
|
|
692
|
+
left_columns,
|
|
693
|
+
right_columns,
|
|
694
|
+
how,
|
|
695
|
+
suffix,
|
|
696
|
+
validate,
|
|
697
|
+
nulls_equal,
|
|
698
|
+
coalesce,
|
|
699
|
+
maintain_order,
|
|
700
|
+
description,
|
|
653
701
|
)
|
|
702
|
+
elif join_mappings or how == "cross":
|
|
703
|
+
return self._execute_native_join(other, new_node_id, join_mappings, how, description)
|
|
654
704
|
else:
|
|
655
705
|
raise ValueError("Could not execute join")
|
|
656
706
|
|
|
657
|
-
def _should_use_polars_code_for_join(
|
|
658
|
-
self, maintain_order, coalesce, nulls_equal, validate, suffix
|
|
659
|
-
) -> bool:
|
|
707
|
+
def _should_use_polars_code_for_join(self, maintain_order, coalesce, nulls_equal, validate, suffix) -> bool:
|
|
660
708
|
"""Determine if we should use Polars code instead of native join."""
|
|
661
709
|
return not (
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
710
|
+
maintain_order is None
|
|
711
|
+
and coalesce is None
|
|
712
|
+
and nulls_equal is False
|
|
713
|
+
and validate is None
|
|
714
|
+
and suffix == "_right"
|
|
667
715
|
)
|
|
668
716
|
|
|
669
717
|
def _ensure_same_graph(self, other: "FlowFrame") -> None:
|
|
670
718
|
"""Ensure both FlowFrames are in the same graph, combining if necessary."""
|
|
671
719
|
if self.flow_graph.flow_id != other.flow_graph.flow_id:
|
|
672
|
-
combined_graph, node_mappings = combine_flow_graphs_with_mapping(
|
|
673
|
-
self.flow_graph, other.flow_graph
|
|
674
|
-
)
|
|
720
|
+
combined_graph, node_mappings = combine_flow_graphs_with_mapping(self.flow_graph, other.flow_graph)
|
|
675
721
|
|
|
676
722
|
new_self_node_id = node_mappings.get((self.flow_graph.flow_id, self.node_id), None)
|
|
677
723
|
new_other_node_id = node_mappings.get((other.flow_graph.flow_id, other.node_id), None)
|
|
@@ -686,19 +732,19 @@ class FlowFrame:
|
|
|
686
732
|
node_id_data["c"] = node_id_data["c"] + len(combined_graph.nodes)
|
|
687
733
|
|
|
688
734
|
def _parse_join_columns(
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
) -> tuple[
|
|
735
|
+
self,
|
|
736
|
+
on: list[str | Column] | str | Column,
|
|
737
|
+
left_on: list[str | Column] | str | Column,
|
|
738
|
+
right_on: list[str | Column] | str | Column,
|
|
739
|
+
how: str,
|
|
740
|
+
) -> tuple[list[str] | None, list[str] | None]:
|
|
695
741
|
"""Parse and validate join column specifications."""
|
|
696
742
|
if on is not None:
|
|
697
743
|
left_columns = right_columns = _normalize_columns_to_list(on)
|
|
698
744
|
elif left_on is not None and right_on is not None:
|
|
699
745
|
left_columns = _normalize_columns_to_list(left_on)
|
|
700
746
|
right_columns = _normalize_columns_to_list(right_on)
|
|
701
|
-
elif how ==
|
|
747
|
+
elif how == "cross" and left_on is None and right_on is None and on is None:
|
|
702
748
|
left_columns = None
|
|
703
749
|
right_columns = None
|
|
704
750
|
else:
|
|
@@ -707,37 +753,43 @@ class FlowFrame:
|
|
|
707
753
|
return left_columns, right_columns
|
|
708
754
|
|
|
709
755
|
def _execute_polars_code_join(
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
756
|
+
self,
|
|
757
|
+
other: "FlowFrame",
|
|
758
|
+
new_node_id: int,
|
|
759
|
+
on: list[str | Column] | str | Column,
|
|
760
|
+
left_on: list[str | Column] | str | Column,
|
|
761
|
+
right_on: list[str | Column] | str | Column,
|
|
762
|
+
left_columns: list[str] | None,
|
|
763
|
+
right_columns: list[str] | None,
|
|
764
|
+
how: str,
|
|
765
|
+
suffix: str,
|
|
766
|
+
validate: str,
|
|
767
|
+
nulls_equal: bool,
|
|
768
|
+
coalesce: bool,
|
|
769
|
+
maintain_order: Literal[None, "left", "right", "left_right", "right_left"],
|
|
770
|
+
description: str,
|
|
725
771
|
) -> "FlowFrame":
|
|
726
772
|
"""Execute join using Polars code approach."""
|
|
727
773
|
# Build the code arguments
|
|
728
774
|
code_kwargs = self._build_polars_join_kwargs(
|
|
729
|
-
on,
|
|
730
|
-
|
|
775
|
+
on,
|
|
776
|
+
left_on,
|
|
777
|
+
right_on,
|
|
778
|
+
left_columns,
|
|
779
|
+
right_columns,
|
|
780
|
+
how,
|
|
781
|
+
suffix,
|
|
782
|
+
validate,
|
|
783
|
+
nulls_equal,
|
|
784
|
+
coalesce,
|
|
785
|
+
maintain_order,
|
|
731
786
|
)
|
|
732
787
|
|
|
733
788
|
kwargs_str = ", ".join(f"{k}={v}" for k, v in code_kwargs.items() if v is not None)
|
|
734
789
|
code = f"input_df_1.join({kwargs_str})"
|
|
735
790
|
|
|
736
791
|
# Add the Polars code node
|
|
737
|
-
self._add_polars_code(
|
|
738
|
-
new_node_id, code, description,
|
|
739
|
-
depending_on_ids=[self.node_id, other.node_id]
|
|
740
|
-
)
|
|
792
|
+
self._add_polars_code(new_node_id, code, description, depending_on_ids=[self.node_id, other.node_id])
|
|
741
793
|
|
|
742
794
|
# Add connections
|
|
743
795
|
self._add_connection(self.node_id, new_node_id, "main")
|
|
@@ -752,28 +804,29 @@ class FlowFrame:
|
|
|
752
804
|
)
|
|
753
805
|
|
|
754
806
|
def _build_polars_join_kwargs(
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
807
|
+
self,
|
|
808
|
+
on: list[str | Column] | str | Column,
|
|
809
|
+
left_on: list[str | Column] | str | Column,
|
|
810
|
+
right_on: list[str | Column] | str | Column,
|
|
811
|
+
left_columns: list[str] | None,
|
|
812
|
+
right_columns: list[str] | None,
|
|
813
|
+
how: str,
|
|
814
|
+
suffix: str,
|
|
815
|
+
validate: str,
|
|
816
|
+
nulls_equal: bool,
|
|
817
|
+
coalesce: bool,
|
|
818
|
+
maintain_order: Literal[None, "left", "right", "left_right", "right_left"],
|
|
767
819
|
) -> dict:
|
|
768
820
|
"""Build kwargs dictionary for Polars join code."""
|
|
769
821
|
|
|
770
822
|
def format_column_list(cols):
|
|
771
823
|
if cols is None:
|
|
772
824
|
return None
|
|
773
|
-
return
|
|
774
|
-
|
|
775
|
-
for v in _normalize_columns_to_list(cols)
|
|
776
|
-
|
|
825
|
+
return (
|
|
826
|
+
"["
|
|
827
|
+
+ ", ".join(f"'{v}'" if isinstance(v, str) else str(v) for v in _normalize_columns_to_list(cols))
|
|
828
|
+
+ "]"
|
|
829
|
+
)
|
|
777
830
|
|
|
778
831
|
return {
|
|
779
832
|
"other": "input_df_2",
|
|
@@ -785,46 +838,49 @@ class FlowFrame:
|
|
|
785
838
|
"validate": _to_string_val(validate),
|
|
786
839
|
"nulls_equal": nulls_equal,
|
|
787
840
|
"coalesce": coalesce,
|
|
788
|
-
"maintain_order": _to_string_val(maintain_order)
|
|
841
|
+
"maintain_order": _to_string_val(maintain_order),
|
|
789
842
|
}
|
|
790
843
|
|
|
791
844
|
def _execute_native_join(
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
845
|
+
self,
|
|
846
|
+
other: "FlowFrame",
|
|
847
|
+
new_node_id: int,
|
|
848
|
+
join_mappings: list | None,
|
|
849
|
+
how: str,
|
|
850
|
+
description: str,
|
|
798
851
|
) -> "FlowFrame":
|
|
799
852
|
"""Execute join using native FlowFile join nodes."""
|
|
800
853
|
# Create select inputs for both frames
|
|
854
|
+
|
|
801
855
|
left_select = transform_schema.SelectInputs.create_from_pl_df(self.data)
|
|
802
856
|
right_select = transform_schema.SelectInputs.create_from_pl_df(other.data)
|
|
803
857
|
# Create appropriate join input based on join type
|
|
804
|
-
if how ==
|
|
858
|
+
if how == "cross":
|
|
805
859
|
join_input = transform_schema.CrossJoinInput(
|
|
806
|
-
left_select=left_select.renames,
|
|
860
|
+
left_select=transform_schema.JoinInputs(renames=left_select.renames),
|
|
807
861
|
right_select=right_select.renames,
|
|
808
862
|
)
|
|
863
|
+
join_input_manager = transform_schema.CrossJoinInputManager(join_input)
|
|
864
|
+
|
|
809
865
|
else:
|
|
810
866
|
join_input = transform_schema.JoinInput(
|
|
811
867
|
join_mapping=join_mappings,
|
|
812
|
-
left_select=left_select.renames,
|
|
868
|
+
left_select=transform_schema.JoinInputs(renames=left_select.renames),
|
|
813
869
|
right_select=right_select.renames,
|
|
814
870
|
how=how,
|
|
815
871
|
)
|
|
872
|
+
join_input_manager = transform_schema.JoinInputManager(join_input)
|
|
816
873
|
|
|
817
874
|
# Configure join input
|
|
818
|
-
|
|
819
|
-
for right_column in right_select.renames:
|
|
875
|
+
for right_column in join_input_manager.right_select.renames:
|
|
820
876
|
if right_column.join_key:
|
|
821
877
|
right_column.keep = False
|
|
822
878
|
|
|
823
879
|
# Create and add appropriate node
|
|
824
|
-
if how ==
|
|
825
|
-
self._add_cross_join_node(new_node_id,
|
|
880
|
+
if how == "cross":
|
|
881
|
+
self._add_cross_join_node(new_node_id, join_input_manager.to_cross_join_input(), description, other)
|
|
826
882
|
else:
|
|
827
|
-
self._add_regular_join_node(new_node_id,
|
|
883
|
+
self._add_regular_join_node(new_node_id, join_input_manager.to_join_input(), description, other)
|
|
828
884
|
|
|
829
885
|
# Add connections
|
|
830
886
|
self._add_connection(self.node_id, new_node_id, "main")
|
|
@@ -838,11 +894,11 @@ class FlowFrame:
         )
 
     def _add_cross_join_node(
-
-
-
-
-
+        self,
+        new_node_id: int,
+        join_input: "transform_schema.CrossJoinInput",
+        description: str,
+        other: "FlowFrame",
     ) -> None:
         """Add a cross join node to the graph."""
         cross_join_settings = input_schema.NodeCrossJoin(
@@ -851,18 +907,18 @@ class FlowFrame:
             cross_join_input=join_input,
             is_setup=True,
             depending_on_ids=[self.node_id, other.node_id],
-            description=description or
+            description=description or "Join with cross strategy",
             auto_generate_selection=True,
             verify_integrity=True,
         )
         self.flow_graph.add_cross_join(cross_join_settings)
 
     def _add_regular_join_node(
-
-
-
-
-
+        self,
+        new_node_id: int,
+        join_input: "transform_schema.JoinInput",
+        description: str,
+        other: "FlowFrame",
     ) -> None:
         """Add a regular join node to the graph."""
         join_settings = input_schema.NodeJoin(
@@ -887,34 +943,41 @@ class FlowFrame:
             pos_y=100,
             is_setup=True,
             depending_on_id=self.node_id,
-            description=description
+            description=description,
         )
         self.flow_graph.add_record_count(node_number_of_records)
         return self._create_child_frame(new_node_id)
 
-    def rename(self, mapping: Mapping[str, str], *, strict: bool = True,
-               description: str = None) -> "FlowFrame":
+    def rename(self, mapping: Mapping[str, str], *, strict: bool = True, description: str = None) -> "FlowFrame":
         """Rename columns based on a mapping or function."""
-        return self.select(
-
+        return self.select(
+            [col(old_name).alias(new_name) for old_name, new_name in mapping.items()],
+            description=description,
+            _keep_missing=True,
+        )
 
-    def select(
+    def select(
+        self, *columns: str | Expr | Selector, description: str | None = None, _keep_missing: bool = False
+    ) -> "FlowFrame":
         """
         Select columns from the frame.
         """
         columns_iterable = list(_parse_inputs_as_iterable(columns))
         new_node_id = generate_node_id()
-        if (
-
+        if (
+            len(columns_iterable) == 1
+            and isinstance(columns_iterable[0], Expr)
+            and str(columns_iterable[0]) == "pl.Expr(len()).alias('number_of_records')"
+        ):
             return self._add_number_of_records(new_node_id, description)
 
-        all_input_expr_objects:
-        pure_polars_expr_strings_for_select:
-        collected_raw_definitions:
-        selected_col_names_for_native:
+        all_input_expr_objects: list[Expr] = []
+        pure_polars_expr_strings_for_select: list[str] = []
+        collected_raw_definitions: list[str] = []
+        selected_col_names_for_native: list[transform_schema.SelectInput] = []  # For native node
 
         can_use_native_node = True
-        if len(columns_iterable) == 1 and isinstance(columns_iterable[0], str) and columns_iterable[0] ==
+        if len(columns_iterable) == 1 and isinstance(columns_iterable[0], str) and columns_iterable[0] == "*":
             effective_columns_iterable = [col(c_name) for c_name in self.columns]
         else:
             effective_columns_iterable = columns_iterable
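`rename` is now a thin wrapper over `select`: each mapping entry becomes an alias expression, and `_keep_missing=True` carries the unmapped columns through unchanged. A sketch under the same assumptions as above (`from_dict` and root-level `col` re-export are hypothetical):

```python
import flowfile as ff

df = ff.from_dict({"a": [1, 2], "b": [3, 4]})  # hypothetical constructor
renamed = df.rename({"a": "x"})
# equivalent to what rename does internally:
same = df.select([ff.col("a").alias("x")], _keep_missing=True)
```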
@@ -948,13 +1011,17 @@ class FlowFrame:
         if can_use_native_node:
             existing_cols = self.columns
             selected_col_names = {select_col.old_name for select_col in selected_col_names_for_native}
-            not_selected_columns = [
-
+            not_selected_columns = [
+                transform_schema.SelectInput(c, keep=_keep_missing)
+                for c in existing_cols
+                if c not in selected_col_names
+            ]
             selected_col_names_for_native.extend(not_selected_columns)
             if _keep_missing:
                 lookup_selection = {_col.old_name: _col for _col in selected_col_names_for_native}
-                selected_col_names_for_native = [
-
+                selected_col_names_for_native = [
+                    lookup_selection.get(_col) for _col in existing_cols if _col in lookup_selection
+                ]
             select_settings = input_schema.NodeSelect(
                 flow_id=self.flow_graph.flow_id,
                 node_id=new_node_id,
@@ -964,7 +1031,7 @@ class FlowFrame:
                 pos_y=100,
                 is_setup=True,
                 depending_on_id=self.node_id,
-                description=description
+                description=description,
             )
             self.flow_graph.add_select(select_settings)
         else:
@@ -973,23 +1040,35 @@ class FlowFrame:
             if collected_raw_definitions:
                 unique_raw_definitions = list(dict.fromkeys(collected_raw_definitions))
                 definitions_section = "\n\n".join(unique_raw_definitions)
-                final_code_for_node =
-
-
+                final_code_for_node = (
+                    definitions_section + "\n#─────SPLIT─────\n\n" + f"output_df = {polars_operation_code}"
+                )
             else:
                 final_code_for_node = polars_operation_code
 
-            pl_expressions_for_fallback = [
-
-
-
-
-
+            pl_expressions_for_fallback = [
+                e.expr
+                for e in all_input_expr_objects
+                if isinstance(e, Expr) and hasattr(e, "expr") and e.expr is not None
+            ]
+            self._add_polars_code(
+                new_node_id,
+                final_code_for_node,
+                description,
+                method_name="select",
+                convertable_to_code=_check_if_convertible_to_code(all_input_expr_objects),
+                polars_expr=pl_expressions_for_fallback,
+            )
 
         return self._create_child_frame(new_node_id)
 
-    def filter(
-
+    def filter(
+        self,
+        *predicates: Expr | Any,
+        flowfile_formula: str | None = None,
+        description: str | None = None,
+        **constraints: Any,
+    ) -> "FlowFrame":
         """
         Filter rows based on a predicate.
         """
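When a selection cannot be expressed as a native select node, the branch above serialises the expressions into a generated-code node (`output_df = ...`), keeping the live Polars expressions as a fallback. Both paths are reachable from the same call site; sketch, with `from_dict` and the root-level `col` re-export assumed rather than confirmed:

```python
import flowfile as ff

df = ff.from_dict({"a": [1, 2], "b": [3, 4]})       # hypothetical constructor
plain = df.select("a", "b")                          # column names -> native select node
derived = df.select((ff.col("a") * 2).alias("a2"))   # expression -> generated Polars code node
```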
@@ -998,9 +1077,9 @@ class FlowFrame:
         available_columns = self.columns
         new_node_id = generate_node_id()
         if len(predicates) > 0 or len(constraints) > 0:
-            all_input_expr_objects:
-            pure_polars_expr_strings:
-            collected_raw_definitions:
+            all_input_expr_objects: list[Expr] = []
+            pure_polars_expr_strings: list[str] = []
+            collected_raw_definitions: list[str] = []
 
             processed_predicates = []
             for pred_item in predicates:
@@ -1029,10 +1108,11 @@ class FlowFrame:
                     collected_raw_definitions.append(raw_defs_str)
 
             for k, v_val in constraints.items():
-                constraint_expr_obj =
+                constraint_expr_obj = col(k) == lit(v_val)
                 all_input_expr_objects.append(constraint_expr_obj)
                 pure_expr_str, raw_defs_str = _extract_expr_parts(
-                    constraint_expr_obj
+                    constraint_expr_obj
+                )  # Constraint exprs are unlikely to have defs
                 pure_polars_expr_strings.append(f"({pure_expr_str})")
                 if raw_defs_str and raw_defs_str not in collected_raw_definitions:  # Should be rare here
                     collected_raw_definitions.append(raw_defs_str)
@@ -1044,31 +1124,36 @@ class FlowFrame:
             if collected_raw_definitions:
                 unique_raw_definitions = list(dict.fromkeys(collected_raw_definitions))  # Order-preserving unique
                 definitions_section = "\n\n".join(unique_raw_definitions)
-                final_code_for_node =
-
-
+                final_code_for_node = (
+                    definitions_section + "\n#─────SPLIT─────\n\n" + f"output_df = {polars_operation_code}"
+                )
             else:
                 final_code_for_node = polars_operation_code
 
             convertable_to_code = _check_if_convertible_to_code(all_input_expr_objects)
-            pl_expressions_for_fallback = [
-
-
-
-
+            pl_expressions_for_fallback = [
+                e.expr
+                for e in all_input_expr_objects
+                if isinstance(e, Expr) and hasattr(e, "expr") and e.expr is not None
+            ]
+            self._add_polars_code(
+                new_node_id,
+                final_code_for_node,
+                description,
+                method_name="filter",
+                convertable_to_code=convertable_to_code,
+                polars_expr=pl_expressions_for_fallback,
+            )
         elif flowfile_formula:
             filter_settings = input_schema.NodeFilter(
                 flow_id=self.flow_graph.flow_id,
                 node_id=new_node_id,
-                filter_input=transform_schema.FilterInput(
-                    advanced_filter=flowfile_formula,
-                    filter_type="advanced"
-                ),
+                filter_input=transform_schema.FilterInput(advanced_filter=flowfile_formula, filter_type="advanced"),
                 pos_x=200,
                 pos_y=150,
                 is_setup=True,
                 depending_on_id=self.node_id,
-                description=description
+                description=description,
             )
             self.flow_graph.add_filter(filter_settings)
         else:
@@ -1077,12 +1162,7 @@ class FlowFrame:
 
         return self._create_child_frame(new_node_id)
 
-    def sink_csv(self,
-                 file: str,
-                 *args,
-                 separator: str = ",",
-                 encoding: str = "utf-8",
-                 description: str = None):
+    def sink_csv(self, file: str, *args, separator: str = ",", encoding: str = "utf-8", description: str = None):
         """
         Write the data to a CSV file.
 
@@ -1098,12 +1178,12 @@ class FlowFrame:
         return self.write_csv(file, *args, separator=separator, encoding=encoding, description=description)
 
     def write_parquet(
-
-
-
-
-
-
+        self,
+        path: str | os.PathLike,
+        *,
+        description: str = None,
+        convert_to_absolute_path: bool = True,
+        **kwargs: Any,
     ) -> "FlowFrame":
         """
         Write the data to a Parquet file. Creates a standard Output node if only
@@ -1140,16 +1220,11 @@ class FlowFrame:
         file_name = file_str.split(os.sep)[-1]
         use_polars_code = bool(kwargs.items()) or not is_path_input
 
-        output_parquet_table = input_schema.OutputParquetTable(
-            file_type="parquet"
-        )
         output_settings = input_schema.OutputSettings(
-            file_type=
+            file_type="parquet",
            name=file_name,
            directory=file_str if is_path_input else str(file_str),
-
-            output_csv_table=input_schema.OutputCsvTable(),
-            output_excel_table=input_schema.OutputExcelTable()
+            table_settings=input_schema.OutputParquetTable(),
        )
 
        if is_path_input:
@@ -1166,7 +1241,7 @@ class FlowFrame:
             node_id=new_node_id,
             output_settings=output_settings,
             depending_on_id=self.node_id,
-            description=description
+            description=description,
         )
         self.flow_graph.add_output(node_output)
     else:
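The output-settings change here is the notable one: the three parallel `output_*_table` fields are replaced by a single `table_settings` object for the chosen format. Caller-side behaviour is unchanged; sketch (constructor hypothetical, extra kwargs fall through to the generated Polars call):

```python
import flowfile as ff

df = ff.from_dict({"a": [1, 2]})                      # hypothetical constructor
df.write_parquet("out.parquet")                       # plain path -> native Output node
df.write_parquet("out.parquet", compression="zstd")   # extra kwargs -> generated Polars code
```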
@@ -1192,16 +1267,15 @@ class FlowFrame:
         return self._create_child_frame(new_node_id)
 
     def write_csv(
-
-
-
-
-
-
-
-
+        self,
+        file: str | os.PathLike,
+        *,
+        separator: str = ",",
+        encoding: str = "utf-8",
+        description: str = None,
+        convert_to_absolute_path: bool = True,
+        **kwargs: Any,
     ) -> "FlowFrame":
-
         new_node_id = generate_node_id()
         is_path_input = isinstance(file, (str, os.PathLike))
         if isinstance(file, os.PathLike):
@@ -1217,13 +1291,10 @@ class FlowFrame:
 
         use_polars_code = bool(kwargs) or not is_path_input
         output_settings = input_schema.OutputSettings(
-            file_type=
+            file_type="csv",
             name=file_name,
             directory=file_str if is_path_input else str(file_str),
-
-                file_type="csv", delimiter=separator, encoding=encoding),
-            output_excel_table=input_schema.OutputExcelTable(),
-            output_parquet_table=input_schema.OutputParquetTable()
+            table_settings=input_schema.OutputCsvTable(delimiter=separator, encoding=encoding),
         )
         if is_path_input:
             try:
@@ -1239,7 +1310,7 @@ class FlowFrame:
             node_id=new_node_id,
             output_settings=output_settings,
             depending_on_id=self.node_id,
-            description=description
+            description=description,
         )
         self.flow_graph.add_output(node_output)
     else:
@@ -1253,9 +1324,9 @@ class FlowFrame:
            path_arg_repr = repr(output_settings.directory)
 
            all_kwargs_for_code = {
-
-
-                **kwargs  # Add the extra kwargs
+                "separator": separator,
+                "encoding": encoding,
+                **kwargs,  # Add the extra kwargs
            }
            kwargs_repr = ", ".join(f"{k}={repr(v)}" for k, v in all_kwargs_for_code.items())
 
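`write_csv` mirrors that split: a plain path with only `separator`/`encoding` yields a native Output node, while any extra keyword argument lands in `all_kwargs_for_code` and forces the generated-Polars-code path. Sketch (constructor hypothetical):

```python
import flowfile as ff

df = ff.from_dict({"a": [1, 2]})            # hypothetical constructor
df.write_csv("out.csv", separator=";")       # native CSV Output node
df.write_csv("out.csv", null_value="NA")     # extra kwarg -> generated Polars code
```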
@@ -1269,42 +1340,47 @@ class FlowFrame:
 
         return self._create_child_frame(new_node_id)
 
-    def write_parquet_to_cloud_storage(
-
-
-
-
-
+    def write_parquet_to_cloud_storage(
+        self,
+        path: str,
+        connection_name: str | None = None,
+        compression: Literal["snappy", "gzip", "brotli", "lz4", "zstd"] = "snappy",
+        description: str | None = None,
+    ) -> "FlowFrame":
         """
-
-
-
-
-
-
-
-
-
-
-
-
+        Write the data frame to cloud storage in Parquet format.
+
+        Args:
+            path (str): The destination path in cloud storage where the Parquet file will be written.
+            connection_name (Optional[str], optional): The name of the storage connection
+                that a user can create. If None, uses the default connection. Defaults to None.
+            compression (Literal["snappy", "gzip", "brotli", "lz4", "zstd"], optional):
+                The compression algorithm to use for the Parquet file. Defaults to "snappy".
+            description (Optional[str], optional): Description of this operation for the ETL graph.
+
+        Returns:
+            FlowFrame: A new child data frame representing the written data.
         """
 
-        new_node_id = add_write_ff_to_cloud_storage(
-
-
-
-
-
+        new_node_id = add_write_ff_to_cloud_storage(
+            path,
+            flow_graph=self.flow_graph,
+            connection_name=connection_name,
+            depends_on_node_id=self.node_id,
+            parquet_compression=compression,
+            file_format="parquet",
+            description=description,
+        )
         return self._create_child_frame(new_node_id)
 
-    def write_csv_to_cloud_storage(
-
-
-
-
-
-
+    def write_csv_to_cloud_storage(
+        self,
+        path: str,
+        connection_name: str | None = None,
+        delimiter: str = ";",
+        encoding: CsvEncoding = "utf8",
+        description: str | None = None,
+    ) -> "FlowFrame":
         """
         Write the data frame to cloud storage in CSV format.
 
@@ -1321,21 +1397,25 @@ class FlowFrame:
         Returns:
             FlowFrame: A new child data frame representing the written data.
         """
-        new_node_id = add_write_ff_to_cloud_storage(
-
-
-
-
-
-
+        new_node_id = add_write_ff_to_cloud_storage(
+            path,
+            flow_graph=self.flow_graph,
+            connection_name=connection_name,
+            depends_on_node_id=self.node_id,
+            csv_delimiter=delimiter,
+            csv_encoding=encoding,
+            file_format="csv",
+            description=description,
+        )
         return self._create_child_frame(new_node_id)
 
-    def write_delta(
-
-
-
-
-
+    def write_delta(
+        self,
+        path: str,
+        connection_name: str | None = None,
+        write_mode: Literal["overwrite", "append"] = "overwrite",
+        description: str | None = None,
+    ) -> "FlowFrame":
         """
         Write the data frame to cloud storage in Delta Lake format.
 
@@ -1349,19 +1429,23 @@ class FlowFrame:
         Returns:
             FlowFrame: A new child data frame representing the written data.
         """
-        new_node_id = add_write_ff_to_cloud_storage(
-
-
-
-
-
+        new_node_id = add_write_ff_to_cloud_storage(
+            path,
+            flow_graph=self.flow_graph,
+            connection_name=connection_name,
+            depends_on_node_id=self.node_id,
+            write_mode=write_mode,
+            file_format="delta",
+            description=description,
+        )
         return self._create_child_frame(new_node_id)
 
-    def write_json_to_cloud_storage(
-
-
-
-
+    def write_json_to_cloud_storage(
+        self,
+        path: str,
+        connection_name: str | None = None,
+        description: str | None = None,
+    ) -> "FlowFrame":
         """
         Write the data frame to cloud storage in JSON format.
 
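All four cloud-storage writers funnel into the same `add_write_ff_to_cloud_storage` helper, differing only in `file_format` and the format-specific options they forward. A sketch; the bucket path and connection name are placeholders:

```python
import flowfile as ff

df = ff.from_dict({"a": [1, 2]})  # hypothetical constructor
df.write_parquet_to_cloud_storage(
    "s3://example-bucket/out/data.parquet",   # placeholder path
    connection_name="my-s3-connection",       # placeholder connection
    compression="zstd",
)
df.write_delta("s3://example-bucket/out/table", write_mode="append")
```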
@@ -1373,11 +1457,14 @@ class FlowFrame:
         Returns:
             FlowFrame: A new child data frame representing the written data.
         """
-        new_node_id = add_write_ff_to_cloud_storage(
-
-
-
-
+        new_node_id = add_write_ff_to_cloud_storage(
+            path,
+            flow_graph=self.flow_graph,
+            connection_name=connection_name,
+            depends_on_node_id=self.node_id,
+            file_format="json",
+            description=description,
+        )
         return self._create_child_frame(new_node_id)
 
     def group_by(self, *by, description: str = None, maintain_order=False, **named_by) -> GroupByFrame:
@@ -1414,7 +1501,10 @@ class FlowFrame:
         # Create a GroupByFrame
         return GroupByFrame(
             node_id=new_node_id,
-            parent_frame=self,
+            parent_frame=self,
+            by_cols=by_cols,
+            maintain_order=maintain_order,
+            description=description,
         )
 
     def to_graph(self):
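`group_by` now forwards `by_cols`, `maintain_order`, and the description to the returned `GroupByFrame`, so the later aggregation step inherits them. Sketch; the `agg` call follows the Polars-style API this class mirrors and is not shown in this diff:

```python
import flowfile as ff

df = ff.from_dict({"city": ["a", "a", "b"], "sales": [1, 2, 3]})  # hypothetical constructor
totals = df.group_by("city", maintain_order=True).agg(ff.col("sales").sum())
```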
@@ -1422,7 +1512,7 @@ class FlowFrame:
         return self.flow_graph
 
     def save_graph(self, file_path: str, auto_arrange: bool = True):
-        """Save the graph
+        """Save the graph"""
         if auto_arrange:
             self.flow_graph.apply_layout()
         self.flow_graph.save_flow(file_path)
@@ -1435,23 +1525,27 @@ class FlowFrame:
 
     def _with_flowfile_formula(self, flowfile_formula: str, output_column_name, description: str = None) -> "FlowFrame":
         new_node_id = generate_node_id()
-        function_settings = (
-
-
-
-
-
+        function_settings = input_schema.NodeFormula(
+            flow_id=self.flow_graph.flow_id,
+            node_id=new_node_id,
+            depending_on_id=self.node_id,
+            function=transform_schema.FunctionInput(
+                function=flowfile_formula, field=transform_schema.FieldInput(name=output_column_name, data_type="Auto")
+            ),
+            description=description,
+        )
         self.flow_graph.add_formula(function_settings)
         return self._create_child_frame(new_node_id)
 
     def head(self, n: int, description: str = None):
         new_node_id = generate_node_id()
-        settings = input_schema.NodeSample(
-
-
-
-
-
+        settings = input_schema.NodeSample(
+            flow_id=self.flow_graph.flow_id,
+            node_id=new_node_id,
+            depending_on_id=self.node_id,
+            sample_size=n,
+            description=description,
+        )
         self.flow_graph.add_sample(settings)
         return self._create_child_frame(new_node_id)
 
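`head` records a `NodeSample` with `sample_size=n` instead of materialising rows, and `save_graph` persists the whole ETL graph (running `apply_layout` first by default). Sketch (constructor and file name are placeholders):

```python
import flowfile as ff

df = ff.from_dict({"a": list(range(100))})   # hypothetical constructor
preview = df.head(5, description="quick peek")
preview.save_graph("pipeline.flowfile")       # file name is a placeholder
```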
@@ -1467,16 +1561,18 @@ class FlowFrame:
     def get_node_settings(self) -> FlowNode:
         return self.flow_graph.get_node(self.node_id)
 
-    def pivot(
-
-
-
-
-
-
-
-
-
+    def pivot(
+        self,
+        on: str | list[str],
+        *,
+        index: str | list[str] | None = None,
+        values: str | list[str] | None = None,
+        aggregate_function: str | None = "first",
+        maintain_order: bool = True,
+        sort_columns: bool = False,
+        separator: str = "_",
+        description: str = None,
+    ) -> "FlowFrame":
         """
         Pivot a DataFrame from long to wide format.
 
@@ -1525,17 +1621,14 @@ class FlowFrame:
         value_col = values if isinstance(values, str) else values[0]
 
         # Set valid aggregations
-        valid_aggs = [
+        valid_aggs = ["first", "last", "min", "max", "sum", "mean", "median", "count"]
         if aggregate_function not in valid_aggs:
-            raise ValueError(
-
+            raise ValueError(
+                f"Invalid aggregate_function: {aggregate_function}. " f"Must be one of: {', '.join(valid_aggs)}"
+            )
 
         # Check if we can use the native implementation
-        can_use_native = (
-            isinstance(on_value, str) and
-            isinstance(value_col, str) and
-            aggregate_function in valid_aggs
-        )
+        can_use_native = isinstance(on_value, str) and isinstance(value_col, str) and aggregate_function in valid_aggs
 
         if can_use_native:
             # Create pivot input for native implementation
@@ -1543,7 +1636,7 @@ class FlowFrame:
                 index_columns=index_columns,
                 pivot_column=on_value,
                 value_col=value_col,
-                aggregations=[aggregate_function]
+                aggregations=[aggregate_function],
             )
 
             # Create node settings
@@ -1555,7 +1648,7 @@ class FlowFrame:
                 pos_y=150,
                 is_setup=True,
                 depending_on_id=self.node_id,
-                description=description or f"Pivot {value_col} by {on_value}"
+                description=description or f"Pivot {value_col} by {on_value}",
             )
 
             # Add to graph using native implementation
@@ -1583,8 +1676,9 @@ class FlowFrame:
         # Generate description if not provided
         if description is None:
             on_str = on if isinstance(on, str) else ", ".join(on if isinstance(on, list) else [on])
-            values_str =
-                values if isinstance(values, list) else [values])
+            values_str = (
+                values if isinstance(values, str) else ", ".join(values if isinstance(values, list) else [values])
+            )
             description = f"Pivot {values_str} by {on_str}"
 
         # Add polars code node
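`pivot` takes the native node path only when `on` and `values` resolve to single column names and the aggregation is one of the eight whitelisted functions; anything else becomes a generated Polars code node. Sketch (constructor hypothetical):

```python
import flowfile as ff

df = ff.from_dict({
    "store": ["s1", "s1", "s2"],
    "month": ["jan", "feb", "jan"],
    "sales": [10, 20, 30],
})  # hypothetical constructor
wide = df.pivot("month", index="store", values="sales", aggregate_function="sum")
```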
@@ -1592,13 +1686,15 @@ class FlowFrame:
 
         return self._create_child_frame(new_node_id)
 
-    def unpivot(
-
-
-
-
-
-
+    def unpivot(
+        self,
+        on: list[str | Selector] | str | None | Selector = None,
+        *,
+        index: list[str] | str | None = None,
+        variable_name: str = "variable",
+        value_name: str = "value",
+        description: str = None,
+    ) -> "FlowFrame":
         """
         Unpivot a DataFrame from wide to long format.
 
@@ -1645,13 +1741,13 @@ class FlowFrame:
             value_columns = [on]
 
         if can_use_native:
-            can_use_native =
+            can_use_native = variable_name == "variable" and value_name == "value"
         if can_use_native:
             unpivot_input = transform_schema.UnpivotInput(
                 index_columns=index_columns,
                 value_columns=value_columns,
                 data_type_selector=None,
-                data_type_selector_mode=
+                data_type_selector_mode="column",
             )
 
             # Create node settings
@@ -1663,7 +1759,7 @@ class FlowFrame:
                 pos_y=150,
                 is_setup=True,
                 depending_on_id=self.node_id,
-                description=description or "Unpivot data from wide to long format"
+                description=description or "Unpivot data from wide to long format",
             )
 
             # Add to graph using native implementation
@@ -1699,7 +1795,7 @@ class FlowFrame:
 
     def concat(
         self,
-        other: "FlowFrame" |
+        other: "FlowFrame" | list["FlowFrame"],
         how: str = "vertical",
         rechunk: bool = False,
         parallel: bool = True,
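`concat` now accepts a single frame or a list, and the following hunk shows the other frames being re-pointed at one combined graph before connections are added. Sketch (constructor hypothetical):

```python
import flowfile as ff

a = ff.from_dict({"x": [1]})   # hypothetical constructor
b = ff.from_dict({"x": [2]})
c = ff.from_dict({"x": [3]})
stacked = a.concat([b, c], how="vertical")
```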
@@ -1800,14 +1896,11 @@ class FlowFrame:
 
         # Add polars code node with dependencies on all input frames
         depending_on_ids = [self.node_id] + [frame.node_id for frame in others]
-        self._add_polars_code(
-            new_node_id, code, description, depending_on_ids=depending_on_ids
-        )
+        self._add_polars_code(new_node_id, code, description, depending_on_ids=depending_on_ids)
         # Add connections to ensure all frames are available
         self._add_connection(self.node_id, new_node_id, "main")
 
         for other_frame in others:
-
             other_frame.flow_graph = combined_graph
             other_frame._add_connection(other_frame.node_id, new_node_id, "main")
         # Create and return the new frame
@@ -1819,8 +1912,8 @@ class FlowFrame:
         )
 
     def _detect_cum_count_record_id(
-        self, expr: Any, new_node_id: int, description:
-    ) ->
+        self, expr: Any, new_node_id: int, description: str | None = None
+    ) -> tuple[bool, Optional["FlowFrame"]]:
         """
         Detect if the expression is a cum_count operation and use record_id if possible.
 
@@ -1841,8 +1934,12 @@ class FlowFrame:
         - Optional[FlowFrame]: The new FlowFrame if detection was successful, otherwise None
         """
         # Check if this is a cum_count operation
-        if (
-
+        if (
+            not isinstance(expr, Expr)
+            or not expr._repr_str
+            or "cum_count" not in expr._repr_str
+            or not hasattr(expr, "name")
+        ):
             return False, None
 
         # Extract the output name
@@ -1929,24 +2026,24 @@ class FlowFrame:
             return False, None
 
     def with_columns(
-
-
-
-
-
-
+        self,
+        *exprs: Expr | Iterable[Expr] | Any,  # Allow Any for implicit lit conversion
+        flowfile_formulas: list[str] | None = None,
+        output_column_names: list[str] | None = None,
+        description: str | None = None,
+        **named_exprs: Expr | Any,  # Allow Any for implicit lit conversion
     ) -> "FlowFrame":
         """
         Add or replace columns in the DataFrame.
         """
         new_node_id = generate_node_id()
 
-        all_input_expr_objects:
-        pure_polars_expr_strings_for_wc:
-        collected_raw_definitions:
+        all_input_expr_objects: list[Expr] = []
+        pure_polars_expr_strings_for_wc: list[str] = []
+        collected_raw_definitions: list[str] = []
         has_exprs_or_named_exprs = bool(exprs or named_exprs)
         if has_exprs_or_named_exprs:
-            actual_exprs_to_process:
+            actual_exprs_to_process: list[Expr] = []
             temp_exprs_iterable = list(_parse_inputs_as_iterable(exprs))
 
             for item in temp_exprs_iterable:
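`with_columns` accepts Polars-style expressions (plain values are wrapped as literals) or, alternatively, parallel lists of Flowfile formulas and output column names. Sketch; the constructor and the formula syntax shown are indicative only:

```python
import flowfile as ff

df = ff.from_dict({"a": [1, 2]})  # hypothetical constructor
df2 = df.with_columns((ff.col("a") * 2).alias("doubled"), flag=True)
df3 = df.with_columns(
    flowfile_formulas=["[a] + 1"],          # formula syntax indicative only
    output_column_names=["a_plus_one"],
)
```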
@@ -1977,38 +2074,43 @@ class FlowFrame:
             if collected_raw_definitions:
                 unique_raw_definitions = list(dict.fromkeys(collected_raw_definitions))
                 definitions_section = "\n\n".join(unique_raw_definitions)
-                final_code_for_node =
-
-
+                final_code_for_node = (
+                    definitions_section + "\n#─────SPLIT─────\n\n" + f"output_df = {polars_operation_code}"
+                )
             else:
                 final_code_for_node = polars_operation_code
 
-            pl_expressions_for_fallback = [
-
-
-
-
+            pl_expressions_for_fallback = [
+                e.expr
+                for e in all_input_expr_objects
+                if isinstance(e, Expr) and hasattr(e, "expr") and e.expr is not None
+            ]
+            self._add_polars_code(
+                new_node_id,
+                final_code_for_node,
+                description,
+                method_name="with_columns",
+                convertable_to_code=_check_if_convertible_to_code(all_input_expr_objects),
+                polars_expr=pl_expressions_for_fallback,
+            )
             return self._create_child_frame(new_node_id)
 
         elif flowfile_formulas is not None and output_column_names is not None:
-
             if len(output_column_names) != len(flowfile_formulas):
-                raise ValueError(
-                    "Length of both the formulas and the output columns names must be identical"
-                )
+                raise ValueError("Length of both the formulas and the output columns names must be identical")
 
             if len(flowfile_formulas) == 1:
                 return self._with_flowfile_formula(flowfile_formulas[0], output_column_names[0], description)
             ff = self
-            for i, (flowfile_formula, output_column_name) in enumerate(
+            for i, (flowfile_formula, output_column_name) in enumerate(
+                zip(flowfile_formulas, output_column_names, strict=False)
+            ):
                 ff = ff._with_flowfile_formula(flowfile_formula, output_column_name, f"{i}: {description}")
             return ff
         else:
             raise ValueError("Either exprs/named_exprs or flowfile_formulas with output_column_names must be provided")
 
-    def with_row_index(
-        self, name: str = "index", offset: int = 0, description: str = None
-    ) -> "FlowFrame":
+    def with_row_index(self, name: str = "index", offset: int = 0, description: str = None) -> "FlowFrame":
         """
         Add a row index as the first column in the DataFrame.
 
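`with_row_index` prefers the native record-id node (via the `_detect_cum_count_record_id` path above) and otherwise falls back to generated Polars code, as the next hunk shows. Sketch (constructor hypothetical):

```python
import flowfile as ff

df = ff.from_dict({"a": ["x", "y"]})          # hypothetical constructor
indexed = df.with_row_index(name="row_nr", offset=1)
```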
@@ -2055,9 +2157,7 @@ class FlowFrame:
         else:
             # Use the polars code approach for other cases
             code = f"input_df.with_row_index(name='{name}', offset={offset})"
-            self._add_polars_code(
-                new_node_id, code, description or f"Add row index column '{name}'"
-            )
+            self._add_polars_code(new_node_id, code, description or f"Add row index column '{name}'")
 
         return self._create_child_frame(new_node_id)
 
@@ -2091,9 +2191,7 @@ class FlowFrame:
         all_columns = []
 
         if isinstance(columns, (list, tuple)):
-            all_columns.extend(
-                [col.column_name if isinstance(col, Column) else col for col in columns]
-            )
+            all_columns.extend([col.column_name if isinstance(col, Column) else col for col in columns])
         else:
             all_columns.append(columns.column_name if isinstance(columns, Column) else columns)
 
@@ -2102,10 +2200,9 @@ class FlowFrame:
             all_columns.append(col.column_name if isinstance(col, Column) else col)
 
         if len(all_columns) == 1:
-
             columns_str = stringify_values(all_columns[0])
         else:
-            columns_str = "[" + ", ".join([
+            columns_str = "[" + ", ".join([stringify_values(col) for col in all_columns]) + "]"
 
         code = f"""
 # Explode columns into multiple rows
@@ -2120,24 +2217,25 @@ class FlowFrame:
 
         return self._create_child_frame(new_node_id)
 
-    def fuzzy_match(
-
-
-
-
+    def fuzzy_match(
+        self,
+        other: "FlowFrame",
+        fuzzy_mappings: list[FuzzyMapping],
+        description: str = None,
+    ) -> "FlowFrame":
         self._ensure_same_graph(other)
 
         # Step 3: Generate new node ID
         new_node_id = generate_node_id()
-        node_fuzzy_match = input_schema.NodeFuzzyMatch(
-
-
-
-
-
-
-
-
+        node_fuzzy_match = input_schema.NodeFuzzyMatch(
+            flow_id=self.flow_graph.flow_id,
+            node_id=new_node_id,
+            join_input=transform_schema.FuzzyMatchInput(
+                join_mapping=fuzzy_mappings, left_select=self.columns, right_select=other.columns
+            ),
+            description=description or "Fuzzy match between two FlowFrames",
+            depending_on_ids=[self.node_id, other.node_id],
+        )
         self.flow_graph.add_fuzzy_match(node_fuzzy_match)
         self._add_connection(self.node_id, new_node_id, "main")
         other._add_connection(other.node_id, new_node_id, "right")
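`fuzzy_match` wires both frames into one `NodeFuzzyMatch`, with the right frame attached on the "right" connection. Sketch; the constructor and the `FuzzyMapping` field names are indicative only, check the `transform_schema` definitions for the real signature:

```python
import flowfile as ff

customers = ff.from_dict({"name": ["Acme Inc", "Globex"]})         # hypothetical constructor
suppliers = ff.from_dict({"company_name": ["ACME Incorporated"]})
matched = customers.fuzzy_match(
    suppliers,
    fuzzy_mappings=[ff.FuzzyMapping(left_col="name", right_col="company_name")],  # fields indicative
)
```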
@@ -2216,7 +2314,7 @@ class FlowFrame:
 
     def unique(
         self,
-        subset: Union[str, "Expr",
+        subset: Union[str, "Expr", list[Union[str, "Expr"]]] = None,
         *,
         keep: Literal["first", "last", "any", "none"] = "any",
         maintain_order: bool = False,
@@ -2273,17 +2371,11 @@ class FlowFrame:
             break
 
         # Determine if we can use the native implementation
-        can_use_native = (
-            can_use_native
-            and keep in ["any", "first", "last", "none"]
-            and not maintain_order
-        )
+        can_use_native = can_use_native and keep in ["any", "first", "last", "none"] and not maintain_order
 
         if can_use_native:
             # Use the native NodeUnique implementation
-            unique_input = transform_schema.UniqueInput(
-                columns=processed_subset, strategy=keep
-            )
+            unique_input = transform_schema.UniqueInput(columns=processed_subset, strategy=keep)
 
             # Create node settings
             unique_settings = input_schema.NodeUnique(
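`unique` uses the native `NodeUnique` only for plain column subsets with one of the four listed strategies and `maintain_order=False`; anything else is emitted as Polars code. Sketch (constructor hypothetical):

```python
import flowfile as ff

df = ff.from_dict({"id": [1, 1, 2], "v": ["a", "b", "c"]})  # hypothetical constructor
deduped = df.unique(subset=["id"], keep="first")             # native unique node
ordered = df.unique(subset=["id"], keep="first", maintain_order=True)  # Polars-code fallback
```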
@@ -2336,12 +2428,12 @@ class FlowFrame:
         return self._create_child_frame(new_node_id)
 
     @property
-    def columns(self) ->
+    def columns(self) -> list[str]:
         """Get the column names."""
         return self.data.collect_schema().names()
 
     @property
-    def dtypes(self) ->
+    def dtypes(self) -> list[pl.DataType]:
         """Get the column data types."""
         return self.data.dtypes
 