Flowfile 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backends/main.py +25 -22
- build_backends/main_prd.py +10 -19
- flowfile/__init__.py +178 -74
- flowfile/__main__.py +10 -7
- flowfile/api.py +51 -57
- flowfile/web/__init__.py +14 -9
- flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
- flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
- flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
- flowfile/web/static/assets/{CloudConnectionManager-0dfba9f2.js → CloudConnectionView-f13f202b.js} +11 -11
- flowfile/web/static/assets/{CloudStorageReader-d5b1b6c9.js → CloudStorageReader-0023d4a5.js} +10 -8
- flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
- flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
- flowfile/web/static/assets/{CloudStorageWriter-00d87aad.js → CloudStorageWriter-8e781e11.js} +10 -8
- flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
- flowfile/web/static/assets/{ColumnSelector-4685e75d.js → ColumnSelector-8ad68ea9.js} +3 -5
- flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-23e909da.js → ContextMenu-31ee57f0.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-70ae0c79.js → ContextMenu-69a74055.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-f149cf7c.js → ContextMenu-8e2051c6.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
- flowfile/web/static/assets/{CrossJoin-702a3edd.js → CrossJoin-03df6938.js} +12 -10
- flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
- flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
- flowfile/web/static/assets/{CustomNode-b1519993.js → CustomNode-8479239b.js} +36 -24
- flowfile/web/static/assets/{DatabaseConnectionSettings-6f3e4ea5.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
- flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
- flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
- flowfile/web/static/assets/{DatabaseReader-d38c7295.js → DatabaseReader-c58b9552.js} +25 -15
- flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
- flowfile/web/static/assets/{DatabaseManager-cf5ef661.js → DatabaseView-d26a9140.js} +11 -11
- flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
- flowfile/web/static/assets/{DatabaseWriter-b04ef46a.js → DatabaseWriter-4d05ddc7.js} +17 -10
- flowfile/web/static/assets/{designer-8da3ba3a.css → DesignerView-a6d0ee84.css} +614 -546
- flowfile/web/static/assets/{designer-9633482a.js → DesignerView-e6f5c0e8.js} +1107 -3170
- flowfile/web/static/assets/{documentation-ca400224.js → DocumentationView-2e78ef1b.js} +5 -5
- flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
- flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
- flowfile/web/static/assets/{ExploreData-5fa10ed8.js → ExploreData-7b54caca.js} +18 -9
- flowfile/web/static/assets/{ExternalSource-d39af878.js → ExternalSource-3fa399b2.js} +9 -7
- flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
- flowfile/web/static/assets/Filter-7494ea97.css +48 -0
- flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
- flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
- flowfile/web/static/assets/{Formula-6b04fb1d.js → Formula-aac42b1e.js} +13 -11
- flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
- flowfile/web/static/assets/{FuzzyMatch-999521f4.js → FuzzyMatch-cd9bbfca.js} +12 -10
- flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
- flowfile/web/static/assets/{GraphSolver-17dd2198.js → GraphSolver-c7e6780e.js} +13 -11
- flowfile/web/static/assets/{GroupBy-6b039e18.js → GroupBy-93c5d22b.js} +9 -7
- flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
- flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
- flowfile/web/static/assets/{Join-24d0f113.js → Join-a19b2de2.js} +13 -11
- flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
- flowfile/web/static/assets/LoginView-d325d632.css +172 -0
- flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
- flowfile/web/static/assets/{ManualInput-34639209.js → ManualInput-8d3374b2.js} +170 -116
- flowfile/web/static/assets/{MultiSelect-0e8724a3.js → MultiSelect-ad1b6243.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
- flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
- flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
- flowfile/web/static/assets/{NumericInput-3d63a470.js → NumericInput-7100234c.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
- flowfile/web/static/assets/{Output-283fe388.css → Output-35e97000.css} +6 -6
- flowfile/web/static/assets/{Output-edea9802.js → Output-f5efd2aa.js} +12 -9
- flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
- flowfile/web/static/assets/{Pivot-61d19301.js → Pivot-d981d23c.js} +11 -9
- flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
- flowfile/web/static/assets/{PivotValidation-f97fec5b.js → PivotValidation-39386e95.js} +3 -3
- flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
- flowfile/web/static/assets/{PivotValidation-de9f43fe.js → PivotValidation-63de1f73.js} +3 -3
- flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
- flowfile/web/static/assets/{PolarsCode-bc3c9984.js → PolarsCode-f9d69217.js} +18 -9
- flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
- flowfile/web/static/assets/PopOver-d96599db.css +33 -0
- flowfile/web/static/assets/{Read-e808b239.css → Read-36e7bd51.css} +12 -12
- flowfile/web/static/assets/{Read-64a3f259.js → Read-aec2e377.js} +14 -11
- flowfile/web/static/assets/{RecordCount-3d5039be.js → RecordCount-78ed6845.js} +6 -4
- flowfile/web/static/assets/{RecordId-597510e0.js → RecordId-2156e890.js} +8 -6
- flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
- flowfile/web/static/assets/{SQLQueryComponent-df51adbe.js → SQLQueryComponent-48c72f5b.js} +3 -3
- flowfile/web/static/assets/{Sample-4be0a507.js → Sample-1352ca74.js} +6 -4
- flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
- flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
- flowfile/web/static/assets/{SecretManager-4839be57.js → SecretsView-17df66ee.js} +35 -36
- flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
- flowfile/web/static/assets/{Select-9b72f201.js → Select-0aee4c54.js} +9 -7
- flowfile/web/static/assets/{SettingsSection-f0f75a42.js → SettingsSection-0784e157.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-e1e9c953.js → SettingsSection-cd341bb6.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-7ded385d.js → SettingsSection-f2002a6d.js} +3 -3
- flowfile/web/static/assets/{SingleSelect-6c777aac.js → SingleSelect-460cc0ea.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
- flowfile/web/static/assets/{SliderInput-7cb93e62.js → SliderInput-5d926864.js} +7 -4
- flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
- flowfile/web/static/assets/{Sort-6cbde21a.js → Sort-3cdc971b.js} +9 -7
- flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
- flowfile/web/static/assets/{TextInput-d9a40c11.js → TextInput-a2d0bfbd.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-5896c375.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
- flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
- flowfile/web/static/assets/{TextToRows-c4fcbf4d.js → TextToRows-918945f7.js} +11 -10
- flowfile/web/static/assets/{ToggleSwitch-4ef91d19.js → ToggleSwitch-f0ef5196.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
- flowfile/web/static/assets/{UnavailableFields-a03f512c.js → UnavailableFields-bdad6144.js} +4 -4
- flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
- flowfile/web/static/assets/{Union-bfe9b996.js → Union-e8ab8c86.js} +8 -6
- flowfile/web/static/assets/{Unique-5d023a27.js → Unique-8cd4f976.js} +13 -10
- flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
- flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
- flowfile/web/static/assets/{Unpivot-91cc5354.js → Unpivot-8da14095.js} +10 -8
- flowfile/web/static/assets/{UnpivotValidation-7ee2de44.js → UnpivotValidation-6f7d89ff.js} +3 -3
- flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
- flowfile/web/static/assets/{VueGraphicWalker-e51b9924.js → VueGraphicWalker-3fb312e1.js} +4 -4
- flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
- flowfile/web/static/assets/{api-cf1221f0.js → api-24483f0d.js} +1 -1
- flowfile/web/static/assets/{api-c1bad5ca.js → api-8b81fa73.js} +1 -1
- flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
- flowfile/web/static/assets/{dropDown-614b998d.js → dropDown-ac0fda9d.js} +3 -3
- flowfile/web/static/assets/{fullEditor-f7971590.js → fullEditor-5497a84a.js} +11 -10
- flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
- flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
- flowfile/web/static/assets/{genericNodeSettings-4fe5f36b.js → genericNodeSettings-99014e1d.js} +5 -5
- flowfile/web/static/assets/index-07dda503.js +38 -0
- flowfile/web/static/assets/index-3ba44389.js +2696 -0
- flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
- flowfile/web/static/assets/{index-5429bbf8.js → index-fb6493ae.js} +41626 -40867
- flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
- flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
- flowfile/web/static/assets/{outputCsv-076b85ab.js → outputCsv-8f8ba42d.js} +3 -3
- flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
- flowfile/web/static/assets/{outputExcel-0fd17dbe.js → outputExcel-393f4fef.js} +3 -3
- flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
- flowfile/web/static/assets/{outputParquet-b61e0847.js → outputParquet-07c81f65.js} +4 -4
- flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
- flowfile/web/static/assets/{readCsv-a8bb8b61.js → readCsv-07f6d9ad.js} +3 -3
- flowfile/web/static/assets/{readCsv-c767cb37.css → readCsv-3bfac4c3.css} +15 -15
- flowfile/web/static/assets/{readExcel-806d2826.css → readExcel-3db6b763.css} +13 -13
- flowfile/web/static/assets/{readExcel-67b4aee0.js → readExcel-ed69bc8f.js} +5 -5
- flowfile/web/static/assets/{readParquet-48c81530.css → readParquet-c5244ad5.css} +4 -4
- flowfile/web/static/assets/{readParquet-92ce1dbc.js → readParquet-e3ed4528.js} +3 -3
- flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
- flowfile/web/static/assets/{selectDynamic-92e25ee3.js → selectDynamic-80b92899.js} +5 -5
- flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
- flowfile/web/static/assets/{vue-codemirror.esm-41b0e0d7.js → vue-codemirror.esm-0965f39f.js} +31 -640
- flowfile/web/static/assets/{vue-content-loader.es-2c8e608f.js → vue-content-loader.es-c506ad97.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +2 -3
- flowfile-0.5.3.dist-info/RECORD +402 -0
- flowfile_core/__init__.py +13 -6
- flowfile_core/auth/jwt.py +51 -16
- flowfile_core/auth/models.py +32 -7
- flowfile_core/auth/password.py +89 -0
- flowfile_core/auth/secrets.py +8 -6
- flowfile_core/configs/__init__.py +9 -7
- flowfile_core/configs/flow_logger.py +15 -14
- flowfile_core/configs/node_store/__init__.py +72 -4
- flowfile_core/configs/node_store/nodes.py +155 -172
- flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
- flowfile_core/configs/settings.py +28 -15
- flowfile_core/database/connection.py +7 -6
- flowfile_core/database/init_db.py +96 -2
- flowfile_core/database/models.py +3 -1
- flowfile_core/fileExplorer/__init__.py +17 -0
- flowfile_core/fileExplorer/funcs.py +123 -57
- flowfile_core/fileExplorer/utils.py +10 -11
- flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
- flowfile_core/flowfile/analytics/analytics_processor.py +26 -24
- flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
- flowfile_core/flowfile/analytics/utils.py +1 -1
- flowfile_core/flowfile/code_generator/code_generator.py +358 -244
- flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
- flowfile_core/flowfile/database_connection_manager/models.py +1 -1
- flowfile_core/flowfile/extensions.py +17 -12
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +115 -83
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +481 -423
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +31 -20
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +14 -15
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
- flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +190 -127
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
- flowfile_core/flowfile/flow_data_engine/utils.py +99 -67
- flowfile_core/flowfile/flow_graph.py +918 -571
- flowfile_core/flowfile/flow_graph_utils.py +31 -49
- flowfile_core/flowfile/flow_node/flow_node.py +330 -233
- flowfile_core/flowfile/flow_node/models.py +53 -41
- flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
- flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
- flowfile_core/flowfile/handler.py +80 -30
- flowfile_core/flowfile/manage/compatibility_enhancements.py +209 -126
- flowfile_core/flowfile/manage/io_flowfile.py +54 -57
- flowfile_core/flowfile/node_designer/__init__.py +15 -13
- flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
- flowfile_core/flowfile/node_designer/custom_node.py +162 -36
- flowfile_core/flowfile/node_designer/ui_components.py +135 -34
- flowfile_core/flowfile/schema_callbacks.py +71 -51
- flowfile_core/flowfile/setting_generator/__init__.py +0 -1
- flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
- flowfile_core/flowfile/setting_generator/settings.py +64 -53
- flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
- flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
- flowfile_core/flowfile/util/calculate_layout.py +9 -13
- flowfile_core/flowfile/util/execution_orderer.py +25 -17
- flowfile_core/flowfile/util/node_skipper.py +4 -4
- flowfile_core/flowfile/utils.py +19 -21
- flowfile_core/main.py +26 -19
- flowfile_core/routes/auth.py +284 -11
- flowfile_core/routes/cloud_connections.py +25 -25
- flowfile_core/routes/logs.py +21 -29
- flowfile_core/routes/public.py +3 -3
- flowfile_core/routes/routes.py +70 -34
- flowfile_core/routes/secrets.py +25 -27
- flowfile_core/routes/user_defined_components.py +483 -4
- flowfile_core/run_lock.py +0 -1
- flowfile_core/schemas/__init__.py +4 -6
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
- flowfile_core/schemas/cloud_storage_schemas.py +59 -53
- flowfile_core/schemas/input_schema.py +231 -144
- flowfile_core/schemas/output_model.py +49 -34
- flowfile_core/schemas/schemas.py +116 -89
- flowfile_core/schemas/transform_schema.py +518 -263
- flowfile_core/schemas/yaml_types.py +21 -7
- flowfile_core/secret_manager/secret_manager.py +17 -13
- flowfile_core/types.py +29 -9
- flowfile_core/utils/arrow_reader.py +7 -6
- flowfile_core/utils/excel_file_manager.py +3 -3
- flowfile_core/utils/fileManager.py +7 -7
- flowfile_core/utils/fl_executor.py +8 -10
- flowfile_core/utils/utils.py +4 -4
- flowfile_core/utils/validate_setup.py +5 -4
- flowfile_frame/__init__.py +106 -51
- flowfile_frame/adapters.py +2 -9
- flowfile_frame/adding_expr.py +73 -32
- flowfile_frame/cloud_storage/frame_helpers.py +27 -23
- flowfile_frame/cloud_storage/secret_manager.py +12 -26
- flowfile_frame/config.py +2 -5
- flowfile_frame/expr.py +311 -218
- flowfile_frame/expr.pyi +160 -159
- flowfile_frame/expr_name.py +23 -23
- flowfile_frame/flow_frame.py +571 -476
- flowfile_frame/flow_frame.pyi +123 -104
- flowfile_frame/flow_frame_methods.py +227 -246
- flowfile_frame/group_frame.py +50 -20
- flowfile_frame/join.py +2 -2
- flowfile_frame/lazy.py +129 -87
- flowfile_frame/lazy_methods.py +83 -30
- flowfile_frame/list_name_space.py +55 -50
- flowfile_frame/selectors.py +148 -68
- flowfile_frame/series.py +9 -7
- flowfile_frame/utils.py +19 -21
- flowfile_worker/__init__.py +12 -7
- flowfile_worker/configs.py +11 -19
- flowfile_worker/create/__init__.py +14 -9
- flowfile_worker/create/funcs.py +114 -77
- flowfile_worker/create/models.py +46 -43
- flowfile_worker/create/pl_types.py +14 -15
- flowfile_worker/create/read_excel_tables.py +34 -41
- flowfile_worker/create/utils.py +22 -19
- flowfile_worker/external_sources/s3_source/main.py +18 -51
- flowfile_worker/external_sources/s3_source/models.py +34 -27
- flowfile_worker/external_sources/sql_source/main.py +8 -5
- flowfile_worker/external_sources/sql_source/models.py +13 -9
- flowfile_worker/flow_logger.py +10 -8
- flowfile_worker/funcs.py +214 -155
- flowfile_worker/main.py +11 -17
- flowfile_worker/models.py +35 -28
- flowfile_worker/process_manager.py +2 -3
- flowfile_worker/routes.py +121 -90
- flowfile_worker/secrets.py +9 -6
- flowfile_worker/spawner.py +80 -49
- flowfile_worker/utils.py +3 -2
- shared/__init__.py +2 -7
- shared/storage_config.py +25 -13
- test_utils/postgres/commands.py +3 -2
- test_utils/postgres/fixtures.py +9 -9
- test_utils/s3/commands.py +1 -1
- test_utils/s3/data_generator.py +3 -4
- test_utils/s3/demo_data_generator.py +4 -7
- test_utils/s3/fixtures.py +7 -5
- tools/migrate/__init__.py +1 -1
- tools/migrate/__main__.py +16 -29
- tools/migrate/legacy_schemas.py +251 -190
- tools/migrate/migrate.py +193 -181
- tools/migrate/tests/conftest.py +1 -3
- tools/migrate/tests/test_migrate.py +36 -41
- tools/migrate/tests/test_migration_e2e.py +28 -29
- tools/migrate/tests/test_node_migrations.py +50 -20
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
- flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
- flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
- flowfile/web/static/assets/Filter-9b6d08db.js +0 -164
- flowfile/web/static/assets/Filter-f62091b3.css +0 -20
- flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
- flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
- flowfile/web/static/assets/nodeInput-5d0d6b79.js +0 -41
- flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
- flowfile/web/static/assets/secretApi-68435402.js +0 -46
- flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
- flowfile-0.5.1.dist-info/RECORD +0 -388
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
flowfile_frame/expr.py
CHANGED
|
@@ -1,35 +1,28 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from
|
|
3
|
+
from builtins import len as built_in_len
|
|
4
|
+
from functools import wraps
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Literal, TypeVar, Union
|
|
4
6
|
|
|
5
7
|
import polars as pl
|
|
6
8
|
from polars.expr.string import ExprStringNameSpace
|
|
7
9
|
|
|
8
10
|
from flowfile_core.schemas import transform_schema
|
|
9
|
-
from
|
|
10
|
-
|
|
11
|
-
from builtins import len as built_in_len
|
|
12
|
-
|
|
11
|
+
from flowfile_frame.adding_expr import add_expr_methods
|
|
13
12
|
from flowfile_frame.config import logger
|
|
14
13
|
from flowfile_frame.expr_name import ExprNameNameSpace
|
|
15
|
-
from flowfile_frame.adding_expr import add_expr_methods
|
|
16
14
|
from flowfile_frame.list_name_space import ExprListNameSpace
|
|
17
15
|
|
|
18
16
|
# --- TYPE CHECKING IMPORTS ---
|
|
19
17
|
if TYPE_CHECKING:
|
|
20
18
|
from flowfile_frame.selectors import Selector
|
|
21
|
-
|
|
19
|
+
|
|
20
|
+
ExprType = TypeVar("ExprType", bound="Expr")
|
|
22
21
|
ColumnType = "Column" # Use string literal instead of direct class reference
|
|
23
|
-
from polars._typing import
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
PolarsDataType,
|
|
28
|
-
PolarsTemporalType,
|
|
29
|
-
TimeUnit)
|
|
30
|
-
|
|
31
|
-
ExprOrStr = Union['Expr', str]
|
|
32
|
-
ExprOrStrList = List[ExprOrStr]
|
|
22
|
+
from polars._typing import IntoExprColumn, PolarsTemporalType
|
|
23
|
+
|
|
24
|
+
ExprOrStr = Union["Expr", str]
|
|
25
|
+
ExprOrStrList = list[ExprOrStr]
|
|
33
26
|
ExprStrOrList = Union[ExprOrStr, ExprOrStrList]
|
|
34
27
|
|
|
35
28
|
|
|
@@ -38,7 +31,7 @@ def _repr_args(*args, **kwargs):
|
|
|
38
31
|
arg_reprs = [a.__repr__() for a in args]
|
|
39
32
|
kwarg_reprs = []
|
|
40
33
|
for k, v in kwargs.items():
|
|
41
|
-
if k ==
|
|
34
|
+
if k == "_function_sources":
|
|
42
35
|
continue
|
|
43
36
|
if isinstance(v, pl.DataType):
|
|
44
37
|
kwarg_reprs.append(f"{k}={v!s}")
|
|
@@ -49,7 +42,7 @@ def _repr_args(*args, **kwargs):
|
|
|
49
42
|
return ", ".join(arg_reprs + kwarg_reprs)
|
|
50
43
|
|
|
51
44
|
|
|
52
|
-
def _get_expr_and_repr(value: Any) -> tuple[
|
|
45
|
+
def _get_expr_and_repr(value: Any) -> tuple[pl.Expr | None, str]:
|
|
53
46
|
"""Helper to get polars expr and repr string for operands."""
|
|
54
47
|
if isinstance(value, Expr):
|
|
55
48
|
# Ensure we return None if the inner expression is None
|
|
@@ -68,37 +61,55 @@ def _get_expr_and_repr(value: Any) -> tuple[Optional[pl.Expr], str]:
|
|
|
68
61
|
|
|
69
62
|
|
|
70
63
|
class StringMethods:
|
|
71
|
-
expr:
|
|
64
|
+
expr: ExprStringNameSpace | None
|
|
72
65
|
convertable_to_code: bool
|
|
73
|
-
_function_sources:
|
|
74
|
-
|
|
75
|
-
def __init__(
|
|
76
|
-
|
|
66
|
+
_function_sources: list[str] | None
|
|
67
|
+
|
|
68
|
+
def __init__(
|
|
69
|
+
self,
|
|
70
|
+
parent_expr: Expr,
|
|
71
|
+
parent_repr_str: str,
|
|
72
|
+
convertable_to_code: bool = True,
|
|
73
|
+
_function_sources: list[str] | None = None,
|
|
74
|
+
):
|
|
77
75
|
self.parent = parent_expr
|
|
78
76
|
self.expr = parent_expr.expr.str if parent_expr.expr is not None else None
|
|
79
77
|
self.parent_repr_str = parent_repr_str
|
|
80
78
|
self.convertable_to_code = convertable_to_code
|
|
81
79
|
self._function_sources = _function_sources or []
|
|
82
80
|
|
|
83
|
-
def _create_next_expr(
|
|
84
|
-
|
|
81
|
+
def _create_next_expr(
|
|
82
|
+
self,
|
|
83
|
+
*args,
|
|
84
|
+
method_name: str,
|
|
85
|
+
result_expr: pl.Expr | None,
|
|
86
|
+
is_complex: bool,
|
|
87
|
+
convertable_to_code: bool = None,
|
|
88
|
+
**kwargs,
|
|
89
|
+
) -> Expr:
|
|
85
90
|
args_repr = _repr_args(*args, **kwargs)
|
|
86
91
|
new_repr = f"{self.parent_repr_str}.str.{method_name}({args_repr})"
|
|
87
92
|
if convertable_to_code is None:
|
|
88
93
|
convertable_to_code = self.convertable_to_code
|
|
89
|
-
new_expr = Expr(
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
94
|
+
new_expr = Expr(
|
|
95
|
+
result_expr,
|
|
96
|
+
self.parent.column_name,
|
|
97
|
+
repr_str=new_repr,
|
|
98
|
+
initial_column_name=self.parent._initial_column_name,
|
|
99
|
+
selector=None,
|
|
100
|
+
agg_func=self.parent.agg_func,
|
|
101
|
+
is_complex=is_complex,
|
|
102
|
+
convertable_to_code=convertable_to_code,
|
|
103
|
+
_function_sources=self._function_sources,
|
|
104
|
+
)
|
|
96
105
|
return new_expr
|
|
97
106
|
|
|
98
107
|
# ... (String methods remain unchanged from your provided code) ...
|
|
99
108
|
def contains(self, pattern, *, literal=False):
|
|
100
109
|
res_expr = self.expr.contains(pattern, literal=literal) if self.expr is not None else None
|
|
101
|
-
return self._create_next_expr(
|
|
110
|
+
return self._create_next_expr(
|
|
111
|
+
pattern, literal=literal, method_name="contains", result_expr=res_expr, is_complex=True
|
|
112
|
+
)
|
|
102
113
|
|
|
103
114
|
def starts_with(self, prefix):
|
|
104
115
|
res_expr = self.expr.starts_with(prefix) if self.expr is not None else None
|
|
@@ -106,12 +117,13 @@ class StringMethods:
|
|
|
106
117
|
|
|
107
118
|
def ends_with(self, suffix):
|
|
108
119
|
res_expr = self.expr.ends_with(suffix) if self.expr is not None else None
|
|
109
|
-
return self._create_next_expr(suffix,
|
|
120
|
+
return self._create_next_expr(suffix, result_expr=res_expr, method_name="ends_with", is_complex=True)
|
|
110
121
|
|
|
111
122
|
def replace(self, pattern, replacement, *, literal=False):
|
|
112
123
|
res_expr = self.expr.replace(pattern, replacement, literal=literal) if self.expr is not None else None
|
|
113
|
-
return self._create_next_expr(
|
|
114
|
-
|
|
124
|
+
return self._create_next_expr(
|
|
125
|
+
pattern, replacement, method_name="replace", result_expr=res_expr, literal=literal, is_complex=True
|
|
126
|
+
)
|
|
115
127
|
|
|
116
128
|
def to_uppercase(self):
|
|
117
129
|
res_expr = self.expr.to_uppercase() if self.expr is not None else None
|
|
@@ -119,8 +131,9 @@ class StringMethods:
|
|
|
119
131
|
|
|
120
132
|
def slice(self, offset: int | IntoExprColumn, length: int | IntoExprColumn | None = None) -> Expr:
|
|
121
133
|
res_expr = self.expr.slice(offset=offset, length=length)
|
|
122
|
-
return self._create_next_expr(
|
|
123
|
-
|
|
134
|
+
return self._create_next_expr(
|
|
135
|
+
method_name="slice", result_expr=res_expr, is_complex=True, offset=offset, length=length
|
|
136
|
+
)
|
|
124
137
|
|
|
125
138
|
def to_lowercase(self):
|
|
126
139
|
res_expr = self.expr.to_lowercase() if self.expr is not None else None
|
|
@@ -140,37 +153,71 @@ class StringMethods:
|
|
|
140
153
|
|
|
141
154
|
def to_date(self, format: str, *, strict: bool = True, exact: bool = True, cache: bool = True):
|
|
142
155
|
res_expr = self.expr.to_date(format, strict=strict, exact=exact, cache=cache) if self.expr is not None else None
|
|
143
|
-
return self._create_next_expr(
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
cache: bool = True,
|
|
153
|
-
ambiguous: Literal["earliest", "latest", "raise", "null"] | Expr = "raise",) -> 'Expr':
|
|
154
|
-
res_expr = self.expr.to_datetime(format, time_unit=time_unit, time_zone=time_zone, strict=strict,
|
|
155
|
-
exact=exact, cache=cache, ambiguous=ambiguous)
|
|
156
|
-
return self._create_next_expr(method_name="to_datetime", result_expr=res_expr, is_complex=True,
|
|
157
|
-
format=format, time_unit=time_unit, time_zone=time_zone, strict=strict,
|
|
158
|
-
exact=exact, cache=cache, ambiguous=ambiguous)
|
|
159
|
-
|
|
160
|
-
def strptime(self,
|
|
161
|
-
dtype: PolarsTemporalType,
|
|
162
|
-
format: str | None = None,
|
|
163
|
-
*,
|
|
164
|
-
strict: bool = True,
|
|
165
|
-
exact: bool = True,
|
|
166
|
-
cache: bool = True,
|
|
167
|
-
ambiguous: Literal["earliest", "latest", "raise", "null"] | Expr = "raise",) -> 'Expr':
|
|
168
|
-
res_expr = self.expr.strptime(dtype, format, strict=strict, exact=exact, cache=cache, ambiguous=ambiguous)
|
|
169
|
-
return self._create_next_expr(method_name="strptime", dtype=dtype, result_expr=res_expr, is_complex=True,
|
|
170
|
-
format=format, strict=strict,
|
|
171
|
-
exact=exact, cache=cache, ambiguous=ambiguous)
|
|
156
|
+
return self._create_next_expr(
|
|
157
|
+
method_name="to_date",
|
|
158
|
+
result_expr=res_expr,
|
|
159
|
+
is_complex=True,
|
|
160
|
+
format=format,
|
|
161
|
+
strict=strict,
|
|
162
|
+
exact=exact,
|
|
163
|
+
cache=cache,
|
|
164
|
+
)
|
|
172
165
|
|
|
166
|
+
def to_datetime(
|
|
167
|
+
self,
|
|
168
|
+
format: str | None = None,
|
|
169
|
+
*,
|
|
170
|
+
time_unit: Literal["ns", "us", "ms"] | None = None,
|
|
171
|
+
time_zone: str | None = None,
|
|
172
|
+
strict: bool = True,
|
|
173
|
+
exact: bool = True,
|
|
174
|
+
cache: bool = True,
|
|
175
|
+
ambiguous: Literal["earliest", "latest", "raise", "null"] | Expr = "raise",
|
|
176
|
+
) -> Expr:
|
|
177
|
+
res_expr = self.expr.to_datetime(
|
|
178
|
+
format,
|
|
179
|
+
time_unit=time_unit,
|
|
180
|
+
time_zone=time_zone,
|
|
181
|
+
strict=strict,
|
|
182
|
+
exact=exact,
|
|
183
|
+
cache=cache,
|
|
184
|
+
ambiguous=ambiguous,
|
|
185
|
+
)
|
|
186
|
+
return self._create_next_expr(
|
|
187
|
+
method_name="to_datetime",
|
|
188
|
+
result_expr=res_expr,
|
|
189
|
+
is_complex=True,
|
|
190
|
+
format=format,
|
|
191
|
+
time_unit=time_unit,
|
|
192
|
+
time_zone=time_zone,
|
|
193
|
+
strict=strict,
|
|
194
|
+
exact=exact,
|
|
195
|
+
cache=cache,
|
|
196
|
+
ambiguous=ambiguous,
|
|
197
|
+
)
|
|
173
198
|
|
|
199
|
+
def strptime(
    self,
    dtype: PolarsTemporalType,
    format: str | None = None,
    *,
    strict: bool = True,
    exact: bool = True,
    cache: bool = True,
    ambiguous: Literal["earliest", "latest", "raise", "null"] | Expr = "raise",
) -> Expr:
    """Parse the string expression into the temporal ``dtype`` and record the call.

    Arguments are forwarded to ``strptime`` on the underlying expression
    namespace and repeated to ``_create_next_expr`` so the call shows up in
    the repr string.

    Args:
        dtype: Target temporal type, passed positionally.
        format: Parsing format, passed positionally.
        strict: Forwarded unchanged.
        exact: Forwarded unchanged.
        cache: Forwarded unchanged.
        ambiguous: Literal strategy or an expression. An object exposing
            ``.expr`` (the wrapper ``Expr``) is unwrapped before it reaches
            the underlying call.

    Returns:
        The expression produced by ``_create_next_expr`` for this call.
    """
    # Unwrap a wrapper Expr: the underlying call cannot interpret the wrapper itself.
    underlying_ambiguous = ambiguous.expr if hasattr(ambiguous, "expr") else ambiguous
    res_expr = self.expr.strptime(
        dtype, format, strict=strict, exact=exact, cache=cache, ambiguous=underlying_ambiguous
    )
    return self._create_next_expr(
        method_name="strptime",
        dtype=dtype,
        result_expr=res_expr,
        is_complex=True,
        format=format,
        strict=strict,
        exact=exact,
        cache=cache,
        ambiguous=ambiguous,
    )
|
|
174
221
|
|
|
175
222
|
def __getattr__(self, name):
|
|
176
223
|
if self.expr is None or not hasattr(self.expr, name):
|
|
@@ -182,40 +229,53 @@ class StringMethods:
|
|
|
182
229
|
raise AttributeError(f"'StringMethods' underlying expression has no attribute '{name}'")
|
|
183
230
|
pl_attr = getattr(self.expr, name)
|
|
184
231
|
if callable(pl_attr):
|
|
232
|
+
|
|
185
233
|
def wrapper(*args, **kwargs):
|
|
186
234
|
result = pl_attr(*args, **kwargs)
|
|
187
235
|
# Assume generic getattr methods don't change aggregation status
|
|
188
236
|
return self._create_next_expr(name, result, *args, **kwargs)
|
|
237
|
+
|
|
189
238
|
return wrapper
|
|
190
239
|
else:
|
|
191
240
|
return pl_attr
|
|
192
241
|
|
|
193
242
|
|
|
194
243
|
class DateTimeMethods:
|
|
195
|
-
expr:
|
|
244
|
+
expr: Any | None
|
|
196
245
|
convertable_to_code: bool
|
|
197
|
-
_function_sources:
|
|
198
|
-
|
|
199
|
-
def __init__(
|
|
200
|
-
|
|
246
|
+
_function_sources: list[str] | None
|
|
247
|
+
|
|
248
|
+
def __init__(
    self,
    parent_expr: Expr,
    parent_repr_str: str,
    convertable_to_code: bool = True,
    _function_sources: list[str] | None = None,
):
    """Bind this namespace to the expression it was reached from.

    Args:
        parent_expr: Expression owning this namespace; its ``expr`` attribute
            supplies the ``.dt`` accessor stored on ``self.expr``.
        parent_repr_str: Representation string of the parent, stored as is.
        convertable_to_code: Stored as is.
        _function_sources: Stored when truthy; otherwise a new empty list is used.
    """
    underlying = parent_expr.expr
    # No underlying expression on the parent means there is no ``.dt`` accessor either.
    self.expr = None if underlying is None else underlying.dt
    self.parent = parent_expr
    self.parent_repr_str = parent_repr_str
    self.convertable_to_code = convertable_to_code
    self._function_sources = _function_sources if _function_sources else []
|
|
206
260
|
|
|
207
|
-
def _create_next_expr(
|
|
261
|
+
def _create_next_expr(
    self, method_name: str, result_expr: pl.Expr | None, convertable_to_code: bool | None = None, *args, **kwargs
) -> Expr:
    """Wrap the result of a ``.dt`` method call in a new ``Expr``.

    The new expression's repr is ``<parent repr>.dt.<method_name>(<args>)``
    and it inherits column name, initial column name, aggregation function
    and function sources from the parent / this namespace.

    Args:
        method_name: Name of the ``.dt`` method that was called.
        result_expr: Underlying expression produced by that call, or ``None``.
        convertable_to_code: Flag for the new expression; ``None`` means
            "inherit from this namespace".
        *args: Positional arguments of the call, used only for the repr.
        **kwargs: Keyword arguments of the call, used only for the repr.

    Returns:
        The newly created ``Expr`` (always marked ``is_complex=True``).
    """
    # The generic __getattr__ wrapper calls this as
    # ``_create_next_expr(name, result, *args, **kwargs)``, so the caller's first positional
    # argument lands in ``convertable_to_code``. Anything that is not a bool cannot be the
    # flag, so hand it back to ``args`` to keep it in the repr.
    # NOTE(review): a genuine boolean first positional argument is still ambiguous here;
    # the wrapper should pass the flag explicitly.
    if convertable_to_code is not None and not isinstance(convertable_to_code, bool):
        args = (convertable_to_code, *args)
        convertable_to_code = None
    args_repr = _repr_args(*args, **kwargs)
    new_repr = f"{self.parent_repr_str}.dt.{method_name}({args_repr})"
    if convertable_to_code is None:
        convertable_to_code = self.convertable_to_code
    new_expr = Expr(
        result_expr,
        self.parent.column_name,
        repr_str=new_repr,
        initial_column_name=self.parent._initial_column_name,
        selector=None,
        agg_func=self.parent.agg_func,
        is_complex=True,
        convertable_to_code=convertable_to_code,
        _function_sources=self._function_sources,
    )
    return new_expr
|
|
220
280
|
|
|
221
281
|
def year(self):
|
|
@@ -252,39 +312,42 @@ class DateTimeMethods:
|
|
|
252
312
|
raise AttributeError(f"'DateTimeMethods' underlying expression has no attribute '{name}'")
|
|
253
313
|
pl_attr = getattr(self.expr, name)
|
|
254
314
|
if callable(pl_attr):
|
|
315
|
+
|
|
255
316
|
def wrapper(*args, **kwargs):
|
|
256
317
|
result = pl_attr(*args, **kwargs)
|
|
257
318
|
# Assume generic getattr methods don't change aggregation status
|
|
258
319
|
return self._create_next_expr(name, result, *args, **kwargs)
|
|
320
|
+
|
|
259
321
|
return wrapper
|
|
260
322
|
else:
|
|
261
323
|
return pl_attr
|
|
262
324
|
|
|
263
325
|
|
|
264
326
|
class Expr:
|
|
265
|
-
_initial_column_name:
|
|
266
|
-
selector:
|
|
267
|
-
expr:
|
|
268
|
-
agg_func:
|
|
327
|
+
_initial_column_name: str | None
|
|
328
|
+
selector: Selector | None
|
|
329
|
+
expr: pl.Expr | None
|
|
330
|
+
agg_func: str | None
|
|
269
331
|
_repr_str: str
|
|
270
|
-
_name_namespace:
|
|
271
|
-
column_name:
|
|
332
|
+
_name_namespace: ExprNameNameSpace | None
|
|
333
|
+
column_name: str | None
|
|
272
334
|
is_complex: bool = False
|
|
273
335
|
convertable_to_code: bool
|
|
274
|
-
_function_sources:
|
|
275
|
-
|
|
276
|
-
def __init__(
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
336
|
+
_function_sources: list[str] # Add this attribute
|
|
337
|
+
|
|
338
|
+
def __init__(
|
|
339
|
+
self,
|
|
340
|
+
expr: pl.Expr | None,
|
|
341
|
+
column_name: str | None = None,
|
|
342
|
+
repr_str: str | None = None,
|
|
343
|
+
initial_column_name: str | None = None,
|
|
344
|
+
selector: Selector | None = None,
|
|
345
|
+
agg_func: str | None = None,
|
|
346
|
+
ddof: int | None = None,
|
|
347
|
+
is_complex: bool = False,
|
|
348
|
+
convertable_to_code: bool = True,
|
|
349
|
+
_function_sources: list[str] | None = None,
|
|
350
|
+
):
|
|
288
351
|
self.expr = expr
|
|
289
352
|
self.column_name = column_name
|
|
290
353
|
self.agg_func = agg_func
|
|
@@ -301,7 +364,7 @@ class Expr:
|
|
|
301
364
|
func_name = self.agg_func
|
|
302
365
|
kwargs_dict = {}
|
|
303
366
|
if func_name in ("std", "var") and ddof is not None:
|
|
304
|
-
kwargs_dict[
|
|
367
|
+
kwargs_dict["ddof"] = ddof
|
|
305
368
|
kwargs_repr = _repr_args(**kwargs_dict)
|
|
306
369
|
self._repr_str = f"{selector_repr}.{func_name}({kwargs_repr})"
|
|
307
370
|
self.expr = None
|
|
@@ -322,10 +385,10 @@ class Expr:
|
|
|
322
385
|
self.column_name = self.expr._name
|
|
323
386
|
except AttributeError:
|
|
324
387
|
pass
|
|
325
|
-
self._list_namespace:
|
|
326
|
-
self._str_namespace:
|
|
327
|
-
self._dt_namespace:
|
|
328
|
-
self._name_namespace:
|
|
388
|
+
self._list_namespace: ExprListNameSpace | None = None
|
|
389
|
+
self._str_namespace: StringMethods | None = None
|
|
390
|
+
self._dt_namespace: DateTimeMethods | None = None
|
|
391
|
+
self._name_namespace: ExprNameNameSpace | None = None
|
|
329
392
|
|
|
330
393
|
def __repr__(self) -> str:
    """Return the representation string stored in ``_repr_str``."""
    text = self._repr_str
    return text
|
|
@@ -354,14 +417,27 @@ class Expr:
|
|
|
354
417
|
# Check if this expression has any arithmetic/logical operators
|
|
355
418
|
if hasattr(self, "_repr_str"):
|
|
356
419
|
# Check for when/then/otherwise expressions
|
|
357
|
-
if any(
|
|
358
|
-
marker in self._repr_str
|
|
359
|
-
for marker in ["when(", ".then(", ".otherwise("]
|
|
360
|
-
):
|
|
420
|
+
if any(marker in self._repr_str for marker in ["when(", ".then(", ".otherwise("]):
|
|
361
421
|
return False
|
|
362
422
|
|
|
363
423
|
# Look for arithmetic operators in the expression string
|
|
364
|
-
for op in [
|
|
424
|
+
for op in [
|
|
425
|
+
"+",
|
|
426
|
+
"-",
|
|
427
|
+
"*",
|
|
428
|
+
"/",
|
|
429
|
+
"//",
|
|
430
|
+
"%",
|
|
431
|
+
"**",
|
|
432
|
+
"&",
|
|
433
|
+
"|",
|
|
434
|
+
"==",
|
|
435
|
+
"!=",
|
|
436
|
+
"<",
|
|
437
|
+
">",
|
|
438
|
+
"<=",
|
|
439
|
+
">=",
|
|
440
|
+
]:
|
|
365
441
|
if op in self._repr_str:
|
|
366
442
|
# If the operator is in a .alias() part, it's still simple
|
|
367
443
|
if f".alias('{op}" in self._repr_str:
|
|
@@ -385,22 +461,35 @@ class Expr:
|
|
|
385
461
|
# If we reach here, it's a simple expression (just column reference and maybe aggregation)
|
|
386
462
|
return True
|
|
387
463
|
|
|
388
|
-
def arg_unique(self) ->
|
|
464
|
+
def arg_unique(self) -> Expr:
    """Apply ``arg_unique`` to the underlying expression, if there is one.

    Returns:
        The expression built by ``_create_next_expr`` (marked complex); its
        underlying expression is ``None`` when this one has none.
    """
    underlying = self.expr
    result_expr = None if underlying is None else underlying.arg_unique()
    return self._create_next_expr(method_name="arg_unique", result_expr=result_expr, is_complex=True)
|
|
391
467
|
|
|
392
|
-
def arg_sort(self, *, descending: bool = False, nulls_last: bool = False) ->
|
|
393
|
-
result_expr =
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
468
|
+
def arg_sort(self, *, descending: bool = False, nulls_last: bool = False) -> Expr:
    """Apply ``arg_sort`` to the underlying expression, if there is one.

    Args:
        descending: Forwarded to the underlying ``arg_sort`` and to the repr.
        nulls_last: Forwarded to the underlying ``arg_sort`` and to the repr.

    Returns:
        The expression built by ``_create_next_expr`` (marked complex).
    """
    if self.expr is None:
        result_expr = None
    else:
        result_expr = self.expr.arg_sort(descending=descending, nulls_last=nulls_last)
    # Keyword order is kept as-is: it determines the order of the arguments in the repr.
    return self._create_next_expr(
        descending=descending,
        nulls_last=nulls_last,
        method_name="arg_sort",
        result_expr=result_expr,
        is_complex=True,
    )
|
|
397
479
|
|
|
398
|
-
def _create_next_expr(
|
|
399
|
-
|
|
400
|
-
|
|
480
|
+
def _create_next_expr(
|
|
481
|
+
self,
|
|
482
|
+
*args,
|
|
483
|
+
method_name: str,
|
|
484
|
+
result_expr: pl.Expr | None,
|
|
485
|
+
convertable_to_code: bool = None,
|
|
486
|
+
is_complex: bool,
|
|
487
|
+
_function_sources: list[str] | None = None,
|
|
488
|
+
**kwargs,
|
|
489
|
+
) -> Expr:
|
|
401
490
|
"""Creates a new Expr instance, appending method call to repr string."""
|
|
402
491
|
# Filter out _function_sources from kwargs to avoid passing it to _repr_args
|
|
403
|
-
filtered_kwargs = {k: v for k, v in kwargs.items() if k !=
|
|
492
|
+
filtered_kwargs = {k: v for k, v in kwargs.items() if k != "_function_sources"}
|
|
404
493
|
args_repr = _repr_args(*args, **filtered_kwargs)
|
|
405
494
|
new_repr = f"{self._repr_str}.{method_name}({args_repr})"
|
|
406
495
|
|
|
@@ -422,11 +511,10 @@ class Expr:
|
|
|
422
511
|
agg_func=self.agg_func,
|
|
423
512
|
is_complex=is_complex,
|
|
424
513
|
convertable_to_code=convertable_to_code,
|
|
425
|
-
_function_sources=combined_function_sources # Pass combined function sources
|
|
514
|
+
_function_sources=combined_function_sources, # Pass combined function sources
|
|
426
515
|
)
|
|
427
516
|
return new_expr_instance
|
|
428
517
|
|
|
429
|
-
|
|
430
518
|
@property
|
|
431
519
|
def name(self) -> ExprNameNameSpace:
|
|
432
520
|
"""Access the name namespace for expression name operations."""
|
|
@@ -434,9 +522,7 @@ class Expr:
|
|
|
434
522
|
self._name_namespace = ExprNameNameSpace(self, self._repr_str)
|
|
435
523
|
return self._name_namespace
|
|
436
524
|
|
|
437
|
-
def _create_binary_op_expr(
|
|
438
|
-
self, op_symbol: str, other: Any, result_expr: Optional[pl.Expr]
|
|
439
|
-
) -> "Expr":
|
|
525
|
+
def _create_binary_op_expr(self, op_symbol: str, other: Any, result_expr: pl.Expr | None) -> Expr:
|
|
440
526
|
"""Creates a new Expr for binary operations."""
|
|
441
527
|
if self.expr is None:
|
|
442
528
|
raise ValueError(
|
|
@@ -445,9 +531,7 @@ class Expr:
|
|
|
445
531
|
|
|
446
532
|
other_expr, other_repr = _get_expr_and_repr(other)
|
|
447
533
|
|
|
448
|
-
if other_expr is None and not isinstance(
|
|
449
|
-
other, (int, float, str, bool, type(None))
|
|
450
|
-
):
|
|
534
|
+
if other_expr is None and not isinstance(other, (int, float, str, bool, type(None))):
|
|
451
535
|
raise ValueError(
|
|
452
536
|
f"Cannot perform binary operation '{op_symbol}' with operand without underlying polars expression or literal value: {other_repr}"
|
|
453
537
|
)
|
|
@@ -463,7 +547,7 @@ class Expr:
|
|
|
463
547
|
initial_column_name=self._initial_column_name,
|
|
464
548
|
selector=None,
|
|
465
549
|
agg_func=None,
|
|
466
|
-
is_complex=True
|
|
550
|
+
is_complex=True,
|
|
467
551
|
)
|
|
468
552
|
|
|
469
553
|
@property
|
|
@@ -500,7 +584,9 @@ class Expr:
|
|
|
500
584
|
A new expression with the unique counts
|
|
501
585
|
"""
|
|
502
586
|
result_expr = self.expr.unique_counts() if self.expr is not None else None
|
|
503
|
-
result = self._create_next_expr(
|
|
587
|
+
result = self._create_next_expr(
|
|
588
|
+
method_name="unique_counts", result_expr=result_expr, is_complex=self.is_complex
|
|
589
|
+
)
|
|
504
590
|
result.agg_func = "unique_counts"
|
|
505
591
|
return result
|
|
506
592
|
|
|
@@ -570,7 +656,7 @@ class Expr:
|
|
|
570
656
|
result.agg_func = "std"
|
|
571
657
|
return result
|
|
572
658
|
|
|
573
|
-
def cum_count(self, reverse: bool = False) ->
|
|
659
|
+
def cum_count(self, reverse: bool = False) -> Expr:
|
|
574
660
|
"""
|
|
575
661
|
Return the cumulative count of the non-null values in the column.
|
|
576
662
|
|
|
@@ -584,10 +670,10 @@ class Expr:
|
|
|
584
670
|
Expr
|
|
585
671
|
A new expression with the cumulative count
|
|
586
672
|
"""
|
|
587
|
-
result_expr = (
|
|
588
|
-
|
|
673
|
+
result_expr = self.expr.cum_count(reverse=reverse) if self.expr is not None else None
|
|
674
|
+
result = self._create_next_expr(
|
|
675
|
+
method_name="cum_count", result_expr=result_expr, reverse=reverse, is_complex=True
|
|
589
676
|
)
|
|
590
|
-
result = self._create_next_expr(method_name="cum_count", result_expr=result_expr, reverse=reverse, is_complex=True)
|
|
591
677
|
result.agg_func = None
|
|
592
678
|
return result
|
|
593
679
|
|
|
@@ -697,45 +783,45 @@ class Expr:
|
|
|
697
783
|
res_expr = self.expr < other_expr if self.expr is not None and other_expr is not None else None
|
|
698
784
|
return self._create_binary_op_expr("<", other, res_expr)
|
|
699
785
|
|
|
700
|
-
def __ge__(self, other) ->
|
|
786
|
+
def __ge__(self, other) -> Expr:
    """Build the ``>=`` comparison between this expression and ``other``.

    The underlying comparison is only evaluated when both sides have an
    underlying expression; otherwise ``None`` is passed on.
    """
    rhs_expr, _ = _get_expr_and_repr(other)
    if self.expr is None or rhs_expr is None:
        res_expr = None
    else:
        res_expr = self.expr >= rhs_expr
    return self._create_binary_op_expr(">=", other, res_expr)
|
|
704
790
|
|
|
705
|
-
def __le__(self, other) ->
|
|
791
|
+
def __le__(self, other) -> Expr:
    """Build the ``<=`` comparison between this expression and ``other``.

    The underlying comparison is only evaluated when both sides have an
    underlying expression; otherwise ``None`` is passed on.
    """
    rhs_expr, _ = _get_expr_and_repr(other)
    if self.expr is None or rhs_expr is None:
        res_expr = None
    else:
        res_expr = self.expr <= rhs_expr
    return self._create_binary_op_expr("<=", other, res_expr)
|
|
709
795
|
|
|
710
796
|
# --- Logical operations ---
|
|
711
|
-
def __and__(self, other) ->
|
|
797
|
+
def __and__(self, other) -> Expr:
    """Build the ``&`` combination of this expression and ``other``.

    Raises:
        TypeError: If ``other`` is a ``Selector``.
    """
    from flowfile_frame.selectors import Selector

    if isinstance(other, Selector):
        raise TypeError("Unsupported operation: Expr & Selector")
    rhs_expr, _ = _get_expr_and_repr(other)
    # Only evaluate the underlying operation when both operands have an expression.
    combined = None
    if self.expr is not None and rhs_expr is not None:
        combined = self.expr & rhs_expr
    return self._create_binary_op_expr("&", other, combined)
|
|
718
805
|
|
|
719
|
-
def __or__(self, other) ->
|
|
806
|
+
def __or__(self, other) -> Expr:
    """Build the ``|`` combination of this expression and ``other``.

    Raises:
        TypeError: If ``other`` is a ``Selector``.
    """
    from flowfile_frame.selectors import Selector

    if isinstance(other, Selector):
        raise TypeError("Unsupported operation: Expr | Selector")
    rhs_expr, _ = _get_expr_and_repr(other)
    # Only evaluate the underlying operation when both operands have an expression.
    combined = None
    if self.expr is not None and rhs_expr is not None:
        combined = self.expr | rhs_expr
    return self._create_binary_op_expr("|", other, combined)
|
|
726
814
|
|
|
727
|
-
def __invert__(self) ->
|
|
815
|
+
def __invert__(self) -> Expr:
    """Return the logical negation ``~(<expr>)`` of this expression.

    The result has no column name and no aggregation function. The
    ``convertable_to_code`` flag and the collected function sources are
    carried over, as ``cast`` and ``alias`` do, so negating an expression
    does not silently mark it as convertible or drop its source snippets.
    """
    new_repr = f"~({self._repr_str})"
    res_expr = ~self.expr if self.expr is not None else None
    # Invert clears agg_func
    return Expr(
        res_expr,
        None,
        repr_str=new_repr,
        initial_column_name=self._initial_column_name,
        agg_func=None,
        convertable_to_code=self.convertable_to_code,
        _function_sources=self._function_sources,
    )
|
|
733
820
|
|
|
734
|
-
def __neg__(self) ->
|
|
821
|
+
def __neg__(self) -> Expr:
    """Return the arithmetic negation ``-<expr>`` of this expression.

    The result has no column name and no aggregation function. The
    ``convertable_to_code`` flag and the collected function sources are
    carried over, as ``cast`` and ``alias`` do, so negating an expression
    does not silently mark it as convertible or drop its source snippets.
    """
    new_repr = f"-{self._repr_str}"
    res_expr = -self.expr if self.expr is not None else None
    return Expr(
        res_expr,
        None,
        repr_str=new_repr,
        initial_column_name=self._initial_column_name,
        agg_func=None,
        convertable_to_code=self.convertable_to_code,
        _function_sources=self._function_sources,
    )
|
|
739
825
|
|
|
740
826
|
def is_null(self):
|
|
741
827
|
result_expr = self.expr.is_null() if self.expr is not None else None
|
|
@@ -744,7 +830,7 @@ class Expr:
|
|
|
744
830
|
result.agg_func = None
|
|
745
831
|
return result
|
|
746
832
|
|
|
747
|
-
def filter(self, *predicates, **constraints) ->
|
|
833
|
+
def filter(self, *predicates, **constraints) -> Expr:
|
|
748
834
|
"""
|
|
749
835
|
Filter expression
|
|
750
836
|
"""
|
|
@@ -784,7 +870,7 @@ class Expr:
|
|
|
784
870
|
if self.expr is not None:
|
|
785
871
|
try:
|
|
786
872
|
res_expr = self.expr.filter(*processed_predicates)
|
|
787
|
-
except Exception
|
|
873
|
+
except Exception:
|
|
788
874
|
logger.warning("Could not create polars expression for filter(): {e}")
|
|
789
875
|
pass # res_expr will remain None
|
|
790
876
|
|
|
@@ -795,7 +881,7 @@ class Expr:
|
|
|
795
881
|
initial_column_name=self._initial_column_name,
|
|
796
882
|
selector=None, # Filter typically removes selector link
|
|
797
883
|
agg_func=self.agg_func, # Preserve aggregation status
|
|
798
|
-
convertable_to_code=self.convertable_to_code
|
|
884
|
+
convertable_to_code=self.convertable_to_code,
|
|
799
885
|
)
|
|
800
886
|
|
|
801
887
|
def is_not_null(self):
|
|
@@ -816,13 +902,17 @@ class Expr:
|
|
|
816
902
|
new_pl_expr = self.expr.alias(name) if self.expr is not None else None
|
|
817
903
|
new_repr = f"{self._repr_str}.alias({repr(name)})"
|
|
818
904
|
# Alias preserves aggregation status
|
|
819
|
-
new_instance = Expr(
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
905
|
+
new_instance = Expr(
|
|
906
|
+
new_pl_expr,
|
|
907
|
+
name,
|
|
908
|
+
repr_str=new_repr,
|
|
909
|
+
initial_column_name=self._initial_column_name,
|
|
910
|
+
selector=None,
|
|
911
|
+
agg_func=self.agg_func,
|
|
912
|
+
is_complex=self.is_complex,
|
|
913
|
+
convertable_to_code=self.convertable_to_code,
|
|
914
|
+
_function_sources=self._function_sources,
|
|
915
|
+
)
|
|
826
916
|
return new_instance
|
|
827
917
|
|
|
828
918
|
def fill_null(self, value):
|
|
@@ -834,7 +924,7 @@ class Expr:
|
|
|
834
924
|
|
|
835
925
|
def fill_nan(self, value):
|
|
836
926
|
res_expr = None
|
|
837
|
-
if self.expr is not None and hasattr(self.expr,
|
|
927
|
+
if self.expr is not None and hasattr(self.expr, "fill_nan"):
|
|
838
928
|
res_expr = self.expr.fill_nan(value)
|
|
839
929
|
result = self._create_next_expr(value, method_name="fill_nan", result_expr=res_expr, is_complex=True)
|
|
840
930
|
result.agg_func = None
|
|
@@ -857,14 +947,15 @@ class Expr:
|
|
|
857
947
|
else:
|
|
858
948
|
return repr(expr)
|
|
859
949
|
|
|
860
|
-
def over(
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
950
|
+
def over(
|
|
951
|
+
self,
|
|
952
|
+
partition_by: ExprStrOrList, # Use the type alias defined earlier
|
|
953
|
+
*more_exprs: ExprOrStr,
|
|
954
|
+
order_by: ExprStrOrList | None = None,
|
|
955
|
+
descending: bool = False,
|
|
956
|
+
nulls_last: bool = False,
|
|
957
|
+
mapping_strategy: Literal["group_to_rows", "join", "explode"] = "group_to_rows",
|
|
958
|
+
) -> Expr:
|
|
868
959
|
"""
|
|
869
960
|
Compute expressions over the given groups.
|
|
870
961
|
String representation will show 'descending' and 'nulls_last' if they are True,
|
|
@@ -895,9 +986,7 @@ class Expr:
|
|
|
895
986
|
if isinstance(order_by, str):
|
|
896
987
|
processed_order_by = col(order_by)
|
|
897
988
|
elif isinstance(order_by, list):
|
|
898
|
-
processed_order_by = [
|
|
899
|
-
col(o) if isinstance(o, str) else o for o in order_by
|
|
900
|
-
]
|
|
989
|
+
processed_order_by = [col(o) if isinstance(o, str) else o for o in order_by]
|
|
901
990
|
else:
|
|
902
991
|
processed_order_by = order_by
|
|
903
992
|
|
|
@@ -939,10 +1028,7 @@ class Expr:
|
|
|
939
1028
|
else processed_partition_cols[0]
|
|
940
1029
|
)
|
|
941
1030
|
else:
|
|
942
|
-
partition_arg = [
|
|
943
|
-
p.expr if hasattr(p, "expr") else p
|
|
944
|
-
for p in processed_partition_cols
|
|
945
|
-
]
|
|
1031
|
+
partition_arg = [p.expr if hasattr(p, "expr") else p for p in processed_partition_cols]
|
|
946
1032
|
|
|
947
1033
|
# Build kwargs for the actual polars over() call
|
|
948
1034
|
polars_call_kwargs = {"mapping_strategy": mapping_strategy}
|
|
@@ -950,15 +1036,10 @@ class Expr:
|
|
|
950
1036
|
if processed_order_by is not None:
|
|
951
1037
|
# Convert order_by to Polars expressions
|
|
952
1038
|
if isinstance(processed_order_by, list):
|
|
953
|
-
polars_order_by_arg = [
|
|
954
|
-
o.expr if hasattr(o, "expr") else o
|
|
955
|
-
for o in processed_order_by
|
|
956
|
-
]
|
|
1039
|
+
polars_order_by_arg = [o.expr if hasattr(o, "expr") else o for o in processed_order_by]
|
|
957
1040
|
else:
|
|
958
1041
|
polars_order_by_arg = (
|
|
959
|
-
processed_order_by.expr
|
|
960
|
-
if hasattr(processed_order_by, "expr")
|
|
961
|
-
else processed_order_by
|
|
1042
|
+
processed_order_by.expr if hasattr(processed_order_by, "expr") else processed_order_by
|
|
962
1043
|
)
|
|
963
1044
|
polars_call_kwargs["order_by"] = polars_order_by_arg
|
|
964
1045
|
# These are tied to order_by for the actual Polars call
|
|
@@ -967,8 +1048,7 @@ class Expr:
|
|
|
967
1048
|
|
|
968
1049
|
res_expr = self.expr.over(partition_by=partition_arg, **polars_call_kwargs)
|
|
969
1050
|
|
|
970
|
-
except Exception
|
|
971
|
-
|
|
1051
|
+
except Exception:
|
|
972
1052
|
logger.warning("Could not create polars expression for over(): {e}")
|
|
973
1053
|
pass
|
|
974
1054
|
|
|
@@ -979,7 +1059,7 @@ class Expr:
|
|
|
979
1059
|
initial_column_name=self._initial_column_name,
|
|
980
1060
|
selector=None,
|
|
981
1061
|
agg_func=None,
|
|
982
|
-
_function_sources
|
|
1062
|
+
_function_sources=self._function_sources,
|
|
983
1063
|
)
|
|
984
1064
|
|
|
985
1065
|
def get_polars_code(self) -> str:
|
|
@@ -1008,13 +1088,17 @@ class Expr:
|
|
|
1008
1088
|
|
|
1009
1089
|
def sort(self, *, descending=False, nulls_last=False):
    """Sort the expression and return the result as a new ``Expr``.

    Args:
        descending: Forwarded to the underlying ``sort`` and written into the repr.
        nulls_last: Forwarded to the underlying ``sort`` and written into the repr.

    Returns:
        A new ``Expr`` keeping this expression's column name, initial column
        name, function sources and ``convertable_to_code`` flag; the
        aggregation function is cleared.
    """
    res_expr = self.expr.sort(descending=descending, nulls_last=nulls_last) if self.expr is not None else None
    return Expr(
        res_expr,
        self.column_name,
        repr_str=f"{self._repr_str}.sort(descending={descending}, nulls_last={nulls_last})",
        initial_column_name=self._initial_column_name,
        agg_func=None,
        # Without this the sorted expression would fall back to the default (True)
        # even when the expression it derives from cannot be converted to code.
        convertable_to_code=self.convertable_to_code,
        _function_sources=self._function_sources,
    )
|
|
1015
1099
|
|
|
1016
|
-
def cast(self, dtype:
|
|
1017
|
-
"""
|
|
1100
|
+
def cast(self, dtype: pl.DataType | str | pl.datatypes.classes.DataTypeClass, *, strict=True):
|
|
1101
|
+
"""Casts the Expr to a specified data type."""
|
|
1018
1102
|
pl_dtype = dtype
|
|
1019
1103
|
dtype_repr = repr(dtype)
|
|
1020
1104
|
|
|
@@ -1024,45 +1108,51 @@ class Expr:
|
|
|
1024
1108
|
dtype_repr = f"pl.{dtype}"
|
|
1025
1109
|
except AttributeError:
|
|
1026
1110
|
pass
|
|
1027
|
-
elif hasattr(dtype,
|
|
1111
|
+
elif hasattr(dtype, "__name__"):
|
|
1028
1112
|
dtype_repr = f"pl.{dtype.__name__}"
|
|
1029
1113
|
elif isinstance(dtype, pl.DataType):
|
|
1030
1114
|
dtype_repr = f"pl.{dtype!s}"
|
|
1031
1115
|
|
|
1032
1116
|
res_expr = self.expr.cast(pl_dtype, strict=strict) if self.expr is not None else None
|
|
1033
1117
|
# Cast preserves aggregation status (e.g., cast(col('a').sum()))
|
|
1034
|
-
new_expr = Expr(
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1118
|
+
new_expr = Expr(
|
|
1119
|
+
res_expr,
|
|
1120
|
+
self.column_name,
|
|
1121
|
+
repr_str=f"{self._repr_str}.cast({dtype_repr}, strict={strict})",
|
|
1122
|
+
initial_column_name=self._initial_column_name,
|
|
1123
|
+
selector=None,
|
|
1124
|
+
agg_func=self.agg_func,
|
|
1125
|
+
is_complex=True,
|
|
1126
|
+
convertable_to_code=self.convertable_to_code,
|
|
1127
|
+
_function_sources=self._function_sources,
|
|
1128
|
+
)
|
|
1042
1129
|
return new_expr
|
|
1043
1130
|
|
|
1044
1131
|
|
|
1045
1132
|
class Column(Expr):
|
|
1046
1133
|
"""Special Expr representing a single column, preserving column identity through alias/cast."""
|
|
1134
|
+
|
|
1047
1135
|
_select_input: transform_schema.SelectInput
|
|
1048
1136
|
|
|
1049
|
-
def __init__(self, name: str, select_input:
|
|
1050
|
-
super().__init__(
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1137
|
+
def __init__(self, name: str, select_input: transform_schema.SelectInput | None = None):
    """Create a column expression for ``name``.

    Args:
        name: Column name; used for the underlying ``pl.col`` expression, the
            column name and the repr string.
        select_input: Optional existing select description. When given, its
            ``old_name`` becomes the initial column name; otherwise a new
            ``SelectInput(old_name=name)`` is created.
    """
    super().__init__(
        expr=pl.col(name),
        column_name=name,
        # ``!r`` quotes and escapes the name, so names containing quotes or backslashes
        # still produce a valid Python literal (same approach as ``alias``).
        repr_str=f"pl.col({name!r})",
        initial_column_name=select_input.old_name if select_input else name,
        selector=None,
        agg_func=None,
    )
    self._select_input = select_input or transform_schema.SelectInput(old_name=name)
|
|
1057
1147
|
|
|
1058
|
-
def alias(self, new_name: str) ->
|
|
1148
|
+
def alias(self, new_name: str) -> Column:
|
|
1059
1149
|
"""Rename a column, returning a new Column instance."""
|
|
1060
1150
|
new_select = transform_schema.SelectInput(
|
|
1061
1151
|
old_name=self._select_input.old_name,
|
|
1062
1152
|
new_name=new_name,
|
|
1063
1153
|
data_type=self._select_input.data_type,
|
|
1064
1154
|
data_type_change=self._select_input.data_type_change,
|
|
1065
|
-
is_altered=True
|
|
1155
|
+
is_altered=True,
|
|
1066
1156
|
)
|
|
1067
1157
|
if self.expr is None:
|
|
1068
1158
|
raise ValueError("Cannot alias Column without underlying polars expression.")
|
|
@@ -1078,7 +1168,7 @@ class Column(Expr):
|
|
|
1078
1168
|
new_column.is_complex = self.is_complex
|
|
1079
1169
|
return new_column
|
|
1080
1170
|
|
|
1081
|
-
def cast(self, dtype:
|
|
1171
|
+
def cast(self, dtype: pl.DataType | str | pl.datatypes.classes.DataTypeClass, *, strict=True) -> Column:
|
|
1082
1172
|
"""Change the data type of a column, returning a new Column instance."""
|
|
1083
1173
|
pl_dtype = dtype
|
|
1084
1174
|
dtype_repr = repr(dtype)
|
|
@@ -1089,7 +1179,7 @@ class Column(Expr):
|
|
|
1089
1179
|
dtype_repr = f"pl.{dtype}"
|
|
1090
1180
|
except AttributeError:
|
|
1091
1181
|
pass
|
|
1092
|
-
elif hasattr(dtype,
|
|
1182
|
+
elif hasattr(dtype, "__name__"):
|
|
1093
1183
|
dtype_repr = f"pl.{dtype.__name__}"
|
|
1094
1184
|
elif isinstance(dtype, pl.DataType):
|
|
1095
1185
|
dtype_repr = f"pl.{dtype!s}"
|
|
@@ -1107,7 +1197,7 @@ class Column(Expr):
|
|
|
1107
1197
|
new_name=self._select_input.new_name,
|
|
1108
1198
|
data_type=str(pl_dtype),
|
|
1109
1199
|
data_type_change=True,
|
|
1110
|
-
is_altered=True
|
|
1200
|
+
is_altered=True,
|
|
1111
1201
|
)
|
|
1112
1202
|
if self.expr is None:
|
|
1113
1203
|
raise ValueError("Cannot cast Column without underlying polars expression.")
|
|
@@ -1140,7 +1230,7 @@ class Column(Expr):
|
|
|
1140
1230
|
new_name=final_new_name,
|
|
1141
1231
|
data_type=final_data_type,
|
|
1142
1232
|
data_type_change=final_data_type_change,
|
|
1143
|
-
is_altered=final_is_altered
|
|
1233
|
+
is_altered=final_is_altered,
|
|
1144
1234
|
)
|
|
1145
1235
|
|
|
1146
1236
|
@property
|
|
@@ -1173,7 +1263,7 @@ class When(Expr):
|
|
|
1173
1263
|
@staticmethod
|
|
1174
1264
|
def _get_expr_and_repr(value):
|
|
1175
1265
|
"""Extract expression and representation from a value."""
|
|
1176
|
-
if hasattr(value,
|
|
1266
|
+
if hasattr(value, "expr") and hasattr(value, "_repr_str"):
|
|
1177
1267
|
return value.expr, value._repr_str
|
|
1178
1268
|
elif isinstance(value, str) and not value.startswith("pl."):
|
|
1179
1269
|
col_obj = col(value)
|
|
@@ -1265,12 +1355,14 @@ def agg_function(func=None, *, customize_repr=True):
|
|
|
1265
1355
|
function
|
|
1266
1356
|
A wrapped function that returns a properly configured Expr
|
|
1267
1357
|
"""
|
|
1358
|
+
|
|
1268
1359
|
def decorator(func):
|
|
1269
1360
|
agg_func_name = func.__name__ # Use the function name as the agg_func
|
|
1270
1361
|
|
|
1271
1362
|
@wraps(func)
|
|
1272
1363
|
def wrapper(*args, **kwargs):
|
|
1273
1364
|
from flowfile_frame.expr import Expr
|
|
1365
|
+
|
|
1274
1366
|
# Get the Polars expression from the original function
|
|
1275
1367
|
pl_expr = func(*args, **kwargs)
|
|
1276
1368
|
|
|
@@ -1281,7 +1373,7 @@ def agg_function(func=None, *, customize_repr=True):
|
|
|
1281
1373
|
for arg in args:
|
|
1282
1374
|
if isinstance(arg, str):
|
|
1283
1375
|
args_reprs.append(f"'{arg}'")
|
|
1284
|
-
elif hasattr(arg,
|
|
1376
|
+
elif hasattr(arg, "_repr_str"):
|
|
1285
1377
|
args_reprs.append(arg._repr_str)
|
|
1286
1378
|
else:
|
|
1287
1379
|
args_reprs.append(repr(arg))
|
|
@@ -1289,7 +1381,7 @@ def agg_function(func=None, *, customize_repr=True):
|
|
|
1289
1381
|
# Process keyword arguments
|
|
1290
1382
|
kwargs_reprs = []
|
|
1291
1383
|
for k, v in kwargs.items():
|
|
1292
|
-
if isinstance(v, str) and not (k ==
|
|
1384
|
+
if isinstance(v, str) and not (k == "method" or k == "mapping_strategy"):
|
|
1293
1385
|
kwargs_reprs.append(f"{k}='{v}'")
|
|
1294
1386
|
elif isinstance(v, pl.DataType):
|
|
1295
1387
|
kwargs_reprs.append(f"{k}={v!s}")
|
|
@@ -1312,7 +1404,7 @@ def agg_function(func=None, *, customize_repr=True):
|
|
|
1312
1404
|
first_arg = args[0]
|
|
1313
1405
|
if isinstance(first_arg, str):
|
|
1314
1406
|
initial_column_name = first_arg
|
|
1315
|
-
elif hasattr(first_arg,
|
|
1407
|
+
elif hasattr(first_arg, "column_name"):
|
|
1316
1408
|
initial_column_name = first_arg.column_name
|
|
1317
1409
|
|
|
1318
1410
|
# Determine if this is a complex expression
|
|
@@ -1336,6 +1428,7 @@ def agg_function(func=None, *, customize_repr=True):
|
|
|
1336
1428
|
return decorator
|
|
1337
1429
|
return decorator(func)
|
|
1338
1430
|
|
|
1431
|
+
|
|
1339
1432
|
@agg_function
def max(*names) -> Expr:
    """Build the polars ``max`` expression over ``names``.

    The raw polars expression returned here is turned into a configured
    ``Expr`` by the ``agg_function`` decorator.
    """
    polars_expr = pl.max(*names)
    return polars_expr
|
|
@@ -1384,8 +1477,9 @@ def sum(*names) -> Expr:
|
|
|
1384
1477
|
|
|
1385
1478
|
|
|
1386
1479
|
@agg_function
|
|
1387
|
-
def corr(
|
|
1388
|
-
|
|
1480
|
+
def corr(
|
|
1481
|
+
a: str | Expr, b: str | Expr, *, method: str = "pearson", ddof: int = None, propagate_nans: bool = False
|
|
1482
|
+
) -> Expr:
|
|
1389
1483
|
"""
|
|
1390
1484
|
Compute the correlation between two columns.
|
|
1391
1485
|
"""
|
|
@@ -1396,7 +1490,7 @@ def corr(a: Union[str, Expr], b: Union[str, Expr], *,
|
|
|
1396
1490
|
|
|
1397
1491
|
|
|
1398
1492
|
@agg_function
|
|
1399
|
-
def cov(a:
|
|
1493
|
+
def cov(a: str | Expr, b: str | Expr, ddof: int = 1) -> Expr:
|
|
1400
1494
|
"""
|
|
1401
1495
|
Compute the covariance between two columns.
|
|
1402
1496
|
"""
|
|
@@ -1407,7 +1501,7 @@ def cov(a: Union[str, Expr], b: Union[str, Expr], ddof: int = 1) -> Expr:
|
|
|
1407
1501
|
|
|
1408
1502
|
|
|
1409
1503
|
def std(column, ddof) -> Expr:
    """Create an ``Expr`` for ``column`` with ``"std"`` as its aggregation function.

    Args:
        column: Passed as the first positional argument of ``Expr``.
        ddof: Passed through as the ``ddof`` keyword of ``Expr``.
    """
    std_expr = Expr(column, ddof=ddof, agg_func="std")
    return std_expr
|
|
1411
1505
|
|
|
1412
1506
|
|
|
1413
1507
|
def var(column, ddof) -> Expr:
|
|
@@ -1438,4 +1532,3 @@ def cum_count(expr, reverse: bool = False) -> Expr:
|
|
|
1438
1532
|
def when(condition):
    """Begin a when-then-otherwise chain by wrapping ``condition`` in a ``When``."""
    chain_start = When(condition)
    return chain_start
|
|
1441
|
-
|