Flowfile 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backends/main.py +25 -22
- build_backends/main_prd.py +10 -19
- flowfile/__init__.py +178 -74
- flowfile/__main__.py +10 -7
- flowfile/api.py +51 -57
- flowfile/web/__init__.py +14 -9
- flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
- flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
- flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
- flowfile/web/static/assets/{CloudConnectionManager-0dfba9f2.js → CloudConnectionView-f13f202b.js} +11 -11
- flowfile/web/static/assets/{CloudStorageReader-d5b1b6c9.js → CloudStorageReader-0023d4a5.js} +10 -8
- flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
- flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
- flowfile/web/static/assets/{CloudStorageWriter-00d87aad.js → CloudStorageWriter-8e781e11.js} +10 -8
- flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
- flowfile/web/static/assets/{ColumnSelector-4685e75d.js → ColumnSelector-8ad68ea9.js} +3 -5
- flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-23e909da.js → ContextMenu-31ee57f0.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-70ae0c79.js → ContextMenu-69a74055.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-f149cf7c.js → ContextMenu-8e2051c6.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
- flowfile/web/static/assets/{CrossJoin-702a3edd.js → CrossJoin-03df6938.js} +12 -10
- flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
- flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
- flowfile/web/static/assets/{CustomNode-b1519993.js → CustomNode-8479239b.js} +36 -24
- flowfile/web/static/assets/{DatabaseConnectionSettings-6f3e4ea5.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
- flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
- flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
- flowfile/web/static/assets/{DatabaseReader-d38c7295.js → DatabaseReader-c58b9552.js} +25 -15
- flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
- flowfile/web/static/assets/{DatabaseManager-cf5ef661.js → DatabaseView-d26a9140.js} +11 -11
- flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
- flowfile/web/static/assets/{DatabaseWriter-b04ef46a.js → DatabaseWriter-4d05ddc7.js} +17 -10
- flowfile/web/static/assets/{designer-8da3ba3a.css → DesignerView-a6d0ee84.css} +614 -546
- flowfile/web/static/assets/{designer-9633482a.js → DesignerView-e6f5c0e8.js} +1107 -3170
- flowfile/web/static/assets/{documentation-ca400224.js → DocumentationView-2e78ef1b.js} +5 -5
- flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
- flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
- flowfile/web/static/assets/{ExploreData-5fa10ed8.js → ExploreData-7b54caca.js} +18 -9
- flowfile/web/static/assets/{ExternalSource-d39af878.js → ExternalSource-3fa399b2.js} +9 -7
- flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
- flowfile/web/static/assets/Filter-7494ea97.css +48 -0
- flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
- flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
- flowfile/web/static/assets/{Formula-6b04fb1d.js → Formula-aac42b1e.js} +13 -11
- flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
- flowfile/web/static/assets/{FuzzyMatch-999521f4.js → FuzzyMatch-cd9bbfca.js} +12 -10
- flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
- flowfile/web/static/assets/{GraphSolver-17dd2198.js → GraphSolver-c7e6780e.js} +13 -11
- flowfile/web/static/assets/{GroupBy-6b039e18.js → GroupBy-93c5d22b.js} +9 -7
- flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
- flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
- flowfile/web/static/assets/{Join-24d0f113.js → Join-a19b2de2.js} +13 -11
- flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
- flowfile/web/static/assets/LoginView-d325d632.css +172 -0
- flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
- flowfile/web/static/assets/{ManualInput-34639209.js → ManualInput-8d3374b2.js} +170 -116
- flowfile/web/static/assets/{MultiSelect-0e8724a3.js → MultiSelect-ad1b6243.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
- flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
- flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
- flowfile/web/static/assets/{NumericInput-3d63a470.js → NumericInput-7100234c.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
- flowfile/web/static/assets/{Output-283fe388.css → Output-35e97000.css} +6 -6
- flowfile/web/static/assets/{Output-edea9802.js → Output-f5efd2aa.js} +12 -9
- flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
- flowfile/web/static/assets/{Pivot-61d19301.js → Pivot-d981d23c.js} +11 -9
- flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
- flowfile/web/static/assets/{PivotValidation-f97fec5b.js → PivotValidation-39386e95.js} +3 -3
- flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
- flowfile/web/static/assets/{PivotValidation-de9f43fe.js → PivotValidation-63de1f73.js} +3 -3
- flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
- flowfile/web/static/assets/{PolarsCode-bc3c9984.js → PolarsCode-f9d69217.js} +18 -9
- flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
- flowfile/web/static/assets/PopOver-d96599db.css +33 -0
- flowfile/web/static/assets/{Read-e808b239.css → Read-36e7bd51.css} +12 -12
- flowfile/web/static/assets/{Read-64a3f259.js → Read-aec2e377.js} +14 -11
- flowfile/web/static/assets/{RecordCount-3d5039be.js → RecordCount-78ed6845.js} +6 -4
- flowfile/web/static/assets/{RecordId-597510e0.js → RecordId-2156e890.js} +8 -6
- flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
- flowfile/web/static/assets/{SQLQueryComponent-df51adbe.js → SQLQueryComponent-48c72f5b.js} +3 -3
- flowfile/web/static/assets/{Sample-4be0a507.js → Sample-1352ca74.js} +6 -4
- flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
- flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
- flowfile/web/static/assets/{SecretManager-4839be57.js → SecretsView-17df66ee.js} +35 -36
- flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
- flowfile/web/static/assets/{Select-9b72f201.js → Select-0aee4c54.js} +9 -7
- flowfile/web/static/assets/{SettingsSection-f0f75a42.js → SettingsSection-0784e157.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-e1e9c953.js → SettingsSection-cd341bb6.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-7ded385d.js → SettingsSection-f2002a6d.js} +3 -3
- flowfile/web/static/assets/{SingleSelect-6c777aac.js → SingleSelect-460cc0ea.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
- flowfile/web/static/assets/{SliderInput-7cb93e62.js → SliderInput-5d926864.js} +7 -4
- flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
- flowfile/web/static/assets/{Sort-6cbde21a.js → Sort-3cdc971b.js} +9 -7
- flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
- flowfile/web/static/assets/{TextInput-d9a40c11.js → TextInput-a2d0bfbd.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-5896c375.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
- flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
- flowfile/web/static/assets/{TextToRows-c4fcbf4d.js → TextToRows-918945f7.js} +11 -10
- flowfile/web/static/assets/{ToggleSwitch-4ef91d19.js → ToggleSwitch-f0ef5196.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
- flowfile/web/static/assets/{UnavailableFields-a03f512c.js → UnavailableFields-bdad6144.js} +4 -4
- flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
- flowfile/web/static/assets/{Union-bfe9b996.js → Union-e8ab8c86.js} +8 -6
- flowfile/web/static/assets/{Unique-5d023a27.js → Unique-8cd4f976.js} +13 -10
- flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
- flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
- flowfile/web/static/assets/{Unpivot-91cc5354.js → Unpivot-8da14095.js} +10 -8
- flowfile/web/static/assets/{UnpivotValidation-7ee2de44.js → UnpivotValidation-6f7d89ff.js} +3 -3
- flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
- flowfile/web/static/assets/{VueGraphicWalker-e51b9924.js → VueGraphicWalker-3fb312e1.js} +4 -4
- flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
- flowfile/web/static/assets/{api-cf1221f0.js → api-24483f0d.js} +1 -1
- flowfile/web/static/assets/{api-c1bad5ca.js → api-8b81fa73.js} +1 -1
- flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
- flowfile/web/static/assets/{dropDown-614b998d.js → dropDown-ac0fda9d.js} +3 -3
- flowfile/web/static/assets/{fullEditor-f7971590.js → fullEditor-5497a84a.js} +11 -10
- flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
- flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
- flowfile/web/static/assets/{genericNodeSettings-4fe5f36b.js → genericNodeSettings-99014e1d.js} +5 -5
- flowfile/web/static/assets/index-07dda503.js +38 -0
- flowfile/web/static/assets/index-3ba44389.js +2696 -0
- flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
- flowfile/web/static/assets/{index-5429bbf8.js → index-fb6493ae.js} +41626 -40867
- flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
- flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
- flowfile/web/static/assets/{outputCsv-076b85ab.js → outputCsv-8f8ba42d.js} +3 -3
- flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
- flowfile/web/static/assets/{outputExcel-0fd17dbe.js → outputExcel-393f4fef.js} +3 -3
- flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
- flowfile/web/static/assets/{outputParquet-b61e0847.js → outputParquet-07c81f65.js} +4 -4
- flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
- flowfile/web/static/assets/{readCsv-a8bb8b61.js → readCsv-07f6d9ad.js} +3 -3
- flowfile/web/static/assets/{readCsv-c767cb37.css → readCsv-3bfac4c3.css} +15 -15
- flowfile/web/static/assets/{readExcel-806d2826.css → readExcel-3db6b763.css} +13 -13
- flowfile/web/static/assets/{readExcel-67b4aee0.js → readExcel-ed69bc8f.js} +5 -5
- flowfile/web/static/assets/{readParquet-48c81530.css → readParquet-c5244ad5.css} +4 -4
- flowfile/web/static/assets/{readParquet-92ce1dbc.js → readParquet-e3ed4528.js} +3 -3
- flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
- flowfile/web/static/assets/{selectDynamic-92e25ee3.js → selectDynamic-80b92899.js} +5 -5
- flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
- flowfile/web/static/assets/{vue-codemirror.esm-41b0e0d7.js → vue-codemirror.esm-0965f39f.js} +31 -640
- flowfile/web/static/assets/{vue-content-loader.es-2c8e608f.js → vue-content-loader.es-c506ad97.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +2 -3
- flowfile-0.5.3.dist-info/RECORD +402 -0
- flowfile_core/__init__.py +13 -6
- flowfile_core/auth/jwt.py +51 -16
- flowfile_core/auth/models.py +32 -7
- flowfile_core/auth/password.py +89 -0
- flowfile_core/auth/secrets.py +8 -6
- flowfile_core/configs/__init__.py +9 -7
- flowfile_core/configs/flow_logger.py +15 -14
- flowfile_core/configs/node_store/__init__.py +72 -4
- flowfile_core/configs/node_store/nodes.py +155 -172
- flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
- flowfile_core/configs/settings.py +28 -15
- flowfile_core/database/connection.py +7 -6
- flowfile_core/database/init_db.py +96 -2
- flowfile_core/database/models.py +3 -1
- flowfile_core/fileExplorer/__init__.py +17 -0
- flowfile_core/fileExplorer/funcs.py +123 -57
- flowfile_core/fileExplorer/utils.py +10 -11
- flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
- flowfile_core/flowfile/analytics/analytics_processor.py +26 -24
- flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
- flowfile_core/flowfile/analytics/utils.py +1 -1
- flowfile_core/flowfile/code_generator/code_generator.py +358 -244
- flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
- flowfile_core/flowfile/database_connection_manager/models.py +1 -1
- flowfile_core/flowfile/extensions.py +17 -12
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +115 -83
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +481 -423
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +31 -20
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +14 -15
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
- flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +190 -127
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
- flowfile_core/flowfile/flow_data_engine/utils.py +99 -67
- flowfile_core/flowfile/flow_graph.py +918 -571
- flowfile_core/flowfile/flow_graph_utils.py +31 -49
- flowfile_core/flowfile/flow_node/flow_node.py +330 -233
- flowfile_core/flowfile/flow_node/models.py +53 -41
- flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
- flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
- flowfile_core/flowfile/handler.py +80 -30
- flowfile_core/flowfile/manage/compatibility_enhancements.py +209 -126
- flowfile_core/flowfile/manage/io_flowfile.py +54 -57
- flowfile_core/flowfile/node_designer/__init__.py +15 -13
- flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
- flowfile_core/flowfile/node_designer/custom_node.py +162 -36
- flowfile_core/flowfile/node_designer/ui_components.py +135 -34
- flowfile_core/flowfile/schema_callbacks.py +71 -51
- flowfile_core/flowfile/setting_generator/__init__.py +0 -1
- flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
- flowfile_core/flowfile/setting_generator/settings.py +64 -53
- flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
- flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
- flowfile_core/flowfile/util/calculate_layout.py +9 -13
- flowfile_core/flowfile/util/execution_orderer.py +25 -17
- flowfile_core/flowfile/util/node_skipper.py +4 -4
- flowfile_core/flowfile/utils.py +19 -21
- flowfile_core/main.py +26 -19
- flowfile_core/routes/auth.py +284 -11
- flowfile_core/routes/cloud_connections.py +25 -25
- flowfile_core/routes/logs.py +21 -29
- flowfile_core/routes/public.py +3 -3
- flowfile_core/routes/routes.py +70 -34
- flowfile_core/routes/secrets.py +25 -27
- flowfile_core/routes/user_defined_components.py +483 -4
- flowfile_core/run_lock.py +0 -1
- flowfile_core/schemas/__init__.py +4 -6
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
- flowfile_core/schemas/cloud_storage_schemas.py +59 -53
- flowfile_core/schemas/input_schema.py +231 -144
- flowfile_core/schemas/output_model.py +49 -34
- flowfile_core/schemas/schemas.py +116 -89
- flowfile_core/schemas/transform_schema.py +518 -263
- flowfile_core/schemas/yaml_types.py +21 -7
- flowfile_core/secret_manager/secret_manager.py +17 -13
- flowfile_core/types.py +29 -9
- flowfile_core/utils/arrow_reader.py +7 -6
- flowfile_core/utils/excel_file_manager.py +3 -3
- flowfile_core/utils/fileManager.py +7 -7
- flowfile_core/utils/fl_executor.py +8 -10
- flowfile_core/utils/utils.py +4 -4
- flowfile_core/utils/validate_setup.py +5 -4
- flowfile_frame/__init__.py +106 -51
- flowfile_frame/adapters.py +2 -9
- flowfile_frame/adding_expr.py +73 -32
- flowfile_frame/cloud_storage/frame_helpers.py +27 -23
- flowfile_frame/cloud_storage/secret_manager.py +12 -26
- flowfile_frame/config.py +2 -5
- flowfile_frame/expr.py +311 -218
- flowfile_frame/expr.pyi +160 -159
- flowfile_frame/expr_name.py +23 -23
- flowfile_frame/flow_frame.py +571 -476
- flowfile_frame/flow_frame.pyi +123 -104
- flowfile_frame/flow_frame_methods.py +227 -246
- flowfile_frame/group_frame.py +50 -20
- flowfile_frame/join.py +2 -2
- flowfile_frame/lazy.py +129 -87
- flowfile_frame/lazy_methods.py +83 -30
- flowfile_frame/list_name_space.py +55 -50
- flowfile_frame/selectors.py +148 -68
- flowfile_frame/series.py +9 -7
- flowfile_frame/utils.py +19 -21
- flowfile_worker/__init__.py +12 -7
- flowfile_worker/configs.py +11 -19
- flowfile_worker/create/__init__.py +14 -9
- flowfile_worker/create/funcs.py +114 -77
- flowfile_worker/create/models.py +46 -43
- flowfile_worker/create/pl_types.py +14 -15
- flowfile_worker/create/read_excel_tables.py +34 -41
- flowfile_worker/create/utils.py +22 -19
- flowfile_worker/external_sources/s3_source/main.py +18 -51
- flowfile_worker/external_sources/s3_source/models.py +34 -27
- flowfile_worker/external_sources/sql_source/main.py +8 -5
- flowfile_worker/external_sources/sql_source/models.py +13 -9
- flowfile_worker/flow_logger.py +10 -8
- flowfile_worker/funcs.py +214 -155
- flowfile_worker/main.py +11 -17
- flowfile_worker/models.py +35 -28
- flowfile_worker/process_manager.py +2 -3
- flowfile_worker/routes.py +121 -90
- flowfile_worker/secrets.py +9 -6
- flowfile_worker/spawner.py +80 -49
- flowfile_worker/utils.py +3 -2
- shared/__init__.py +2 -7
- shared/storage_config.py +25 -13
- test_utils/postgres/commands.py +3 -2
- test_utils/postgres/fixtures.py +9 -9
- test_utils/s3/commands.py +1 -1
- test_utils/s3/data_generator.py +3 -4
- test_utils/s3/demo_data_generator.py +4 -7
- test_utils/s3/fixtures.py +7 -5
- tools/migrate/__init__.py +1 -1
- tools/migrate/__main__.py +16 -29
- tools/migrate/legacy_schemas.py +251 -190
- tools/migrate/migrate.py +193 -181
- tools/migrate/tests/conftest.py +1 -3
- tools/migrate/tests/test_migrate.py +36 -41
- tools/migrate/tests/test_migration_e2e.py +28 -29
- tools/migrate/tests/test_node_migrations.py +50 -20
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
- flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
- flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
- flowfile/web/static/assets/Filter-9b6d08db.js +0 -164
- flowfile/web/static/assets/Filter-f62091b3.css +0 -20
- flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
- flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
- flowfile/web/static/assets/nodeInput-5d0d6b79.js +0 -41
- flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
- flowfile/web/static/assets/secretApi-68435402.js +0 -46
- flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
- flowfile-0.5.1.dist-info/RECORD +0 -388
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
flowfile_frame/flow_frame.pyi
CHANGED
|
@@ -4,38 +4,57 @@ import inspect
[Note: "…" marks text truncated in the original diff rendering.]
 import os
 import sys
 import typing
-from …
-from typing import List, Optional, ForwardRef, TypeVar, Any, Iterable, Sequence, Mapping, Collection, Callable, Literal, IO, Union
+from collections.abc import Awaitable, Callable, Collection, Iterable, Mapping, Sequence
 from datetime import timedelta
+from io import IOBase
 from pathlib import Path
-from …
+from typing import (
+    IO,
+    Any,
+    ForwardRef,
+    Literal,
+    TypeVar,
+)

 # Third-party imports
 import polars as pl
+from polars import DataFrame, LazyFrame, QueryOptFlags
 from polars._typing import *
 from polars._typing import ParquetMetadata, PlanStage
 from polars._utils.async_ import _GeventDataFrameResult
 from polars.dependencies import polars_cloud as pc
 from polars.io.cloud import CredentialProviderFunction
-from polars.lazyframe.frame import LazyGroupBy
-from polars import LazyFrame, DataFrame, QueryOptFlags
 from polars.io.parquet import ParquetFieldOverwrites
+from polars.lazyframe.frame import LazyGroupBy
 from polars.lazyframe.opt_flags import DEFAULT_QUERY_OPT_FLAGS
-from polars.type_aliases import (…
+from polars.type_aliases import (
+    AsofJoinStrategy,
+    ClosedInterval,
+    CompatLevel,
+    EngineType,
+    ExplainFormat,
+    IntoExpr,
+    IpcCompression,
+    Label,
+    Schema,
+    SerializationFormat,
+    StartBy,
+    SyncOnCloseMethod,
+)

 # Local application/library specific imports
 import flowfile_frame
 from flowfile_core.flowfile.flow_graph import FlowGraph
 from flowfile_core.flowfile.flow_node.flow_node import FlowNode
+from flowfile_core.schemas import transform_schema
 from flowfile_frame import group_frame
 from flowfile_frame.expr import Expr
-from flowfile_core.schemas import transform_schema

 # Conditional imports
 if sys.version_info >= (3, 10):
     from typing import Concatenate
 else:
-    from …
+    from typing import Concatenate

 T = TypeVar('T')
 P = typing.ParamSpec('P')
@@ -57,7 +76,7 @@ class FlowFrame:
[Note: "…" marks text truncated in the original diff rendering.]
     data: LazyFrame
     flow_graph: FlowGraph
     node_id: int
-    parent_node_id: …
+    parent_node_id: int | None

     # This special method determines how the object behaves in boolean contexts.
     def __bool__(self) -> Any: ...
@@ -81,7 +100,7 @@ class FlowFrame:
[Note: "…" marks text truncated in the original diff rendering.]
     def __ne__(self, other: object) -> typing.NoReturn: ...

     # Unified constructor for FlowFrame.
-    def __new__(cls, data: …
+    def __new__(cls, data: LazyFrame | collections.abc.Mapping[str, collections.abc.Sequence[object] | collections.abc.Mapping[str, collections.abc.Sequence[object]] | ForwardRef('Series')] | collections.abc.Sequence[typing.Any] | ForwardRef('np.ndarray[Any, Any]') | ForwardRef('pa.Table') | ForwardRef('pd.DataFrame') | ForwardRef('ArrowArrayExportable') | ForwardRef('ArrowStreamExportable') | ForwardRef('torch.Tensor') = None, schema: collections.abc.Mapping[str, ForwardRef('DataTypeClass') | ForwardRef('DataType') | type[int] | type[float] | type[bool] | type[str] | type[date] | type[time] | type[datetime] | type[timedelta] | type[list[typing.Any]] | type[tuple[typing.Any, ...]] | type[bytes] | type[object] | type[Decimal] | type[None] | NoneType] | collections.abc.Sequence[str | tuple[str, ForwardRef('DataTypeClass') | ForwardRef('DataType') | type[int] | type[float] | type[bool] | type[str] | type[date] | type[time] | type[datetime] | type[timedelta] | type[list[typing.Any]] | type[tuple[typing.Any, ...]] | type[bytes] | type[object] | type[Decimal] | type[None] | NoneType]] | NoneType = None, schema_overrides: collections.abc.Mapping[str, ForwardRef('DataTypeClass') | ForwardRef('DataType')] | None = None, strict: bool = True, orient: typing.Literal['col', 'row'] | None = None, infer_schema_length: int | None = 100, nan_to_null: bool = False, flow_graph: flowfile_core.flowfile.flow_graph.FlowGraph | None = None, node_id: int | None = None, parent_node_id: int | None = None, **kwargs) -> Self: ...

     def __repr__(self) -> Any: ...

@@ -91,60 +110,60 @@ class FlowFrame:
[Note: "…" marks text truncated in the original diff rendering.]
     # Add a cross join node to the graph.
     def _add_cross_join_node(self, new_node_id: int, join_input: transform_schema.CrossJoinInput, description: str, other: FlowFrame) -> None: ...

-    def _add_number_of_records(self, new_node_id: int, description: str = None) -> …
+    def _add_number_of_records(self, new_node_id: int, description: str = None) -> FlowFrame: ...

-    def _add_polars_code(self, new_node_id: int, code: str, depending_on_ids: …
+    def _add_polars_code(self, new_node_id: int, code: str, depending_on_ids: list[str] | None = None, convertable_to_code: bool = True, method_name: str = None, polars_expr: flowfile_frame.expr.Expr | list[flowfile_frame.expr.Expr] | NoneType = None, group_expr: flowfile_frame.expr.Expr | list[flowfile_frame.expr.Expr] | NoneType = None, kwargs_expr: dict | None = None, group_kwargs: dict | None = None, description: str = None) -> Any: ...

     # Add a regular join node to the graph.
     def _add_regular_join_node(self, new_node_id: int, join_input: transform_schema.JoinInput, description: str, other: FlowFrame) -> None: ...

     # Build kwargs dictionary for Polars join code.
-    def _build_polars_join_kwargs(self, on: …
+    def _build_polars_join_kwargs(self, on: list[str | flowfile_frame.expr.Column] | str | flowfile_frame.expr.Column, left_on: list[str | flowfile_frame.expr.Column] | str | flowfile_frame.expr.Column, right_on: list[str | flowfile_frame.expr.Column] | str | flowfile_frame.expr.Column, left_columns: list[str] | None, right_columns: list[str] | None, how: str, suffix: str, validate: str, nulls_equal: bool, coalesce: bool, maintain_order: typing.Literal[None, 'left', 'right', 'left_right', 'right_left']) -> dict: ...

     def _comparison_error(self, operator: str) -> typing.NoReturn: ...

     # Helper method to create a new FlowFrame that's a child of this one
-    def _create_child_frame(self, new_node_id) -> …
+    def _create_child_frame(self, new_node_id) -> FlowFrame: ...

     # Detect if the expression is a cum_count operation and use record_id if possible.
-    def _detect_cum_count_record_id(self, expr: Any, new_node_id: int, description: …
+    def _detect_cum_count_record_id(self, expr: Any, new_node_id: int, description: str | None = None) -> FlowFrame: ...

     # Ensure both FlowFrames are in the same graph, combining if necessary.
     def _ensure_same_graph(self, other: FlowFrame) -> None: ...

     # Execute join using native FlowFile join nodes.
-    def _execute_native_join(self, other: FlowFrame, new_node_id: int, join_mappings: …
+    def _execute_native_join(self, other: FlowFrame, new_node_id: int, join_mappings: list | None, how: str, description: str) -> FlowFrame: ...

     # Execute join using Polars code approach.
-    def _execute_polars_code_join(self, other: FlowFrame, new_node_id: int, on: …
+    def _execute_polars_code_join(self, other: FlowFrame, new_node_id: int, on: list[str | flowfile_frame.expr.Column] | str | flowfile_frame.expr.Column, left_on: list[str | flowfile_frame.expr.Column] | str | flowfile_frame.expr.Column, right_on: list[str | flowfile_frame.expr.Column] | str | flowfile_frame.expr.Column, left_columns: list[str] | None, right_columns: list[str] | None, how: str, suffix: str, validate: str, nulls_equal: bool, coalesce: bool, maintain_order: typing.Literal[None, 'left', 'right', 'left_right', 'right_left'], description: str) -> FlowFrame: ...

     # Generates the `input_df.sort(...)` Polars code string using pure expression strings.
-    def _generate_sort_polars_code(self, pure_sort_expr_strs: …
+    def _generate_sort_polars_code(self, pure_sort_expr_strs: list[str], descending_values: list[bool], nulls_last_values: list[bool], multithreaded: bool, maintain_order: bool) -> str: ...

     # Parse and validate join column specifications.
-    def _parse_join_columns(self, on: …
+    def _parse_join_columns(self, on: list[str | flowfile_frame.expr.Column] | str | flowfile_frame.expr.Column, left_on: list[str | flowfile_frame.expr.Column] | str | flowfile_frame.expr.Column, right_on: list[str | flowfile_frame.expr.Column] | str | flowfile_frame.expr.Column, how: str) -> tuple[list[str] | None, list[str] | None]: ...

     # Determine if we should use Polars code instead of native join.
     def _should_use_polars_code_for_join(self, maintain_order, coalesce, nulls_equal, validate, suffix) -> bool: ...

-    def _with_flowfile_formula(self, flowfile_formula: str, output_column_name, description: str = None) -> …
+    def _with_flowfile_formula(self, flowfile_formula: str, output_column_name, description: str = None) -> FlowFrame: ...

     # Approximate count of unique values.
-    def approx_n_unique(self, description: …
+    def approx_n_unique(self, description: str | None = None) -> FlowFrame: ...

     # Return the `k` smallest rows.
-    def bottom_k(self, k: int, by: IntoExpr | Iterable[IntoExpr], reverse: bool | Sequence[bool] = False, description: …
+    def bottom_k(self, k: int, by: IntoExpr | Iterable[IntoExpr], reverse: bool | Sequence[bool] = False, description: str | None = None) -> FlowFrame: ...

-    def cache(self, description: …
+    def cache(self, description: str | None = None) -> FlowFrame: ...

     # Cast LazyFrame column(s) to the specified dtype(s).
-    def cast(self, dtypes: Mapping[ColumnNameOrSelector | PolarsDataType, PolarsDataType | PythonDataType] | PolarsDataType | pl.DataTypeExpr, strict: bool = True, description: …
+    def cast(self, dtypes: Mapping[ColumnNameOrSelector | PolarsDataType, PolarsDataType | PythonDataType] | PolarsDataType | pl.DataTypeExpr, strict: bool = True, description: str | None = None) -> FlowFrame: ...

     # Create an empty copy of the current LazyFrame, with zero to 'n' rows.
-    def clear(self, n: int = 0, description: …
+    def clear(self, n: int = 0, description: str | None = None) -> FlowFrame: ...

     # Create a copy of this LazyFrame.
-    def clone(self, description: …
+    def clone(self, description: str | None = None) -> FlowFrame: ...

[Note: hunk continues beyond the visible excerpt; remainder truncated in the original rendering.]
|
# Collect lazy data into memory.
|
|
150
169
|
def collect(self, *args, **kwargs) -> DataFrame: ...
|
|
@@ -157,260 +176,260 @@ class FlowFrame:
|
|
|
157
176
|
|
|
158
177
|
# Get the column names.
|
|
159
178
|
@property
|
|
160
|
-
def columns(self) ->
|
|
179
|
+
def columns(self) -> list[str]: ...
|
|
161
180
|
|
|
162
181
|
# Combine multiple FlowFrames into a single FlowFrame.
|
|
163
|
-
def concat(self, other:
|
|
182
|
+
def concat(self, other: ForwardRef('FlowFrame') | list[ForwardRef('FlowFrame')], how: str = 'vertical', rechunk: bool = False, parallel: bool = True, description: str = None) -> FlowFrame: ...
|
|
164
183
|
|
|
165
184
|
# Return the number of non-null elements for each column.
|
|
166
|
-
def count(self, description:
|
|
185
|
+
def count(self, description: str | None = None) -> FlowFrame: ...
|
|
167
186
|
|
|
168
187
|
# Simple naive implementation of creating the frame from any type. It converts the data to a polars frame,
|
|
169
|
-
def create_from_any_type(self, data:
|
|
188
|
+
def create_from_any_type(self, data: collections.abc.Mapping[str, collections.abc.Sequence[object] | collections.abc.Mapping[str, collections.abc.Sequence[object]] | ForwardRef('Series')] | collections.abc.Sequence[typing.Any] | ForwardRef('np.ndarray[Any, Any]') | ForwardRef('pa.Table') | ForwardRef('pd.DataFrame') | ForwardRef('ArrowArrayExportable') | ForwardRef('ArrowStreamExportable') | ForwardRef('torch.Tensor') = None, schema: collections.abc.Mapping[str, ForwardRef('DataTypeClass') | ForwardRef('DataType') | type[int] | type[float] | type[bool] | type[str] | type[date] | type[time] | type[datetime] | type[timedelta] | type[list[typing.Any]] | type[tuple[typing.Any, ...]] | type[bytes] | type[object] | type[Decimal] | type[None] | NoneType] | collections.abc.Sequence[str | tuple[str, ForwardRef('DataTypeClass') | ForwardRef('DataType') | type[int] | type[float] | type[bool] | type[str] | type[date] | type[time] | type[datetime] | type[timedelta] | type[list[typing.Any]] | type[tuple[typing.Any, ...]] | type[bytes] | type[object] | type[Decimal] | type[None] | NoneType]] | NoneType = None, schema_overrides: collections.abc.Mapping[str, ForwardRef('DataTypeClass') | ForwardRef('DataType')] | None = None, strict: bool = True, orient: typing.Literal['col', 'row'] | None = None, infer_schema_length: int | None = 100, nan_to_null: bool = False, flow_graph = None, node_id = None, parent_node_id = None, description: str | None = None) -> Any: ...
|
|
170
189
|
|
|
171
190
|
# Creates a summary of statistics for a LazyFrame, returning a DataFrame.
|
|
172
191
|
def describe(self, percentiles: Sequence[float] | float | None = ..., interpolation: QuantileMethod = 'nearest') -> DataFrame: ...
|
|
173
192
|
|
|
174
193
|
# Read a logical plan from a file to construct a LazyFrame.
|
|
175
|
-
def deserialize(self, source: str | Path | IOBase, format: SerializationFormat = 'binary', description:
|
|
194
|
+
def deserialize(self, source: str | Path | IOBase, format: SerializationFormat = 'binary', description: str | None = None) -> FlowFrame: ...
|
|
176
195
|
|
|
177
196
|
# Remove columns from the DataFrame.
|
|
178
|
-
def drop(self, *columns, strict: bool = True, description:
|
|
197
|
+
def drop(self, *columns, strict: bool = True, description: str | None = None) -> FlowFrame: ...
|
|
179
198
|
|
|
180
199
|
# Drop all rows that contain one or more NaN values.
|
|
181
|
-
def drop_nans(self, subset: ColumnNameOrSelector | Collection[ColumnNameOrSelector] | None = None, description:
|
|
200
|
+
def drop_nans(self, subset: ColumnNameOrSelector | Collection[ColumnNameOrSelector] | None = None, description: str | None = None) -> FlowFrame: ...
|
|
182
201
|
|
|
183
202
|
# Drop all rows that contain one or more null values.
|
|
184
|
-
def drop_nulls(self, subset: ColumnNameOrSelector | Collection[ColumnNameOrSelector] | None = None, description:
|
|
203
|
+
def drop_nulls(self, subset: ColumnNameOrSelector | Collection[ColumnNameOrSelector] | None = None, description: str | None = None) -> FlowFrame: ...
|
|
185
204
|
|
|
186
205
|
# Get the column data types.
|
|
187
206
|
@property
|
|
188
|
-
def dtypes(self) ->
|
|
207
|
+
def dtypes(self) -> list[pl.classes.DataType]: ...
|
|
189
208
|
|
|
190
209
|
# Create a string representation of the query plan.
|
|
191
210
|
def explain(self, format: ExplainFormat = 'plain', optimized: bool = True, type_coercion: bool = True, predicate_pushdown: bool = True, projection_pushdown: bool = True, simplify_expression: bool = True, slice_pushdown: bool = True, comm_subplan_elim: bool = True, comm_subexpr_elim: bool = True, cluster_with_columns: bool = True, collapse_joins: bool = True, streaming: bool = False, engine: EngineType = 'auto', tree_format: bool | None = None, optimizations: QueryOptFlags = DEFAULT_QUERY_OPT_FLAGS) -> str: ...
|
|
192
211
|
|
|
193
212
|
# Explode the dataframe to long format by exploding the given columns.
|
|
194
|
-
def explode(self, columns:
|
|
213
|
+
def explode(self, columns: str | flowfile_frame.expr.Column | typing.Iterable[str | flowfile_frame.expr.Column], *more_columns, description: str = None) -> FlowFrame: ...
|
|
195
214
|
|
|
196
215
|
# Collect a small number of rows for debugging purposes.
|
|
197
216
|
def fetch(self, n_rows: int = 500, type_coercion: bool = True, _type_check: bool = True, predicate_pushdown: bool = True, projection_pushdown: bool = True, simplify_expression: bool = True, no_optimization: bool = False, slice_pushdown: bool = True, comm_subplan_elim: bool = True, comm_subexpr_elim: bool = True, cluster_with_columns: bool = True, collapse_joins: bool = True) -> DataFrame: ...
|
|
198
217
|
|
|
199
218
|
# Fill floating point NaN values.
|
|
200
|
-
def fill_nan(self, value: int | float | Expr | None, description:
|
|
219
|
+
def fill_nan(self, value: int | float | Expr | None, description: str | None = None) -> FlowFrame: ...
|
|
201
220
|
|
|
202
221
|
# Fill null values using the specified value or strategy.
|
|
203
|
-
def fill_null(self, value: Any | Expr | None = None, strategy: FillNullStrategy | None = None, limit: int | None = None, matches_supertype: bool = True, description:
|
|
222
|
+
def fill_null(self, value: Any | Expr | None = None, strategy: FillNullStrategy | None = None, limit: int | None = None, matches_supertype: bool = True, description: str | None = None) -> FlowFrame: ...
|
|
204
223
|
|
|
205
224
|
# Filter rows based on a predicate.
|
|
206
|
-
def filter(self, *predicates, flowfile_formula:
|
|
225
|
+
def filter(self, *predicates, flowfile_formula: str | None = None, description: str | None = None, **constraints) -> FlowFrame: ...
|
|
207
226
|
|
|
208
227
|
# Get the first row of the DataFrame.
|
|
209
|
-
def first(self, description:
|
|
228
|
+
def first(self, description: str | None = None) -> FlowFrame: ...
|
|
210
229
|
|
|
211
|
-
def fuzzy_match(self, other: FlowFrame, fuzzy_mappings:
|
|
230
|
+
def fuzzy_match(self, other: FlowFrame, fuzzy_mappings: list[flowfile_core.schemas.transform_schema.FuzzyMap], description: str = None) -> FlowFrame: ...
|
|
212
231
|
|
|
213
232
|
# Take every nth row in the LazyFrame and return as a new LazyFrame.
|
|
214
|
-
def gather_every(self, n: int, offset: int = 0, description:
|
|
233
|
+
def gather_every(self, n: int, offset: int = 0, description: str | None = None) -> FlowFrame: ...
|
|
215
234
|
|
|
216
|
-
def get_node_settings(self, description:
|
|
235
|
+
def get_node_settings(self, description: str | None = None) -> FlowNode: ...
|
|
217
236
|
|
|
218
237
|
# Start a group by operation.
|
|
219
|
-
def group_by(self, *by, description:
|
|
238
|
+
def group_by(self, *by, description: str | None = None, maintain_order: bool = False, **named_by) -> group_frame.GroupByFrame: ...
|
|
220
239
|
|
|
221
240
|
# Group based on a time value (or index value of type Int32, Int64).
|
|
222
|
-
def group_by_dynamic(self, index_column: IntoExpr, every: str | timedelta, period: str | timedelta | None = None, offset: str | timedelta | None = None, include_boundaries: bool = False, closed: ClosedInterval = 'left', label: Label = 'left', group_by: IntoExpr | Iterable[IntoExpr] | None = None, start_by: StartBy = 'window', description:
|
|
241
|
+
def group_by_dynamic(self, index_column: IntoExpr, every: str | timedelta, period: str | timedelta | None = None, offset: str | timedelta | None = None, include_boundaries: bool = False, closed: ClosedInterval = 'left', label: Label = 'left', group_by: IntoExpr | Iterable[IntoExpr] | None = None, start_by: StartBy = 'window', description: str | None = None) -> LazyGroupBy: ...
|
|
223
242
|
|
|
224
243
|
def head(self, n: int, description: str = None) -> Any: ...
|
|
225
244
|
|
|
226
245
|
# Inspect a node in the computation graph.
|
|
227
|
-
def inspect(self, fmt: str = '{}', description:
|
|
246
|
+
def inspect(self, fmt: str = '{}', description: str | None = None) -> FlowFrame: ...
|
|
228
247
|
|
|
229
248
|
# Interpolate intermediate values. The interpolation method is linear.
|
|
230
|
-
def interpolate(self, description:
|
|
249
|
+
def interpolate(self, description: str | None = None) -> FlowFrame: ...
|
|
231
250
|
|
|
232
251
|
# Add a join operation to the Logical Plan.
|
|
233
|
-
def join(self, other, on:
|
|
252
|
+
def join(self, other, on: list[str | flowfile_frame.expr.Column] | str | flowfile_frame.expr.Column = None, how: str = 'inner', left_on: list[str | flowfile_frame.expr.Column] | str | flowfile_frame.expr.Column = None, right_on: list[str | flowfile_frame.expr.Column] | str | flowfile_frame.expr.Column = None, suffix: str = '_right', validate: str = None, nulls_equal: bool = False, coalesce: bool = None, maintain_order: typing.Literal[None, 'left', 'right', 'left_right', 'right_left'] = None, description: str = None) -> FlowFrame: ...
|
|
234
253
|
|
|
235
254
|
# Perform an asof join.
|
|
236
|
-
def join_asof(self, other: FlowFrame, left_on: str | None | Expr = None, right_on: str | None | Expr = None, on: str | None | Expr = None, by_left: str | Sequence[str] | None = None, by_right: str | Sequence[str] | None = None, by: str | Sequence[str] | None = None, strategy: AsofJoinStrategy = 'backward', suffix: str = '_right', tolerance: str | int | float | timedelta | None = None, allow_parallel: bool = True, force_parallel: bool = False, coalesce: bool = True, allow_exact_matches: bool = True, check_sortedness: bool = True, description:
|
|
255
|
+
def join_asof(self, other: FlowFrame, left_on: str | None | Expr = None, right_on: str | None | Expr = None, on: str | None | Expr = None, by_left: str | Sequence[str] | None = None, by_right: str | Sequence[str] | None = None, by: str | Sequence[str] | None = None, strategy: AsofJoinStrategy = 'backward', suffix: str = '_right', tolerance: str | int | float | timedelta | None = None, allow_parallel: bool = True, force_parallel: bool = False, coalesce: bool = True, allow_exact_matches: bool = True, check_sortedness: bool = True, description: str | None = None) -> FlowFrame: ...
|
|
237
256
|
|
|
238
257
|
# Perform a join based on one or multiple (in)equality predicates.
|
|
239
|
-
def join_where(self, other: FlowFrame, *predicates, suffix: str = '_right', description:
|
|
258
|
+
def join_where(self, other: FlowFrame, *predicates, suffix: str = '_right', description: str | None = None) -> FlowFrame: ...
|
|
240
259
|
|
|
241
260
|
# Get the last row of the DataFrame.
|
|
242
|
-
def last(self, description:
|
|
261
|
+
def last(self, description: str | None = None) -> FlowFrame: ...
|
|
243
262
|
|
|
244
263
|
# Return lazy representation, i.e. itself.
|
|
245
|
-
def lazy(self, description:
|
|
264
|
+
def lazy(self, description: str | None = None) -> FlowFrame: ...
|
|
246
265
|
|
|
247
266
|
def limit(self, n: int, description: str = None) -> Any: ...
|
|
248
267
|
|
|
249
268
|
# Apply a custom function.
|
|
250
|
-
def map_batches(self, function: Callable[[DataFrame], DataFrame], predicate_pushdown: bool = True, projection_pushdown: bool = True, slice_pushdown: bool = True, no_optimizations: bool = False, schema: None | SchemaDict = None, validate_output_schema: bool = True, streamable: bool = False, description:
|
|
269
|
+
def map_batches(self, function: Callable[[DataFrame], DataFrame], predicate_pushdown: bool = True, projection_pushdown: bool = True, slice_pushdown: bool = True, no_optimizations: bool = False, schema: None | SchemaDict = None, validate_output_schema: bool = True, streamable: bool = False, description: str | None = None) -> FlowFrame: ...
|
|
251
270
|
|
|
252
271
|
# Match or evolve the schema of a LazyFrame into a specific schema.
|
|
253
|
-
def match_to_schema(self, schema: SchemaDict | Schema, missing_columns: Literal['insert', 'raise'] | Mapping[str, Literal['insert', 'raise'] | Expr] = 'raise', missing_struct_fields: Literal['insert', 'raise'] | Mapping[str, Literal['insert', 'raise']] = 'raise', extra_columns: Literal['ignore', 'raise'] = 'raise', extra_struct_fields: Literal['ignore', 'raise'] | Mapping[str, Literal['ignore', 'raise']] = 'raise', integer_cast: Literal['upcast', 'forbid'] | Mapping[str, Literal['upcast', 'forbid']] = 'forbid', float_cast: Literal['upcast', 'forbid'] | Mapping[str, Literal['upcast', 'forbid']] = 'forbid', description:
|
|
272
|
+
def match_to_schema(self, schema: SchemaDict | Schema, missing_columns: Literal['insert', 'raise'] | Mapping[str, Literal['insert', 'raise'] | Expr] = 'raise', missing_struct_fields: Literal['insert', 'raise'] | Mapping[str, Literal['insert', 'raise']] = 'raise', extra_columns: Literal['ignore', 'raise'] = 'raise', extra_struct_fields: Literal['ignore', 'raise'] | Mapping[str, Literal['ignore', 'raise']] = 'raise', integer_cast: Literal['upcast', 'forbid'] | Mapping[str, Literal['upcast', 'forbid']] = 'forbid', float_cast: Literal['upcast', 'forbid'] | Mapping[str, Literal['upcast', 'forbid']] = 'forbid', description: str | None = None) -> FlowFrame: ...
|
|
254
273
|
|
|
255
274
|
# Aggregate the columns in the LazyFrame to their maximum value.
|
|
256
|
-
def max(self, description:
|
|
275
|
+
def max(self, description: str | None = None) -> FlowFrame: ...
|
|
257
276
|
|
|
258
277
|
# Aggregate the columns in the LazyFrame to their mean value.
|
|
259
|
-
def mean(self, description:
|
|
278
|
+
def mean(self, description: str | None = None) -> FlowFrame: ...
|
|
260
279
|
|
|
261
280
|
# Aggregate the columns in the LazyFrame to their median value.
|
|
262
|
-
def median(self, description:
|
|
281
|
+
def median(self, description: str | None = None) -> FlowFrame: ...
|
|
263
282
|
|
|
264
283
|
# Unpivot a DataFrame from wide to long format.
|
|
265
|
-
def melt(self, id_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, value_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, variable_name: str | None = None, value_name: str | None = None, streamable: bool = True, description:
|
|
284
|
+
def melt(self, id_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, value_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, variable_name: str | None = None, value_name: str | None = None, streamable: bool = True, description: str | None = None) -> FlowFrame: ...
|
|
266
285
|
|
|
267
286
|
# Take two sorted DataFrames and merge them by the sorted key.
|
|
268
|
-
def merge_sorted(self, other: FlowFrame, key: str, description:
|
|
287
|
+
def merge_sorted(self, other: FlowFrame, key: str, description: str | None = None) -> FlowFrame: ...
|
|
269
288
|
|
|
270
289
|
# Aggregate the columns in the LazyFrame to their minimum value.
|
|
271
|
-
def min(self, description:
|
|
290
|
+
def min(self, description: str | None = None) -> FlowFrame: ...
|
|
272
291
|
|
|
273
292
|
# Aggregate the columns in the LazyFrame as the sum of their null value count.
|
|
274
|
-
def null_count(self, description:
|
|
293
|
+
def null_count(self, description: str | None = None) -> FlowFrame: ...
|
|
275
294
|
|
|
276
295
|
# Offers a structured way to apply a sequence of user-defined functions (UDFs).
|
|
277
|
-
def pipe(self, function: Callable[Concatenate[LazyFrame, P], T], *args, description:
|
|
296
|
+
def pipe(self, function: Callable[Concatenate[LazyFrame, P], T], *args, description: str | None = None, **kwargs) -> T: ...
|
|
278
297
|
|
|
279
298
|
# Pivot a DataFrame from long to wide format.
|
|
280
|
-
def pivot(self, on: str | list[str], index: str | list[str] | None = None, values: str | list[str] | None = None, aggregate_function: str | None = 'first', maintain_order: bool = True, sort_columns: bool = False, separator: str = '_', description: str = None) ->
|
|
299
|
+
def pivot(self, on: str | list[str], index: str | list[str] | None = None, values: str | list[str] | None = None, aggregate_function: str | None = 'first', maintain_order: bool = True, sort_columns: bool = False, separator: str = '_', description: str = None) -> FlowFrame: ...
|
|
281
300
|
|
|
282
301
|
# Profile a LazyFrame.
|
|
283
302
|
def profile(self, type_coercion: bool = True, predicate_pushdown: bool = True, projection_pushdown: bool = True, simplify_expression: bool = True, no_optimization: bool = False, slice_pushdown: bool = True, comm_subplan_elim: bool = True, comm_subexpr_elim: bool = True, cluster_with_columns: bool = True, collapse_joins: bool = True, show_plot: bool = False, truncate_nodes: int = 0, figsize: tuple[int, int] = ..., engine: EngineType = 'auto', optimizations: QueryOptFlags = DEFAULT_QUERY_OPT_FLAGS, **_kwargs) -> tuple[DataFrame, DataFrame]: ...
|
|
284
303
|
|
|
285
304
|
# Aggregate the columns in the LazyFrame to their quantile value.
|
|
286
|
-
def quantile(self, quantile: float | Expr, interpolation: QuantileMethod = 'nearest', description:
|
|
305
|
+
def quantile(self, quantile: float | Expr, interpolation: QuantileMethod = 'nearest', description: str | None = None) -> FlowFrame: ...
|
|
287
306
|
|
|
288
307
|
# Run a query remotely on Polars Cloud.
|
|
289
|
-
def remote(self, context: pc.ComputeContext | None = None, plan_type: pc._typing.PlanTypePreference = 'dot', description:
|
|
308
|
+
def remote(self, context: pc.ComputeContext | None = None, plan_type: pc._typing.PlanTypePreference = 'dot', description: str | None = None) -> pc.LazyFrameExt: ...
|
|
290
309
|
|
|
291
310
|
# Remove rows, dropping those that match the given predicate expression(s).
|
|
292
|
-
def remove(self, *predicates, description:
|
|
311
|
+
def remove(self, *predicates, description: str | None = None, **constraints) -> FlowFrame: ...
|
|
293
312
|
|
|
294
313
|
# Rename column names.
|
|
295
|
-
def rename(self, mapping: Mapping[str, str] | Callable[[str], str], strict: bool = True, description:
|
|
314
|
+
def rename(self, mapping: Mapping[str, str] | Callable[[str], str], strict: bool = True, description: str | None = None) -> FlowFrame: ...
|
|
296
315
|
|
|
297
316
|
# Reverse the DataFrame.
|
|
298
|
-
def reverse(self, description:
|
|
317
|
+
def reverse(self, description: str | None = None) -> FlowFrame: ...
|
|
299
318
|
|
|
300
319
|
# Create rolling groups based on a temporal or integer column.
|
|
301
|
-
def rolling(self, index_column: IntoExpr, period: str | timedelta, offset: str | timedelta | None = None, closed: ClosedInterval = 'right', group_by: IntoExpr | Iterable[IntoExpr] | None = None, description:
|
|
320
|
+
def rolling(self, index_column: IntoExpr, period: str | timedelta, offset: str | timedelta | None = None, closed: ClosedInterval = 'right', group_by: IntoExpr | Iterable[IntoExpr] | None = None, description: str | None = None) -> LazyGroupBy: ...
|
|
302
321
|
|
|
303
322
|
# Save the graph
|
|
304
|
-
def save_graph(self, file_path: str, auto_arrange: bool = True, description:
|
|
323
|
+
def save_graph(self, file_path: str, auto_arrange: bool = True, description: str | None = None) -> Any: ...
|
|
305
324
|
|
|
306
325
|
# Get an ordered mapping of column names to their data type.
|
|
307
326
|
@property
|
|
308
327
|
def schema(self) -> pl.Schema: ...
|
|
309
328
|
|
|
310
329
|
# Select columns from the frame.
|
|
311
|
-
def select(self, *columns, description:
|
|
330
|
+
def select(self, *columns, description: str | None = None) -> FlowFrame: ...
|
|
312
331
|
|
|
313
332
|
# Select columns from this LazyFrame.
|
|
314
|
-
def select_seq(self, *exprs, description:
|
|
333
|
+
def select_seq(self, *exprs, description: str | None = None, **named_exprs) -> FlowFrame: ...
|
|
315
334
|
|
|
316
335
|
# Serialize the logical plan of this LazyFrame to a file or string in JSON format.
|
|
317
|
-
def serialize(self, file: IOBase | str | Path | None = None, format: SerializationFormat = 'binary', description:
|
|
336
|
+
def serialize(self, file: IOBase | str | Path | None = None, format: SerializationFormat = 'binary', description: str | None = None) -> bytes | str | None: ...
|
|
318
337
|
|
|
319
338
|
# Flag a column as sorted.
|
|
320
|
-
def set_sorted(self, column: str, descending: bool = False, description:
|
|
339
|
+
def set_sorted(self, column: str, descending: bool = False, description: str | None = None) -> FlowFrame: ...
|
|
321
340
|
|
|
322
341
|
# Shift values by the given number of indices.
|
|
323
|
-
def shift(self, n: int | IntoExprColumn = 1, fill_value: IntoExpr | None = None, description:
|
|
342
|
+
def shift(self, n: int | IntoExprColumn = 1, fill_value: IntoExpr | None = None, description: str | None = None) -> FlowFrame: ...
|
|
324
343
|
|
|
325
344
|
# Show a plot of the query plan.
|
|
326
345
|
def show_graph(self, optimized: bool = True, show: bool = True, output_path: str | Path | None = None, raw_output: bool = False, figsize: tuple[float, float] = ..., type_coercion: bool = True, _type_check: bool = True, predicate_pushdown: bool = True, projection_pushdown: bool = True, simplify_expression: bool = True, slice_pushdown: bool = True, comm_subplan_elim: bool = True, comm_subexpr_elim: bool = True, cluster_with_columns: bool = True, collapse_joins: bool = True, engine: EngineType = 'auto', plan_stage: PlanStage = 'ir', _check_order: bool = True, optimizations: QueryOptFlags = DEFAULT_QUERY_OPT_FLAGS) -> str | None: ...
|
|
327
346
|
|
|
328
347
|
# Write the data to a CSV file.
|
|
329
|
-
def sink_csv(self, file: str, *args, separator: str = ',', encoding: str = 'utf-8', description: str = None) ->
|
|
348
|
+
def sink_csv(self, file: str, *args, separator: str = ',', encoding: str = 'utf-8', description: str = None) -> FlowFrame: ...
|
|
330
349
|
|
|
331
350
|
# Evaluate the query in streaming mode and write to an IPC file.
|
|
332
|
-
def sink_ipc(self, path: str | Path | IO[bytes] | PartitioningScheme, compression: IpcCompression | None = 'uncompressed', compat_level: CompatLevel | None = None, maintain_order: bool = True, storage_options: dict[str, Any] | None = None, credential_provider: CredentialProviderFunction | Literal['auto'] | None = 'auto', retries: int = 2, sync_on_close: SyncOnCloseMethod | None = None, mkdir: bool = False, lazy: bool = False, engine: EngineType = 'auto', optimizations: QueryOptFlags = DEFAULT_QUERY_OPT_FLAGS, description:
|
|
351
|
+
def sink_ipc(self, path: str | Path | IO[bytes] | PartitioningScheme, compression: IpcCompression | None = 'uncompressed', compat_level: CompatLevel | None = None, maintain_order: bool = True, storage_options: dict[str, Any] | None = None, credential_provider: CredentialProviderFunction | Literal['auto'] | None = 'auto', retries: int = 2, sync_on_close: SyncOnCloseMethod | None = None, mkdir: bool = False, lazy: bool = False, engine: EngineType = 'auto', optimizations: QueryOptFlags = DEFAULT_QUERY_OPT_FLAGS, description: str | None = None) -> LazyFrame | None: ...
|
|
333
352
|
|
|
334
353
|
# Evaluate the query in streaming mode and write to an NDJSON file.
|
|
335
|
-
def sink_ndjson(self, path: str | Path | IO[bytes] | IO[str] | PartitioningScheme, maintain_order: bool = True, storage_options: dict[str, Any] | None = None, credential_provider: CredentialProviderFunction | Literal['auto'] | None = 'auto', retries: int = 2, sync_on_close: SyncOnCloseMethod | None = None, mkdir: bool = False, lazy: bool = False, engine: EngineType = 'auto', optimizations: QueryOptFlags = DEFAULT_QUERY_OPT_FLAGS, description:
|
|
354
|
+
def sink_ndjson(self, path: str | Path | IO[bytes] | IO[str] | PartitioningScheme, maintain_order: bool = True, storage_options: dict[str, Any] | None = None, credential_provider: CredentialProviderFunction | Literal['auto'] | None = 'auto', retries: int = 2, sync_on_close: SyncOnCloseMethod | None = None, mkdir: bool = False, lazy: bool = False, engine: EngineType = 'auto', optimizations: QueryOptFlags = DEFAULT_QUERY_OPT_FLAGS, description: str | None = None) -> LazyFrame | None: ...
|
|
336
355
|
|
|
337
356
|
# Evaluate the query in streaming mode and write to a Parquet file.
|
|
338
|
-
def sink_parquet(self, path: str | Path | IO[bytes] | PartitioningScheme, compression: str = 'zstd', compression_level: int | None = None, statistics: bool | str | dict[str, bool] = True, row_group_size: int | None = None, data_page_size: int | None = None, maintain_order: bool = True, storage_options: dict[str, Any] | None = None, credential_provider: CredentialProviderFunction | Literal['auto'] | None = 'auto', retries: int = 2, sync_on_close: SyncOnCloseMethod | None = None, metadata: ParquetMetadata | None = None, mkdir: bool = False, lazy: bool = False, field_overwrites: ParquetFieldOverwrites | Sequence[ParquetFieldOverwrites] | Mapping[str, ParquetFieldOverwrites] | None = None, engine: EngineType = 'auto', optimizations: QueryOptFlags = DEFAULT_QUERY_OPT_FLAGS, description:
|
|
357
|
+
def sink_parquet(self, path: str | Path | IO[bytes] | PartitioningScheme, compression: str = 'zstd', compression_level: int | None = None, statistics: bool | str | dict[str, bool] = True, row_group_size: int | None = None, data_page_size: int | None = None, maintain_order: bool = True, storage_options: dict[str, Any] | None = None, credential_provider: CredentialProviderFunction | Literal['auto'] | None = 'auto', retries: int = 2, sync_on_close: SyncOnCloseMethod | None = None, metadata: ParquetMetadata | None = None, mkdir: bool = False, lazy: bool = False, field_overwrites: ParquetFieldOverwrites | Sequence[ParquetFieldOverwrites] | Mapping[str, ParquetFieldOverwrites] | None = None, engine: EngineType = 'auto', optimizations: QueryOptFlags = DEFAULT_QUERY_OPT_FLAGS, description: str | None = None) -> LazyFrame | None: ...
|
|
339
358
|
|
|
340
359
|
# Get a slice of this DataFrame.
|
|
341
|
-
def slice(self, offset: int, length: int | None = None, description:
|
|
360
|
+
def slice(self, offset: int, length: int | None = None, description: str | None = None) -> FlowFrame: ...
|
|
342
361
|
|
|
343
362
|
# Sort the dataframe by the given columns.
|
|
344
|
-
def sort(self, by:
|
|
363
|
+
def sort(self, by: list[flowfile_frame.expr.Expr | str] | flowfile_frame.expr.Expr | str, *more_by, descending: bool | list[bool] = False, nulls_last: bool | list[bool] = False, multithreaded: bool = True, maintain_order: bool = False, description: str | None = None) -> FlowFrame: ...
|
|
345
364
|
|
|
346
365
|
# Execute a SQL query against the LazyFrame.
|
|
347
|
-
def sql(self, query: str, table_name: str = 'self', description:
|
|
366
|
+
def sql(self, query: str, table_name: str = 'self', description: str | None = None) -> FlowFrame: ...
|
|
348
367
|
|
|
349
368
|
# Aggregate the columns in the LazyFrame to their standard deviation value.
|
|
350
|
-
def std(self, ddof: int = 1, description:
|
|
369
|
+
def std(self, ddof: int = 1, description: str | None = None) -> FlowFrame: ...
|
|
351
370
|
|
|
352
371
|
# Aggregate the columns in the LazyFrame to their sum value.
|
|
353
|
-
def sum(self, description:
|
|
372
|
+
def sum(self, description: str | None = None) -> FlowFrame: ...
|
|
354
373
|
|
|
355
374
|
# Get the last `n` rows.
|
|
356
|
-
def tail(self, n: int = 5, description:
|
|
375
|
+
def tail(self, n: int = 5, description: str | None = None) -> FlowFrame: ...
|
|
357
376
|
|
|
358
377
|
# Split text in a column into multiple rows.
|
|
359
|
-
def text_to_rows(self, column: str | flowfile_frame.expr.Column, output_column: str = None, delimiter: str = None, split_by_column: str = None, description: str = None) ->
|
|
378
|
+
def text_to_rows(self, column: str | flowfile_frame.expr.Column, output_column: str = None, delimiter: str = None, split_by_column: str = None, description: str = None) -> FlowFrame: ...
|
|
360
379
|
|
|
361
380
|
# Get the underlying ETL graph.
|
|
362
|
-
def to_graph(self, description:
|
|
381
|
+
def to_graph(self, description: str | None = None) -> Any: ...
|
|
363
382
|
|
|
364
383
|
# Return the `k` largest rows.
|
|
365
|
-
def top_k(self, k: int, by: IntoExpr | Iterable[IntoExpr], reverse: bool | Sequence[bool] = False, description:
|
|
384
|
+
def top_k(self, k: int, by: IntoExpr | Iterable[IntoExpr], reverse: bool | Sequence[bool] = False, description: str | None = None) -> FlowFrame: ...
|
|
366
385
|
|
|
367
386
|
# Drop duplicate rows from this dataframe.
|
|
368
|
-
def unique(self, subset:
|
|
387
|
+
def unique(self, subset: str | ForwardRef('Expr') | list[ForwardRef('Expr') | str] = None, keep: typing.Literal['first', 'last', 'any', 'none'] = 'any', maintain_order: bool = False, description: str = None) -> FlowFrame: ...
|
|
369
388
|
|
|
370
389
|
# Decompose struct columns into separate columns for each of their fields.
|
|
371
|
-
def unnest(self, columns: ColumnNameOrSelector | Collection[ColumnNameOrSelector], *more_columns, description:
|
|
390
|
+
def unnest(self, columns: ColumnNameOrSelector | Collection[ColumnNameOrSelector], *more_columns, description: str | None = None) -> FlowFrame: ...
|
|
372
391
|
|
|
373
392
|
# Unpivot a DataFrame from wide to long format.
|
|
374
|
-
def unpivot(self, on: list[str | flowfile_frame.selectors.Selector] | str | None | flowfile_frame.selectors.Selector = None, index: list[str] | str | None = None, variable_name: str = 'variable', value_name: str = 'value', description: str = None) ->
|
|
393
|
+
def unpivot(self, on: list[str | flowfile_frame.selectors.Selector] | str | None | flowfile_frame.selectors.Selector = None, index: list[str] | str | None = None, variable_name: str = 'variable', value_name: str = 'value', description: str = None) -> FlowFrame: ...
|
|
375
394
|
|
|
376
395
|
# Update the values in this `LazyFrame` with the values in `other`.
|
|
377
|
-
def update(self, other: FlowFrame, on: str | Sequence[str] | None = None, how: Literal['left', 'inner', 'full'] = 'left', left_on: str | Sequence[str] | None = None, right_on: str | Sequence[str] | None = None, include_nulls: bool = False, maintain_order: MaintainOrderJoin | None = 'left', description:
|
|
396
|
+
def update(self, other: FlowFrame, on: str | Sequence[str] | None = None, how: Literal['left', 'inner', 'full'] = 'left', left_on: str | Sequence[str] | None = None, right_on: str | Sequence[str] | None = None, include_nulls: bool = False, maintain_order: MaintainOrderJoin | None = 'left', description: str | None = None) -> FlowFrame: ...
|
|
378
397
|
|
|
379
398
|
# Aggregate the columns in the LazyFrame to their variance value.
|
|
380
|
-
def var(self, ddof: int = 1, description:
|
|
399
|
+
def var(self, ddof: int = 1, description: str | None = None) -> FlowFrame: ...
|
|
381
400
|
|
|
382
401
|
# Get the number of columns.
|
|
383
402
|
@property
|
|
384
403
|
def width(self) -> int: ...
|
|
385
404
|
|
|
386
405
|
# Add or replace columns in the DataFrame.
|
|
387
|
-
def with_columns(self, *exprs:
|
|
406
|
+
def with_columns(self, *exprs: Expr | Iterable[Expr] | Any, flowfile_formulas: list[str] | None = None, output_column_names: list[str] | None = None, description: str | None = None, **named_exprs: Expr | Any) -> FlowFrame: ...
|
|
388
407
|
|
|
389
408
|
# Add columns to this LazyFrame.
|
|
390
|
-
def with_columns_seq(self, *exprs, description:
|
|
409
|
+
def with_columns_seq(self, *exprs, description: str | None = None, **named_exprs) -> FlowFrame: ...
|
|
391
410
|
|
|
392
411
|
# Add an external context to the computation graph.
|
|
393
|
-
def with_context(self, other: Self | list[Self], description:
|
|
412
|
+
def with_context(self, other: Self | list[Self], description: str | None = None) -> FlowFrame: ...
|
|
394
413
|
|
|
395
414
|
# Add a column at index 0 that counts the rows.
|
|
396
|
-
def with_row_count(self, name: str = 'row_nr', offset: int = 0, description:
|
|
415
|
+
def with_row_count(self, name: str = 'row_nr', offset: int = 0, description: str | None = None) -> FlowFrame: ...
|
|
397
416
|
|
|
398
417
|
# Add a row index as the first column in the DataFrame.
|
|
399
|
-
def with_row_index(self, name: str = 'index', offset: int = 0, description: str = None) ->
|
|
418
|
+
def with_row_index(self, name: str = 'index', offset: int = 0, description: str = None) -> FlowFrame: ...
|
|
400
419
|
|
|
401
|
-
def write_csv(self, file: str | os.PathLike, separator: str = ',', encoding: str = 'utf-8', convert_to_absolute_path: bool = True, description: str = None, **kwargs) ->
|
|
420
|
+
def write_csv(self, file: str | os.PathLike, separator: str = ',', encoding: str = 'utf-8', convert_to_absolute_path: bool = True, description: str = None, **kwargs) -> FlowFrame: ...
|
|
402
421
|
|
|
403
422
|
# Write the data frame to cloud storage in CSV format.
|
|
404
|
-
def write_csv_to_cloud_storage(self, path: str, connection_name:
|
|
423
|
+
def write_csv_to_cloud_storage(self, path: str, connection_name: str | None = None, delimiter: str = ';', encoding: typing.Literal['utf8', 'utf8-lossy'] = 'utf8', description: str | None = None) -> FlowFrame: ...
|
|
405
424
|
|
|
406
425
|
# Write the data frame to cloud storage in Delta Lake format.
|
|
407
|
-
def write_delta(self, path: str, connection_name:
|
|
426
|
+
def write_delta(self, path: str, connection_name: str | None = None, write_mode: typing.Literal['overwrite', 'append'] = 'overwrite', description: str | None = None) -> FlowFrame: ...
|
|
408
427
|
|
|
409
428
|
# Write the data frame to cloud storage in JSON format.
|
|
410
|
-
def write_json_to_cloud_storage(self, path: str, connection_name:
|
|
429
|
+
def write_json_to_cloud_storage(self, path: str, connection_name: str | None = None, description: str | None = None) -> FlowFrame: ...
|
|
411
430
|
|
|
412
431
|
# Write the data to a Parquet file. Creates a standard Output node if only
|
|
413
|
-
def write_parquet(self, path: str | os.PathLike, convert_to_absolute_path: bool = True, description: str = None, **kwargs) ->
|
|
432
|
+
def write_parquet(self, path: str | os.PathLike, convert_to_absolute_path: bool = True, description: str = None, **kwargs) -> FlowFrame: ...
|
|
414
433
|
|
|
415
434
|
# Write the data frame to cloud storage in Parquet format.
|
|
416
|
-
def write_parquet_to_cloud_storage(self, path: str, connection_name:
|
|
435
|
+
def write_parquet_to_cloud_storage(self, path: str, connection_name: str | None = None, compression: typing.Literal['snappy', 'gzip', 'brotli', 'lz4', 'zstd'] = 'snappy', description: str | None = None) -> FlowFrame: ...
|