Flowfile 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backends/main.py +25 -22
- build_backends/main_prd.py +10 -19
- flowfile/__init__.py +178 -74
- flowfile/__main__.py +10 -7
- flowfile/api.py +51 -57
- flowfile/web/__init__.py +14 -9
- flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
- flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
- flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
- flowfile/web/static/assets/{CloudConnectionManager-0dfba9f2.js → CloudConnectionView-f13f202b.js} +11 -11
- flowfile/web/static/assets/{CloudStorageReader-d5b1b6c9.js → CloudStorageReader-0023d4a5.js} +10 -8
- flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
- flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
- flowfile/web/static/assets/{CloudStorageWriter-00d87aad.js → CloudStorageWriter-8e781e11.js} +10 -8
- flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
- flowfile/web/static/assets/{ColumnSelector-4685e75d.js → ColumnSelector-8ad68ea9.js} +3 -5
- flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-23e909da.js → ContextMenu-31ee57f0.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-70ae0c79.js → ContextMenu-69a74055.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-f149cf7c.js → ContextMenu-8e2051c6.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
- flowfile/web/static/assets/{CrossJoin-702a3edd.js → CrossJoin-03df6938.js} +12 -10
- flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
- flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
- flowfile/web/static/assets/{CustomNode-b1519993.js → CustomNode-8479239b.js} +36 -24
- flowfile/web/static/assets/{DatabaseConnectionSettings-6f3e4ea5.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
- flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
- flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
- flowfile/web/static/assets/{DatabaseReader-d38c7295.js → DatabaseReader-c58b9552.js} +25 -15
- flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
- flowfile/web/static/assets/{DatabaseManager-cf5ef661.js → DatabaseView-d26a9140.js} +11 -11
- flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
- flowfile/web/static/assets/{DatabaseWriter-b04ef46a.js → DatabaseWriter-4d05ddc7.js} +17 -10
- flowfile/web/static/assets/{designer-8da3ba3a.css → DesignerView-a6d0ee84.css} +614 -546
- flowfile/web/static/assets/{designer-9633482a.js → DesignerView-e6f5c0e8.js} +1107 -3170
- flowfile/web/static/assets/{documentation-ca400224.js → DocumentationView-2e78ef1b.js} +5 -5
- flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
- flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
- flowfile/web/static/assets/{ExploreData-5fa10ed8.js → ExploreData-7b54caca.js} +18 -9
- flowfile/web/static/assets/{ExternalSource-d39af878.js → ExternalSource-3fa399b2.js} +9 -7
- flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
- flowfile/web/static/assets/Filter-7494ea97.css +48 -0
- flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
- flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
- flowfile/web/static/assets/{Formula-6b04fb1d.js → Formula-aac42b1e.js} +13 -11
- flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
- flowfile/web/static/assets/{FuzzyMatch-999521f4.js → FuzzyMatch-cd9bbfca.js} +12 -10
- flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
- flowfile/web/static/assets/{GraphSolver-17dd2198.js → GraphSolver-c7e6780e.js} +13 -11
- flowfile/web/static/assets/{GroupBy-6b039e18.js → GroupBy-93c5d22b.js} +9 -7
- flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
- flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
- flowfile/web/static/assets/{Join-24d0f113.js → Join-a19b2de2.js} +13 -11
- flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
- flowfile/web/static/assets/LoginView-d325d632.css +172 -0
- flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
- flowfile/web/static/assets/{ManualInput-34639209.js → ManualInput-8d3374b2.js} +170 -116
- flowfile/web/static/assets/{MultiSelect-0e8724a3.js → MultiSelect-ad1b6243.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
- flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
- flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
- flowfile/web/static/assets/{NumericInput-3d63a470.js → NumericInput-7100234c.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
- flowfile/web/static/assets/{Output-283fe388.css → Output-35e97000.css} +6 -6
- flowfile/web/static/assets/{Output-edea9802.js → Output-f5efd2aa.js} +12 -9
- flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
- flowfile/web/static/assets/{Pivot-61d19301.js → Pivot-d981d23c.js} +11 -9
- flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
- flowfile/web/static/assets/{PivotValidation-f97fec5b.js → PivotValidation-39386e95.js} +3 -3
- flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
- flowfile/web/static/assets/{PivotValidation-de9f43fe.js → PivotValidation-63de1f73.js} +3 -3
- flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
- flowfile/web/static/assets/{PolarsCode-bc3c9984.js → PolarsCode-f9d69217.js} +18 -9
- flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
- flowfile/web/static/assets/PopOver-d96599db.css +33 -0
- flowfile/web/static/assets/{Read-e808b239.css → Read-36e7bd51.css} +12 -12
- flowfile/web/static/assets/{Read-64a3f259.js → Read-aec2e377.js} +14 -11
- flowfile/web/static/assets/{RecordCount-3d5039be.js → RecordCount-78ed6845.js} +6 -4
- flowfile/web/static/assets/{RecordId-597510e0.js → RecordId-2156e890.js} +8 -6
- flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
- flowfile/web/static/assets/{SQLQueryComponent-df51adbe.js → SQLQueryComponent-48c72f5b.js} +3 -3
- flowfile/web/static/assets/{Sample-4be0a507.js → Sample-1352ca74.js} +6 -4
- flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
- flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
- flowfile/web/static/assets/{SecretManager-4839be57.js → SecretsView-17df66ee.js} +35 -36
- flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
- flowfile/web/static/assets/{Select-9b72f201.js → Select-0aee4c54.js} +9 -7
- flowfile/web/static/assets/{SettingsSection-f0f75a42.js → SettingsSection-0784e157.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-e1e9c953.js → SettingsSection-cd341bb6.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-7ded385d.js → SettingsSection-f2002a6d.js} +3 -3
- flowfile/web/static/assets/{SingleSelect-6c777aac.js → SingleSelect-460cc0ea.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
- flowfile/web/static/assets/{SliderInput-7cb93e62.js → SliderInput-5d926864.js} +7 -4
- flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
- flowfile/web/static/assets/{Sort-6cbde21a.js → Sort-3cdc971b.js} +9 -7
- flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
- flowfile/web/static/assets/{TextInput-d9a40c11.js → TextInput-a2d0bfbd.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-5896c375.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
- flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
- flowfile/web/static/assets/{TextToRows-c4fcbf4d.js → TextToRows-918945f7.js} +11 -10
- flowfile/web/static/assets/{ToggleSwitch-4ef91d19.js → ToggleSwitch-f0ef5196.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
- flowfile/web/static/assets/{UnavailableFields-a03f512c.js → UnavailableFields-bdad6144.js} +4 -4
- flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
- flowfile/web/static/assets/{Union-bfe9b996.js → Union-e8ab8c86.js} +8 -6
- flowfile/web/static/assets/{Unique-5d023a27.js → Unique-8cd4f976.js} +13 -10
- flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
- flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
- flowfile/web/static/assets/{Unpivot-91cc5354.js → Unpivot-8da14095.js} +10 -8
- flowfile/web/static/assets/{UnpivotValidation-7ee2de44.js → UnpivotValidation-6f7d89ff.js} +3 -3
- flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
- flowfile/web/static/assets/{VueGraphicWalker-e51b9924.js → VueGraphicWalker-3fb312e1.js} +4 -4
- flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
- flowfile/web/static/assets/{api-cf1221f0.js → api-24483f0d.js} +1 -1
- flowfile/web/static/assets/{api-c1bad5ca.js → api-8b81fa73.js} +1 -1
- flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
- flowfile/web/static/assets/{dropDown-614b998d.js → dropDown-ac0fda9d.js} +3 -3
- flowfile/web/static/assets/{fullEditor-f7971590.js → fullEditor-5497a84a.js} +11 -10
- flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
- flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
- flowfile/web/static/assets/{genericNodeSettings-4fe5f36b.js → genericNodeSettings-99014e1d.js} +5 -5
- flowfile/web/static/assets/index-07dda503.js +38 -0
- flowfile/web/static/assets/index-3ba44389.js +2696 -0
- flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
- flowfile/web/static/assets/{index-5429bbf8.js → index-fb6493ae.js} +41626 -40867
- flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
- flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
- flowfile/web/static/assets/{outputCsv-076b85ab.js → outputCsv-8f8ba42d.js} +3 -3
- flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
- flowfile/web/static/assets/{outputExcel-0fd17dbe.js → outputExcel-393f4fef.js} +3 -3
- flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
- flowfile/web/static/assets/{outputParquet-b61e0847.js → outputParquet-07c81f65.js} +4 -4
- flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
- flowfile/web/static/assets/{readCsv-a8bb8b61.js → readCsv-07f6d9ad.js} +3 -3
- flowfile/web/static/assets/{readCsv-c767cb37.css → readCsv-3bfac4c3.css} +15 -15
- flowfile/web/static/assets/{readExcel-806d2826.css → readExcel-3db6b763.css} +13 -13
- flowfile/web/static/assets/{readExcel-67b4aee0.js → readExcel-ed69bc8f.js} +5 -5
- flowfile/web/static/assets/{readParquet-48c81530.css → readParquet-c5244ad5.css} +4 -4
- flowfile/web/static/assets/{readParquet-92ce1dbc.js → readParquet-e3ed4528.js} +3 -3
- flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
- flowfile/web/static/assets/{selectDynamic-92e25ee3.js → selectDynamic-80b92899.js} +5 -5
- flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
- flowfile/web/static/assets/{vue-codemirror.esm-41b0e0d7.js → vue-codemirror.esm-0965f39f.js} +31 -640
- flowfile/web/static/assets/{vue-content-loader.es-2c8e608f.js → vue-content-loader.es-c506ad97.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +2 -3
- flowfile-0.5.3.dist-info/RECORD +402 -0
- flowfile_core/__init__.py +13 -6
- flowfile_core/auth/jwt.py +51 -16
- flowfile_core/auth/models.py +32 -7
- flowfile_core/auth/password.py +89 -0
- flowfile_core/auth/secrets.py +8 -6
- flowfile_core/configs/__init__.py +9 -7
- flowfile_core/configs/flow_logger.py +15 -14
- flowfile_core/configs/node_store/__init__.py +72 -4
- flowfile_core/configs/node_store/nodes.py +155 -172
- flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
- flowfile_core/configs/settings.py +28 -15
- flowfile_core/database/connection.py +7 -6
- flowfile_core/database/init_db.py +96 -2
- flowfile_core/database/models.py +3 -1
- flowfile_core/fileExplorer/__init__.py +17 -0
- flowfile_core/fileExplorer/funcs.py +123 -57
- flowfile_core/fileExplorer/utils.py +10 -11
- flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
- flowfile_core/flowfile/analytics/analytics_processor.py +26 -24
- flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
- flowfile_core/flowfile/analytics/utils.py +1 -1
- flowfile_core/flowfile/code_generator/code_generator.py +358 -244
- flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
- flowfile_core/flowfile/database_connection_manager/models.py +1 -1
- flowfile_core/flowfile/extensions.py +17 -12
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +115 -83
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +481 -423
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +31 -20
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +14 -15
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
- flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +190 -127
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
- flowfile_core/flowfile/flow_data_engine/utils.py +99 -67
- flowfile_core/flowfile/flow_graph.py +918 -571
- flowfile_core/flowfile/flow_graph_utils.py +31 -49
- flowfile_core/flowfile/flow_node/flow_node.py +330 -233
- flowfile_core/flowfile/flow_node/models.py +53 -41
- flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
- flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
- flowfile_core/flowfile/handler.py +80 -30
- flowfile_core/flowfile/manage/compatibility_enhancements.py +209 -126
- flowfile_core/flowfile/manage/io_flowfile.py +54 -57
- flowfile_core/flowfile/node_designer/__init__.py +15 -13
- flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
- flowfile_core/flowfile/node_designer/custom_node.py +162 -36
- flowfile_core/flowfile/node_designer/ui_components.py +135 -34
- flowfile_core/flowfile/schema_callbacks.py +71 -51
- flowfile_core/flowfile/setting_generator/__init__.py +0 -1
- flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
- flowfile_core/flowfile/setting_generator/settings.py +64 -53
- flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
- flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
- flowfile_core/flowfile/util/calculate_layout.py +9 -13
- flowfile_core/flowfile/util/execution_orderer.py +25 -17
- flowfile_core/flowfile/util/node_skipper.py +4 -4
- flowfile_core/flowfile/utils.py +19 -21
- flowfile_core/main.py +26 -19
- flowfile_core/routes/auth.py +284 -11
- flowfile_core/routes/cloud_connections.py +25 -25
- flowfile_core/routes/logs.py +21 -29
- flowfile_core/routes/public.py +3 -3
- flowfile_core/routes/routes.py +70 -34
- flowfile_core/routes/secrets.py +25 -27
- flowfile_core/routes/user_defined_components.py +483 -4
- flowfile_core/run_lock.py +0 -1
- flowfile_core/schemas/__init__.py +4 -6
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
- flowfile_core/schemas/cloud_storage_schemas.py +59 -53
- flowfile_core/schemas/input_schema.py +231 -144
- flowfile_core/schemas/output_model.py +49 -34
- flowfile_core/schemas/schemas.py +116 -89
- flowfile_core/schemas/transform_schema.py +518 -263
- flowfile_core/schemas/yaml_types.py +21 -7
- flowfile_core/secret_manager/secret_manager.py +17 -13
- flowfile_core/types.py +29 -9
- flowfile_core/utils/arrow_reader.py +7 -6
- flowfile_core/utils/excel_file_manager.py +3 -3
- flowfile_core/utils/fileManager.py +7 -7
- flowfile_core/utils/fl_executor.py +8 -10
- flowfile_core/utils/utils.py +4 -4
- flowfile_core/utils/validate_setup.py +5 -4
- flowfile_frame/__init__.py +106 -51
- flowfile_frame/adapters.py +2 -9
- flowfile_frame/adding_expr.py +73 -32
- flowfile_frame/cloud_storage/frame_helpers.py +27 -23
- flowfile_frame/cloud_storage/secret_manager.py +12 -26
- flowfile_frame/config.py +2 -5
- flowfile_frame/expr.py +311 -218
- flowfile_frame/expr.pyi +160 -159
- flowfile_frame/expr_name.py +23 -23
- flowfile_frame/flow_frame.py +571 -476
- flowfile_frame/flow_frame.pyi +123 -104
- flowfile_frame/flow_frame_methods.py +227 -246
- flowfile_frame/group_frame.py +50 -20
- flowfile_frame/join.py +2 -2
- flowfile_frame/lazy.py +129 -87
- flowfile_frame/lazy_methods.py +83 -30
- flowfile_frame/list_name_space.py +55 -50
- flowfile_frame/selectors.py +148 -68
- flowfile_frame/series.py +9 -7
- flowfile_frame/utils.py +19 -21
- flowfile_worker/__init__.py +12 -7
- flowfile_worker/configs.py +11 -19
- flowfile_worker/create/__init__.py +14 -9
- flowfile_worker/create/funcs.py +114 -77
- flowfile_worker/create/models.py +46 -43
- flowfile_worker/create/pl_types.py +14 -15
- flowfile_worker/create/read_excel_tables.py +34 -41
- flowfile_worker/create/utils.py +22 -19
- flowfile_worker/external_sources/s3_source/main.py +18 -51
- flowfile_worker/external_sources/s3_source/models.py +34 -27
- flowfile_worker/external_sources/sql_source/main.py +8 -5
- flowfile_worker/external_sources/sql_source/models.py +13 -9
- flowfile_worker/flow_logger.py +10 -8
- flowfile_worker/funcs.py +214 -155
- flowfile_worker/main.py +11 -17
- flowfile_worker/models.py +35 -28
- flowfile_worker/process_manager.py +2 -3
- flowfile_worker/routes.py +121 -90
- flowfile_worker/secrets.py +9 -6
- flowfile_worker/spawner.py +80 -49
- flowfile_worker/utils.py +3 -2
- shared/__init__.py +2 -7
- shared/storage_config.py +25 -13
- test_utils/postgres/commands.py +3 -2
- test_utils/postgres/fixtures.py +9 -9
- test_utils/s3/commands.py +1 -1
- test_utils/s3/data_generator.py +3 -4
- test_utils/s3/demo_data_generator.py +4 -7
- test_utils/s3/fixtures.py +7 -5
- tools/migrate/__init__.py +1 -1
- tools/migrate/__main__.py +16 -29
- tools/migrate/legacy_schemas.py +251 -190
- tools/migrate/migrate.py +193 -181
- tools/migrate/tests/conftest.py +1 -3
- tools/migrate/tests/test_migrate.py +36 -41
- tools/migrate/tests/test_migration_e2e.py +28 -29
- tools/migrate/tests/test_node_migrations.py +50 -20
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
- flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
- flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
- flowfile/web/static/assets/Filter-9b6d08db.js +0 -164
- flowfile/web/static/assets/Filter-f62091b3.css +0 -20
- flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
- flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
- flowfile/web/static/assets/nodeInput-5d0d6b79.js +0 -41
- flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
- flowfile/web/static/assets/secretApi-68435402.js +0 -46
- flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
- flowfile-0.5.1.dist-info/RECORD +0 -388
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,21 +1,21 @@
|
|
|
1
1
|
import io
|
|
2
2
|
import os
|
|
3
|
+
from collections.abc import Callable
|
|
3
4
|
from pathlib import Path
|
|
4
|
-
from typing import Any,
|
|
5
|
+
from typing import Any, Literal
|
|
5
6
|
|
|
6
7
|
import polars as pl
|
|
7
|
-
from polars._typing import
|
|
8
|
-
Sequence, CsvEncoding)
|
|
8
|
+
from polars._typing import IO, CsvEncoding, PolarsDataType, SchemaDict, Sequence
|
|
9
9
|
|
|
10
10
|
from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine
|
|
11
11
|
from flowfile_core.flowfile.flow_graph import FlowGraph
|
|
12
|
-
from flowfile_core.schemas import input_schema, transform_schema
|
|
12
|
+
from flowfile_core.schemas import cloud_storage_schemas, input_schema, transform_schema
|
|
13
|
+
from flowfile_frame.cloud_storage.secret_manager import get_current_user_id
|
|
13
14
|
from flowfile_frame.config import logger
|
|
14
15
|
from flowfile_frame.expr import col
|
|
15
16
|
from flowfile_frame.flow_frame import FlowFrame
|
|
16
|
-
from flowfile_frame.utils import create_flow_graph
|
|
17
|
-
|
|
18
|
-
from flowfile_frame.utils import generate_node_id
|
|
17
|
+
from flowfile_frame.utils import create_flow_graph, generate_node_id
|
|
18
|
+
|
|
19
19
|
|
|
20
20
|
def sum(expr):
|
|
21
21
|
"""Sum aggregation function."""
|
|
@@ -53,42 +53,42 @@ def count(expr):
|
|
|
53
53
|
|
|
54
54
|
|
|
55
55
|
def read_csv(
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
56
|
+
source: str | Path | IO[bytes] | bytes | list[str | Path | IO[bytes] | bytes],
|
|
57
|
+
*,
|
|
58
|
+
flow_graph: Any | None = None, # Using Any for FlowGraph placeholder
|
|
59
|
+
separator: str = ",",
|
|
60
|
+
convert_to_absolute_path: bool = True,
|
|
61
|
+
description: str | None = None,
|
|
62
|
+
has_header: bool = True,
|
|
63
|
+
new_columns: list[str] | None = None,
|
|
64
|
+
comment_prefix: str | None = None,
|
|
65
|
+
quote_char: str | None = '"',
|
|
66
|
+
skip_rows: int = 0,
|
|
67
|
+
skip_lines: int = 0,
|
|
68
|
+
schema: SchemaDict | None = None,
|
|
69
|
+
schema_overrides: SchemaDict | Sequence[PolarsDataType] | None = None,
|
|
70
|
+
null_values: str | list[str] | dict[str, str] | None = None,
|
|
71
|
+
missing_utf8_is_empty_string: bool = False,
|
|
72
|
+
ignore_errors: bool = False,
|
|
73
|
+
try_parse_dates: bool = False,
|
|
74
|
+
infer_schema: bool = True,
|
|
75
|
+
infer_schema_length: int | None = 100,
|
|
76
|
+
n_rows: int | None = None,
|
|
77
|
+
encoding: CsvEncoding = "utf8",
|
|
78
|
+
low_memory: bool = False,
|
|
79
|
+
rechunk: bool = False,
|
|
80
|
+
storage_options: dict[str, Any] | None = None,
|
|
81
|
+
skip_rows_after_header: int = 0,
|
|
82
|
+
row_index_name: str | None = None,
|
|
83
|
+
row_index_offset: int = 0,
|
|
84
|
+
eol_char: str = "\n",
|
|
85
|
+
raise_if_empty: bool = True,
|
|
86
|
+
truncate_ragged_lines: bool = False,
|
|
87
|
+
decimal_comma: bool = False,
|
|
88
|
+
glob: bool = True,
|
|
89
|
+
cache: bool = True,
|
|
90
|
+
with_column_names: Callable[[list[str]], list[str]] | None = None,
|
|
91
|
+
**other_options: Any,
|
|
92
92
|
) -> FlowFrame:
|
|
93
93
|
"""
|
|
94
94
|
Read a CSV file into a FlowFrame.
|
|
@@ -146,44 +146,44 @@ def read_csv(
|
|
|
146
146
|
current_source_path_for_native = None
|
|
147
147
|
if isinstance(source, (str, os.PathLike)):
|
|
148
148
|
current_source_path_for_native = str(source)
|
|
149
|
-
if
|
|
149
|
+
if "~" in current_source_path_for_native:
|
|
150
150
|
current_source_path_for_native = os.path.expanduser(current_source_path_for_native)
|
|
151
151
|
elif isinstance(source, list) and all(isinstance(s, (str, os.PathLike)) for s in source):
|
|
152
152
|
current_source_path_for_native = str(source[0]) if source else None
|
|
153
|
-
if current_source_path_for_native and
|
|
154
|
-
|
|
153
|
+
if current_source_path_for_native and "~" in current_source_path_for_native:
|
|
154
|
+
current_source_path_for_native = os.path.expanduser(current_source_path_for_native)
|
|
155
155
|
elif isinstance(source, (io.BytesIO, io.StringIO)):
|
|
156
156
|
logger.warning("Read from bytes io from csv not supported, converting data to raw data")
|
|
157
157
|
return from_dict(pl.read_csv(source), flow_graph=flow_graph, description=description)
|
|
158
|
-
actual_infer_schema_length:
|
|
158
|
+
actual_infer_schema_length: int | None
|
|
159
159
|
if not infer_schema:
|
|
160
160
|
actual_infer_schema_length = 0
|
|
161
161
|
else:
|
|
162
162
|
actual_infer_schema_length = infer_schema_length
|
|
163
163
|
can_use_native = (
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
164
|
+
current_source_path_for_native is not None
|
|
165
|
+
and comment_prefix is None
|
|
166
|
+
and skip_lines == 0
|
|
167
|
+
and schema is None
|
|
168
|
+
and schema_overrides is None
|
|
169
|
+
and null_values is None
|
|
170
|
+
and not missing_utf8_is_empty_string
|
|
171
|
+
and not try_parse_dates
|
|
172
|
+
and n_rows is None
|
|
173
|
+
and not low_memory
|
|
174
|
+
and not rechunk
|
|
175
|
+
and storage_options is None
|
|
176
|
+
and skip_rows_after_header == 0
|
|
177
|
+
and row_index_name is None
|
|
178
|
+
and row_index_offset == 0
|
|
179
|
+
and eol_char == "\n"
|
|
180
|
+
and not decimal_comma
|
|
181
|
+
and new_columns is None
|
|
182
|
+
and glob is True
|
|
183
183
|
)
|
|
184
184
|
if can_use_native and current_source_path_for_native:
|
|
185
185
|
received_table = input_schema.ReceivedTable(
|
|
186
|
-
file_type=
|
|
186
|
+
file_type="csv",
|
|
187
187
|
path=current_source_path_for_native,
|
|
188
188
|
name=Path(current_source_path_for_native).name,
|
|
189
189
|
table_settings=input_schema.InputCsvTable(
|
|
@@ -195,8 +195,8 @@ def read_csv(
|
|
|
195
195
|
infer_schema_length=actual_infer_schema_length if actual_infer_schema_length is not None else 10000,
|
|
196
196
|
truncate_ragged_lines=truncate_ragged_lines,
|
|
197
197
|
ignore_errors=ignore_errors,
|
|
198
|
-
row_delimiter=eol_char
|
|
199
|
-
)
|
|
198
|
+
row_delimiter=eol_char,
|
|
199
|
+
),
|
|
200
200
|
)
|
|
201
201
|
if convert_to_absolute_path:
|
|
202
202
|
try:
|
|
@@ -213,15 +213,13 @@ def read_csv(
|
|
|
213
213
|
pos_x=100,
|
|
214
214
|
pos_y=100,
|
|
215
215
|
is_setup=True,
|
|
216
|
-
description=read_node_description
|
|
216
|
+
description=read_node_description,
|
|
217
217
|
)
|
|
218
218
|
flow_graph.add_read(read_node)
|
|
219
219
|
flow_graph.get_node(1)
|
|
220
220
|
|
|
221
221
|
result_frame = FlowFrame(
|
|
222
|
-
data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
|
|
223
|
-
flow_graph=flow_graph,
|
|
224
|
-
node_id=node_id
|
|
222
|
+
data=flow_graph.get_node(node_id).get_resulting_data().data_frame, flow_graph=flow_graph, node_id=node_id
|
|
225
223
|
)
|
|
226
224
|
flow_graph.get_node(1)
|
|
227
225
|
return result_frame
|
|
@@ -258,13 +256,15 @@ def read_csv(
|
|
|
258
256
|
glob=glob,
|
|
259
257
|
cache=cache,
|
|
260
258
|
with_column_names=with_column_names,
|
|
261
|
-
**other_options
|
|
259
|
+
**other_options,
|
|
262
260
|
)
|
|
263
261
|
polars_code_node_description = description or "Read CSV with Polars scan_csv"
|
|
264
262
|
if isinstance(source, (str, os.PathLike)):
|
|
265
263
|
polars_code_node_description = description or f"Read CSV with Polars scan_csv from {Path(source).name}"
|
|
266
264
|
elif isinstance(source, list) and source and isinstance(source[0], (str, os.PathLike)):
|
|
267
|
-
polars_code_node_description =
|
|
265
|
+
polars_code_node_description = (
|
|
266
|
+
description or f"Read CSV with Polars scan_csv from {Path(source[0]).name} (and possibly others)"
|
|
267
|
+
)
|
|
268
268
|
|
|
269
269
|
# Assuming input_schema.NodePolarsCode, transform_schema.PolarsCodeInput are defined
|
|
270
270
|
polars_code_settings = input_schema.NodePolarsCode(
|
|
@@ -272,7 +272,7 @@ def read_csv(
|
|
|
272
272
|
node_id=node_id,
|
|
273
273
|
polars_code_input=transform_schema.PolarsCodeInput(polars_code=polars_code),
|
|
274
274
|
is_setup=True,
|
|
275
|
-
description=polars_code_node_description
|
|
275
|
+
description=polars_code_node_description,
|
|
276
276
|
)
|
|
277
277
|
flow_graph.add_polars_code(polars_code_settings)
|
|
278
278
|
return FlowFrame(
|
|
@@ -283,28 +283,28 @@ def read_csv(
|
|
|
283
283
|
|
|
284
284
|
|
|
285
285
|
def _build_polars_code_args(
|
|
286
|
-
source:
|
|
286
|
+
source: str | Path | IO[bytes] | bytes | list[str | Path | IO[bytes] | bytes],
|
|
287
287
|
separator: str,
|
|
288
288
|
has_header: bool,
|
|
289
|
-
new_columns:
|
|
290
|
-
comment_prefix:
|
|
291
|
-
quote_char:
|
|
289
|
+
new_columns: list[str] | None,
|
|
290
|
+
comment_prefix: str | None,
|
|
291
|
+
quote_char: str | None,
|
|
292
292
|
skip_rows: int,
|
|
293
293
|
skip_lines: int,
|
|
294
|
-
schema:
|
|
295
|
-
schema_overrides:
|
|
296
|
-
null_values:
|
|
294
|
+
schema: SchemaDict | None,
|
|
295
|
+
schema_overrides: SchemaDict | Sequence[PolarsDataType] | None,
|
|
296
|
+
null_values: str | list[str] | dict[str, str] | None,
|
|
297
297
|
missing_utf8_is_empty_string: bool,
|
|
298
298
|
ignore_errors: bool,
|
|
299
299
|
try_parse_dates: bool,
|
|
300
|
-
infer_schema_length:
|
|
301
|
-
n_rows:
|
|
300
|
+
infer_schema_length: int | None,
|
|
301
|
+
n_rows: int | None,
|
|
302
302
|
encoding: CsvEncoding,
|
|
303
303
|
low_memory: bool,
|
|
304
304
|
rechunk: bool,
|
|
305
|
-
storage_options:
|
|
305
|
+
storage_options: dict[str, Any] | None,
|
|
306
306
|
skip_rows_after_header: int,
|
|
307
|
-
row_index_name:
|
|
307
|
+
row_index_name: str | None,
|
|
308
308
|
row_index_offset: int,
|
|
309
309
|
eol_char: str,
|
|
310
310
|
raise_if_empty: bool,
|
|
@@ -312,8 +312,8 @@ def _build_polars_code_args(
|
|
|
312
312
|
decimal_comma: bool,
|
|
313
313
|
glob: bool,
|
|
314
314
|
cache: bool,
|
|
315
|
-
with_column_names:
|
|
316
|
-
**other_options: Any
|
|
315
|
+
with_column_names: Callable[[list[str]], list[str]] | None,
|
|
316
|
+
**other_options: Any,
|
|
317
317
|
) -> str:
|
|
318
318
|
source_repr: str
|
|
319
319
|
if isinstance(source, (str, Path)):
|
|
@@ -322,41 +322,41 @@ def _build_polars_code_args(
|
|
|
322
322
|
source_repr = repr([str(p) for p in source])
|
|
323
323
|
elif isinstance(source, bytes):
|
|
324
324
|
source_repr = "source_bytes_obj"
|
|
325
|
-
elif hasattr(source,
|
|
325
|
+
elif hasattr(source, "read"):
|
|
326
326
|
source_repr = "source_file_like_obj"
|
|
327
327
|
else:
|
|
328
328
|
source_repr = repr(source)
|
|
329
329
|
|
|
330
330
|
param_mapping = {
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
331
|
+
"has_header": (True, lambda x: str(x)),
|
|
332
|
+
"separator": (",", lambda x: repr(str(x))),
|
|
333
|
+
"comment_prefix": (None, lambda x: repr(str(x)) if x is not None else "None"),
|
|
334
|
+
"quote_char": ('"', lambda x: repr(str(x)) if x is not None else "None"),
|
|
335
|
+
"skip_rows": (0, str),
|
|
336
|
+
"skip_lines": (0, str),
|
|
337
|
+
"schema": (None, lambda x: repr(x) if x is not None else "None"),
|
|
338
|
+
"schema_overrides": (None, lambda x: repr(x) if x is not None else "None"),
|
|
339
|
+
"null_values": (None, lambda x: repr(x) if x is not None else "None"),
|
|
340
|
+
"missing_utf8_is_empty_string": (False, str),
|
|
341
|
+
"ignore_errors": (False, str),
|
|
342
|
+
"cache": (True, str),
|
|
343
|
+
"with_column_names": (None, lambda x: repr(x) if x is not None else "None"),
|
|
344
|
+
"infer_schema_length": (100, lambda x: str(x) if x is not None else "None"),
|
|
345
|
+
"n_rows": (None, lambda x: str(x) if x is not None else "None"),
|
|
346
|
+
"encoding": ("utf8", lambda x: repr(str(x))),
|
|
347
|
+
"low_memory": (False, str),
|
|
348
|
+
"rechunk": (False, str),
|
|
349
|
+
"skip_rows_after_header": (0, str),
|
|
350
|
+
"row_index_name": (None, lambda x: repr(str(x)) if x is not None else "None"),
|
|
351
|
+
"row_index_offset": (0, str),
|
|
352
|
+
"try_parse_dates": (False, str),
|
|
353
|
+
"eol_char": ("\n", lambda x: repr(str(x))),
|
|
354
|
+
"new_columns": (None, lambda x: repr(x) if x is not None else "None"),
|
|
355
|
+
"raise_if_empty": (True, str),
|
|
356
|
+
"truncate_ragged_lines": (False, str),
|
|
357
|
+
"decimal_comma": (False, str),
|
|
358
|
+
"glob": (True, str),
|
|
359
|
+
"storage_options": (None, lambda x: repr(x) if x is not None else "None"),
|
|
360
360
|
}
|
|
361
361
|
|
|
362
362
|
all_vars = locals()
|
|
@@ -381,8 +381,9 @@ def _build_polars_code_args(
|
|
|
381
381
|
return polars_code
|
|
382
382
|
|
|
383
383
|
|
|
384
|
-
def read_parquet(
|
|
385
|
-
|
|
384
|
+
def read_parquet(
|
|
385
|
+
source, *, flow_graph: FlowGraph = None, description: str = None, convert_to_absolute_path: bool = True, **options
|
|
386
|
+
) -> FlowFrame:
|
|
386
387
|
"""
|
|
387
388
|
Read a Parquet file into a FlowFrame.
|
|
388
389
|
|
|
@@ -396,7 +397,7 @@ def read_parquet(source, *, flow_graph: FlowGraph = None, description: str = Non
|
|
|
396
397
|
Returns:
|
|
397
398
|
A FlowFrame with the Parquet data
|
|
398
399
|
"""
|
|
399
|
-
if
|
|
400
|
+
if "~" in source:
|
|
400
401
|
file_path = os.path.expanduser(source)
|
|
401
402
|
node_id = generate_node_id()
|
|
402
403
|
|
|
@@ -406,10 +407,7 @@ def read_parquet(source, *, flow_graph: FlowGraph = None, description: str = Non
|
|
|
406
407
|
flow_id = flow_graph.flow_id
|
|
407
408
|
|
|
408
409
|
received_table = input_schema.ReceivedTable(
|
|
409
|
-
file_type=
|
|
410
|
-
path=source,
|
|
411
|
-
name=Path(source).name,
|
|
412
|
-
table_settings=input_schema.InputParquetTable()
|
|
410
|
+
file_type="parquet", path=source, name=Path(source).name, table_settings=input_schema.InputParquetTable()
|
|
413
411
|
)
|
|
414
412
|
if convert_to_absolute_path:
|
|
415
413
|
received_table.path = received_table.abs_file_path
|
|
@@ -421,15 +419,13 @@ def read_parquet(source, *, flow_graph: FlowGraph = None, description: str = Non
|
|
|
421
419
|
pos_x=100,
|
|
422
420
|
pos_y=100,
|
|
423
421
|
is_setup=True,
|
|
424
|
-
description=description
|
|
422
|
+
description=description,
|
|
425
423
|
)
|
|
426
424
|
|
|
427
425
|
flow_graph.add_read(read_node)
|
|
428
426
|
|
|
429
427
|
return FlowFrame(
|
|
430
|
-
data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
|
|
431
|
-
flow_graph=flow_graph,
|
|
432
|
-
node_id=node_id
|
|
428
|
+
data=flow_graph.get_node(node_id).get_resulting_data().data_frame, flow_graph=flow_graph, node_id=node_id
|
|
433
429
|
)
|
|
434
430
|
|
|
435
431
|
|
|
@@ -458,7 +454,7 @@ def from_dict(data, *, flow_graph: FlowGraph = None, description: str = None) ->
|
|
|
458
454
|
pos_x=100,
|
|
459
455
|
pos_y=100,
|
|
460
456
|
is_setup=True,
|
|
461
|
-
description=description
|
|
457
|
+
description=description,
|
|
462
458
|
)
|
|
463
459
|
|
|
464
460
|
# Add to graph
|
|
@@ -466,17 +462,17 @@ def from_dict(data, *, flow_graph: FlowGraph = None, description: str = None) ->
|
|
|
466
462
|
|
|
467
463
|
# Return new frame
|
|
468
464
|
return FlowFrame(
|
|
469
|
-
data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
|
|
470
|
-
flow_graph=flow_graph,
|
|
471
|
-
node_id=node_id
|
|
465
|
+
data=flow_graph.get_node(node_id).get_resulting_data().data_frame, flow_graph=flow_graph, node_id=node_id
|
|
472
466
|
)
|
|
473
467
|
|
|
474
468
|
|
|
475
|
-
def concat(
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
469
|
+
def concat(
|
|
470
|
+
frames: list["FlowFrame"],
|
|
471
|
+
how: str = "vertical",
|
|
472
|
+
rechunk: bool = False,
|
|
473
|
+
parallel: bool = True,
|
|
474
|
+
description: str = None,
|
|
475
|
+
) -> "FlowFrame":
|
|
480
476
|
"""
|
|
481
477
|
Concatenate multiple FlowFrames into one.
|
|
482
478
|
|
|
@@ -506,48 +502,46 @@ def concat(frames: List['FlowFrame'],
|
|
|
506
502
|
first_frame = frames[0]
|
|
507
503
|
remaining_frames = frames[1:]
|
|
508
504
|
|
|
509
|
-
return first_frame.concat(remaining_frames, how=how,
|
|
510
|
-
rechunk=rechunk, parallel=parallel,
|
|
511
|
-
description=description)
|
|
505
|
+
return first_frame.concat(remaining_frames, how=how, rechunk=rechunk, parallel=parallel, description=description)
|
|
512
506
|
|
|
513
507
|
|
|
514
508
|
def scan_csv(
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
509
|
+
source: str | Path | IO[bytes] | bytes | list[str | Path | IO[bytes] | bytes],
|
|
510
|
+
*,
|
|
511
|
+
flow_graph: Any | None = None, # Using Any for FlowGraph placeholder
|
|
512
|
+
separator: str = ",",
|
|
513
|
+
convert_to_absolute_path: bool = True,
|
|
514
|
+
description: str | None = None,
|
|
515
|
+
has_header: bool = True,
|
|
516
|
+
new_columns: list[str] | None = None,
|
|
517
|
+
comment_prefix: str | None = None,
|
|
518
|
+
quote_char: str | None = '"',
|
|
519
|
+
skip_rows: int = 0,
|
|
520
|
+
skip_lines: int = 0,
|
|
521
|
+
schema: SchemaDict | None = None,
|
|
522
|
+
schema_overrides: SchemaDict | Sequence[PolarsDataType] | None = None,
|
|
523
|
+
null_values: str | list[str] | dict[str, str] | None = None,
|
|
524
|
+
missing_utf8_is_empty_string: bool = False,
|
|
525
|
+
ignore_errors: bool = False,
|
|
526
|
+
try_parse_dates: bool = False,
|
|
527
|
+
infer_schema: bool = True,
|
|
528
|
+
infer_schema_length: int | None = 100,
|
|
529
|
+
n_rows: int | None = None,
|
|
530
|
+
encoding: CsvEncoding = "utf8",
|
|
531
|
+
low_memory: bool = False,
|
|
532
|
+
rechunk: bool = False,
|
|
533
|
+
storage_options: dict[str, Any] | None = None,
|
|
534
|
+
skip_rows_after_header: int = 0,
|
|
535
|
+
row_index_name: str | None = None,
|
|
536
|
+
row_index_offset: int = 0,
|
|
537
|
+
eol_char: str = "\n",
|
|
538
|
+
raise_if_empty: bool = True,
|
|
539
|
+
truncate_ragged_lines: bool = False,
|
|
540
|
+
decimal_comma: bool = False,
|
|
541
|
+
glob: bool = True,
|
|
542
|
+
cache: bool = True,
|
|
543
|
+
with_column_names: Callable[[list[str]], list[str]] | None = None,
|
|
544
|
+
**other_options: Any,
|
|
551
545
|
) -> FlowFrame:
|
|
552
546
|
"""
|
|
553
547
|
Scan a CSV file into a FlowFrame. This function is an alias for read_csv.
|
|
@@ -592,17 +586,12 @@ def scan_csv(
|
|
|
592
586
|
glob=glob,
|
|
593
587
|
cache=cache,
|
|
594
588
|
with_column_names=with_column_names,
|
|
595
|
-
**other_options
|
|
589
|
+
**other_options,
|
|
596
590
|
)
|
|
597
591
|
|
|
598
592
|
|
|
599
593
|
def scan_parquet(
|
|
600
|
-
|
|
601
|
-
*,
|
|
602
|
-
flow_graph: FlowGraph = None,
|
|
603
|
-
description: str = None,
|
|
604
|
-
convert_to_absolute_path: bool = True,
|
|
605
|
-
**options
|
|
594
|
+
source, *, flow_graph: FlowGraph = None, description: str = None, convert_to_absolute_path: bool = True, **options
|
|
606
595
|
) -> FlowFrame:
|
|
607
596
|
"""
|
|
608
597
|
Scan a Parquet file into a FlowFrame. This function is an alias for read_parquet.
|
|
@@ -617,17 +606,17 @@ def scan_parquet(
|
|
|
617
606
|
flow_graph=flow_graph,
|
|
618
607
|
description=description,
|
|
619
608
|
convert_to_absolute_path=convert_to_absolute_path,
|
|
620
|
-
**options
|
|
609
|
+
**options,
|
|
621
610
|
)
|
|
622
611
|
|
|
623
612
|
|
|
624
613
|
def scan_parquet_from_cloud_storage(
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
614
|
+
source: str,
|
|
615
|
+
*,
|
|
616
|
+
flow_graph: FlowGraph | None = None,
|
|
617
|
+
connection_name: str | None = None,
|
|
618
|
+
scan_mode: Literal["single_file", "directory", None] = None,
|
|
619
|
+
description: str | None = None,
|
|
631
620
|
) -> FlowFrame:
|
|
632
621
|
node_id = generate_node_id()
|
|
633
622
|
|
|
@@ -644,29 +633,28 @@ def scan_parquet_from_cloud_storage(
|
|
|
644
633
|
settings = input_schema.NodeCloudStorageReader(
|
|
645
634
|
flow_id=flow_id,
|
|
646
635
|
node_id=node_id,
|
|
647
|
-
cloud_storage_settings=cloud_storage_schemas.CloudStorageReadSettings(
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
file_format="parquet"),
|
|
636
|
+
cloud_storage_settings=cloud_storage_schemas.CloudStorageReadSettings(
|
|
637
|
+
resource_path=source, scan_mode=scan_mode, connection_name=connection_name, file_format="parquet"
|
|
638
|
+
),
|
|
651
639
|
user_id=get_current_user_id(),
|
|
652
|
-
description=description
|
|
640
|
+
description=description,
|
|
641
|
+
)
|
|
653
642
|
flow_graph.add_cloud_storage_reader(settings)
|
|
654
643
|
return FlowFrame(
|
|
655
|
-
data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
|
|
656
|
-
flow_graph=flow_graph,
|
|
657
|
-
node_id=node_id
|
|
644
|
+
data=flow_graph.get_node(node_id).get_resulting_data().data_frame, flow_graph=flow_graph, node_id=node_id
|
|
658
645
|
)
|
|
659
646
|
|
|
660
647
|
|
|
661
648
|
def scan_csv_from_cloud_storage(
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
649
|
+
source: str,
|
|
650
|
+
*,
|
|
651
|
+
flow_graph: FlowGraph | None = None,
|
|
652
|
+
connection_name: str | None = None,
|
|
653
|
+
scan_mode: Literal["single_file", "directory", None] = None,
|
|
654
|
+
delimiter: str = ";",
|
|
655
|
+
has_header: bool | None = True,
|
|
656
|
+
encoding: CsvEncoding | None = "utf8",
|
|
657
|
+
) -> FlowFrame:
|
|
670
658
|
node_id = generate_node_id()
|
|
671
659
|
|
|
672
660
|
if scan_mode is None:
|
|
@@ -681,28 +669,26 @@ def scan_csv_from_cloud_storage(
|
|
|
681
669
|
settings = input_schema.NodeCloudStorageReader(
|
|
682
670
|
flow_id=flow_id,
|
|
683
671
|
node_id=node_id,
|
|
684
|
-
cloud_storage_settings=cloud_storage_schemas.CloudStorageReadSettings(
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
672
|
+
cloud_storage_settings=cloud_storage_schemas.CloudStorageReadSettings(
|
|
673
|
+
resource_path=source,
|
|
674
|
+
scan_mode=scan_mode,
|
|
675
|
+
connection_name=connection_name,
|
|
676
|
+
csv_delimiter=delimiter,
|
|
677
|
+
csv_encoding=encoding,
|
|
678
|
+
csv_has_header=has_header,
|
|
679
|
+
file_format="csv",
|
|
680
|
+
),
|
|
681
|
+
user_id=get_current_user_id(),
|
|
682
|
+
)
|
|
692
683
|
flow_graph.add_cloud_storage_reader(settings)
|
|
693
684
|
return FlowFrame(
|
|
694
|
-
data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
|
|
695
|
-
flow_graph=flow_graph,
|
|
696
|
-
node_id=node_id
|
|
685
|
+
data=flow_graph.get_node(node_id).get_resulting_data().data_frame, flow_graph=flow_graph, node_id=node_id
|
|
697
686
|
)
|
|
698
687
|
|
|
699
688
|
|
|
700
689
|
def scan_delta(
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
flow_graph: Optional[FlowGraph] = None,
|
|
704
|
-
connection_name: Optional[str] = None,
|
|
705
|
-
version: int = None) -> FlowFrame:
|
|
690
|
+
source: str, *, flow_graph: FlowGraph | None = None, connection_name: str | None = None, version: int = None
|
|
691
|
+
) -> FlowFrame:
|
|
706
692
|
node_id = generate_node_id()
|
|
707
693
|
if flow_graph is None:
|
|
708
694
|
flow_graph = create_flow_graph()
|
|
@@ -710,25 +696,23 @@ def scan_delta(
|
|
|
710
696
|
settings = input_schema.NodeCloudStorageReader(
|
|
711
697
|
flow_id=flow_id,
|
|
712
698
|
node_id=node_id,
|
|
713
|
-
cloud_storage_settings=cloud_storage_schemas.CloudStorageReadSettings(
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
699
|
+
cloud_storage_settings=cloud_storage_schemas.CloudStorageReadSettings(
|
|
700
|
+
resource_path=source, connection_name=connection_name, file_format="delta", delta_version=version
|
|
701
|
+
),
|
|
702
|
+
user_id=get_current_user_id(),
|
|
703
|
+
)
|
|
718
704
|
flow_graph.add_cloud_storage_reader(settings)
|
|
719
705
|
return FlowFrame(
|
|
720
|
-
data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
|
|
721
|
-
flow_graph=flow_graph,
|
|
722
|
-
node_id=node_id
|
|
706
|
+
data=flow_graph.get_node(node_id).get_resulting_data().data_frame, flow_graph=flow_graph, node_id=node_id
|
|
723
707
|
)
|
|
724
708
|
|
|
725
709
|
|
|
726
710
|
def scan_json_from_cloud_storage(
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
711
|
+
source: str,
|
|
712
|
+
*,
|
|
713
|
+
flow_graph: FlowGraph | None = None,
|
|
714
|
+
connection_name: str | None = None,
|
|
715
|
+
scan_mode: Literal["single_file", "directory", None] = None,
|
|
732
716
|
) -> FlowFrame:
|
|
733
717
|
node_id = generate_node_id()
|
|
734
718
|
|
|
@@ -744,15 +728,12 @@ def scan_json_from_cloud_storage(
|
|
|
744
728
|
settings = input_schema.NodeCloudStorageReader(
|
|
745
729
|
flow_id=flow_id,
|
|
746
730
|
node_id=node_id,
|
|
747
|
-
cloud_storage_settings=cloud_storage_schemas.CloudStorageReadSettings(
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
731
|
+
cloud_storage_settings=cloud_storage_schemas.CloudStorageReadSettings(
|
|
732
|
+
resource_path=source, scan_mode=scan_mode, connection_name=connection_name, file_format="json"
|
|
733
|
+
),
|
|
734
|
+
user_id=get_current_user_id(),
|
|
735
|
+
)
|
|
752
736
|
flow_graph.add_cloud_storage_reader(settings)
|
|
753
737
|
return FlowFrame(
|
|
754
|
-
data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
|
|
755
|
-
flow_graph=flow_graph,
|
|
756
|
-
node_id=node_id
|
|
738
|
+
data=flow_graph.get_node(node_id).get_resulting_data().data_frame, flow_graph=flow_graph, node_id=node_id
|
|
757
739
|
)
|
|
758
|
-
|