Flowfile 0.4.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backends/main.py +25 -22
- build_backends/main_prd.py +10 -19
- flowfile/__init__.py +179 -73
- flowfile/__main__.py +10 -7
- flowfile/api.py +52 -59
- flowfile/web/__init__.py +14 -9
- flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
- flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
- flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
- flowfile/web/static/assets/{CloudConnectionManager-d3248f8d.js → CloudConnectionView-f13f202b.js} +11 -11
- flowfile/web/static/assets/{CloudStorageReader-d65bf041.js → CloudStorageReader-0023d4a5.js} +10 -8
- flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
- flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
- flowfile/web/static/assets/{CloudStorageWriter-e83be3ed.js → CloudStorageWriter-8e781e11.js} +10 -8
- flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
- flowfile/web/static/assets/{ColumnSelector-cce661cf.js → ColumnSelector-8ad68ea9.js} +3 -5
- flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-11a4652a.js → ContextMenu-31ee57f0.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-160afb08.js → ContextMenu-69a74055.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-cf18d2cc.js → ContextMenu-8e2051c6.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
- flowfile/web/static/assets/{CrossJoin-d395d38c.js → CrossJoin-03df6938.js} +12 -10
- flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
- flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
- flowfile/web/static/assets/{CustomNode-b812dc0b.js → CustomNode-8479239b.js} +36 -24
- flowfile/web/static/assets/{DatabaseConnectionSettings-7000bf2c.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
- flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
- flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
- flowfile/web/static/assets/{DatabaseReader-4f035d0c.js → DatabaseReader-c58b9552.js} +25 -15
- flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
- flowfile/web/static/assets/{DatabaseManager-9662ec5b.js → DatabaseView-d26a9140.js} +11 -11
- flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
- flowfile/web/static/assets/{DatabaseWriter-f65dcd54.js → DatabaseWriter-4d05ddc7.js} +17 -10
- flowfile/web/static/assets/{designer-e3c150ec.css → DesignerView-a6d0ee84.css} +629 -538
- flowfile/web/static/assets/{designer-f3656d8c.js → DesignerView-e6f5c0e8.js} +1214 -3209
- flowfile/web/static/assets/{documentation-52b241e7.js → DocumentationView-2e78ef1b.js} +5 -5
- flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
- flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
- flowfile/web/static/assets/{ExploreData-94c43dfc.js → ExploreData-7b54caca.js} +18 -9
- flowfile/web/static/assets/{ExternalSource-ac04b3cc.js → ExternalSource-3fa399b2.js} +9 -7
- flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
- flowfile/web/static/assets/Filter-7494ea97.css +48 -0
- flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
- flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
- flowfile/web/static/assets/{Formula-71472193.js → Formula-aac42b1e.js} +13 -11
- flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
- flowfile/web/static/assets/{FuzzyMatch-b317f631.js → FuzzyMatch-cd9bbfca.js} +12 -10
- flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
- flowfile/web/static/assets/{GraphSolver-754a234f.js → GraphSolver-c7e6780e.js} +13 -11
- flowfile/web/static/assets/{GroupBy-6c6f9802.js → GroupBy-93c5d22b.js} +9 -7
- flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
- flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
- flowfile/web/static/assets/{Join-a1b800be.js → Join-a19b2de2.js} +13 -11
- flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
- flowfile/web/static/assets/LoginView-d325d632.css +172 -0
- flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
- flowfile/web/static/assets/{ManualInput-a9640276.js → ManualInput-8d3374b2.js} +170 -116
- flowfile/web/static/assets/{MultiSelect-97213888.js → MultiSelect-ad1b6243.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-6ffe088a.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
- flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
- flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
- flowfile/web/static/assets/{NumericInput-e638088a.js → NumericInput-7100234c.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-90eb2cba.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
- flowfile/web/static/assets/{Output-ddc9079f.css → Output-35e97000.css} +6 -6
- flowfile/web/static/assets/{Output-76750610.js → Output-f5efd2aa.js} +60 -38
- flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
- flowfile/web/static/assets/{Pivot-7814803f.js → Pivot-d981d23c.js} +11 -9
- flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
- flowfile/web/static/assets/{PivotValidation-f92137d2.js → PivotValidation-39386e95.js} +3 -3
- flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
- flowfile/web/static/assets/{PivotValidation-76dd431a.js → PivotValidation-63de1f73.js} +3 -3
- flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
- flowfile/web/static/assets/{PolarsCode-889c3008.js → PolarsCode-f9d69217.js} +18 -9
- flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
- flowfile/web/static/assets/PopOver-d96599db.css +33 -0
- flowfile/web/static/assets/{Read-6b17491f.css → Read-36e7bd51.css} +12 -12
- flowfile/web/static/assets/{Read-637b72a7.js → Read-aec2e377.js} +83 -105
- flowfile/web/static/assets/{RecordCount-2b050c41.js → RecordCount-78ed6845.js} +6 -4
- flowfile/web/static/assets/{RecordId-81df7784.js → RecordId-2156e890.js} +8 -6
- flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
- flowfile/web/static/assets/{SQLQueryComponent-88dcfe53.js → SQLQueryComponent-48c72f5b.js} +3 -3
- flowfile/web/static/assets/{Sample-258ad2a9.js → Sample-1352ca74.js} +6 -4
- flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
- flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
- flowfile/web/static/assets/{SecretManager-2a2cb7e2.js → SecretsView-17df66ee.js} +35 -36
- flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
- flowfile/web/static/assets/{Select-850215fd.js → Select-0aee4c54.js} +9 -7
- flowfile/web/static/assets/{SettingsSection-55bae608.js → SettingsSection-0784e157.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-0e8d9123.js → SettingsSection-cd341bb6.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-29b4fa6b.js → SettingsSection-f2002a6d.js} +3 -3
- flowfile/web/static/assets/{SingleSelect-bebd408b.js → SingleSelect-460cc0ea.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-6093741c.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
- flowfile/web/static/assets/{SliderInput-6a05ab61.js → SliderInput-5d926864.js} +7 -4
- flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
- flowfile/web/static/assets/{Sort-10ab48ed.js → Sort-3cdc971b.js} +9 -7
- flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
- flowfile/web/static/assets/{TextInput-df9d6259.js → TextInput-a2d0bfbd.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-000e1178.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
- flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
- flowfile/web/static/assets/{TextToRows-6c2d93d8.js → TextToRows-918945f7.js} +11 -10
- flowfile/web/static/assets/{ToggleSwitch-0ff7ac52.js → ToggleSwitch-f0ef5196.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-c6dc3029.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
- flowfile/web/static/assets/{UnavailableFields-1bab97cb.js → UnavailableFields-bdad6144.js} +4 -4
- flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
- flowfile/web/static/assets/{Union-b563478a.js → Union-e8ab8c86.js} +8 -6
- flowfile/web/static/assets/{Unique-f90db5db.js → Unique-8cd4f976.js} +13 -22
- flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
- flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
- flowfile/web/static/assets/{Unpivot-bcb0025f.js → Unpivot-8da14095.js} +10 -8
- flowfile/web/static/assets/{UnpivotValidation-c4e73b04.js → UnpivotValidation-6f7d89ff.js} +3 -3
- flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
- flowfile/web/static/assets/{VueGraphicWalker-bb8535e2.js → VueGraphicWalker-3fb312e1.js} +4 -4
- flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
- flowfile/web/static/assets/{api-4c8e3822.js → api-24483f0d.js} +1 -1
- flowfile/web/static/assets/{api-2d6adc4f.js → api-8b81fa73.js} +1 -1
- flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
- flowfile/web/static/assets/{dropDown-1bca8a74.js → dropDown-ac0fda9d.js} +3 -3
- flowfile/web/static/assets/{fullEditor-2985687e.js → fullEditor-5497a84a.js} +11 -10
- flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
- flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
- flowfile/web/static/assets/{genericNodeSettings-0476ba4e.js → genericNodeSettings-99014e1d.js} +5 -5
- flowfile/web/static/assets/index-07dda503.js +38 -0
- flowfile/web/static/assets/index-3ba44389.js +2696 -0
- flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
- flowfile/web/static/assets/{index-246f201c.js → index-fb6493ae.js} +41626 -40869
- flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
- flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
- flowfile/web/static/assets/{outputCsv-d686eeaf.js → outputCsv-8f8ba42d.js} +3 -3
- flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
- flowfile/web/static/assets/{outputExcel-8809ea2f.js → outputExcel-393f4fef.js} +3 -3
- flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
- flowfile/web/static/assets/{outputParquet-53ba645a.js → outputParquet-07c81f65.js} +4 -4
- flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
- flowfile/web/static/assets/{readCsv-053bf97b.js → readCsv-07f6d9ad.js} +21 -20
- flowfile/web/static/assets/{readCsv-bca3ed53.css → readCsv-3bfac4c3.css} +15 -15
- flowfile/web/static/assets/{readExcel-e1b381ea.css → readExcel-3db6b763.css} +13 -13
- flowfile/web/static/assets/{readExcel-ad531eab.js → readExcel-ed69bc8f.js} +10 -12
- flowfile/web/static/assets/{readParquet-cee068e2.css → readParquet-c5244ad5.css} +4 -4
- flowfile/web/static/assets/{readParquet-58e899a1.js → readParquet-e3ed4528.js} +4 -7
- flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
- flowfile/web/static/assets/{selectDynamic-b38de2ba.js → selectDynamic-80b92899.js} +5 -5
- flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
- flowfile/web/static/assets/{vue-codemirror.esm-db9b8936.js → vue-codemirror.esm-0965f39f.js} +31 -637
- flowfile/web/static/assets/{vue-content-loader.es-b5f3ac30.js → vue-content-loader.es-c506ad97.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +4 -4
- flowfile-0.5.3.dist-info/RECORD +402 -0
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +1 -1
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +1 -0
- flowfile_core/__init__.py +13 -3
- flowfile_core/auth/jwt.py +51 -16
- flowfile_core/auth/models.py +32 -7
- flowfile_core/auth/password.py +89 -0
- flowfile_core/auth/secrets.py +8 -6
- flowfile_core/configs/__init__.py +9 -7
- flowfile_core/configs/flow_logger.py +15 -14
- flowfile_core/configs/node_store/__init__.py +72 -4
- flowfile_core/configs/node_store/nodes.py +155 -172
- flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
- flowfile_core/configs/settings.py +28 -15
- flowfile_core/database/connection.py +7 -6
- flowfile_core/database/init_db.py +96 -2
- flowfile_core/database/models.py +3 -1
- flowfile_core/fileExplorer/__init__.py +17 -0
- flowfile_core/fileExplorer/funcs.py +123 -57
- flowfile_core/fileExplorer/utils.py +10 -11
- flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
- flowfile_core/flowfile/analytics/analytics_processor.py +27 -24
- flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
- flowfile_core/flowfile/analytics/utils.py +1 -1
- flowfile_core/flowfile/code_generator/code_generator.py +391 -279
- flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
- flowfile_core/flowfile/database_connection_manager/models.py +1 -1
- flowfile_core/flowfile/extensions.py +17 -12
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +152 -103
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +526 -477
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +43 -32
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +15 -11
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
- flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +360 -191
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
- flowfile_core/flowfile/flow_data_engine/utils.py +101 -67
- flowfile_core/flowfile/flow_graph.py +1011 -561
- flowfile_core/flowfile/flow_graph_utils.py +31 -49
- flowfile_core/flowfile/flow_node/flow_node.py +332 -232
- flowfile_core/flowfile/flow_node/models.py +54 -41
- flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
- flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
- flowfile_core/flowfile/handler.py +82 -32
- flowfile_core/flowfile/manage/compatibility_enhancements.py +493 -47
- flowfile_core/flowfile/manage/io_flowfile.py +391 -0
- flowfile_core/flowfile/node_designer/__init__.py +15 -13
- flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
- flowfile_core/flowfile/node_designer/custom_node.py +162 -36
- flowfile_core/flowfile/node_designer/ui_components.py +136 -35
- flowfile_core/flowfile/schema_callbacks.py +77 -54
- flowfile_core/flowfile/setting_generator/__init__.py +0 -1
- flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
- flowfile_core/flowfile/setting_generator/settings.py +72 -55
- flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
- flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
- flowfile_core/flowfile/util/calculate_layout.py +9 -13
- flowfile_core/flowfile/util/execution_orderer.py +25 -17
- flowfile_core/flowfile/util/node_skipper.py +4 -4
- flowfile_core/flowfile/utils.py +19 -21
- flowfile_core/main.py +26 -19
- flowfile_core/routes/auth.py +284 -11
- flowfile_core/routes/cloud_connections.py +25 -25
- flowfile_core/routes/logs.py +21 -29
- flowfile_core/routes/public.py +3 -3
- flowfile_core/routes/routes.py +77 -43
- flowfile_core/routes/secrets.py +25 -27
- flowfile_core/routes/user_defined_components.py +483 -4
- flowfile_core/run_lock.py +0 -1
- flowfile_core/schemas/__init__.py +4 -6
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
- flowfile_core/schemas/cloud_storage_schemas.py +59 -55
- flowfile_core/schemas/input_schema.py +398 -154
- flowfile_core/schemas/output_model.py +50 -35
- flowfile_core/schemas/schemas.py +207 -67
- flowfile_core/schemas/transform_schema.py +1360 -435
- flowfile_core/schemas/yaml_types.py +117 -0
- flowfile_core/secret_manager/secret_manager.py +17 -13
- flowfile_core/{flowfile/node_designer/data_types.py → types.py} +33 -3
- flowfile_core/utils/arrow_reader.py +7 -6
- flowfile_core/utils/excel_file_manager.py +3 -3
- flowfile_core/utils/fileManager.py +7 -7
- flowfile_core/utils/fl_executor.py +8 -10
- flowfile_core/utils/utils.py +4 -4
- flowfile_core/utils/validate_setup.py +5 -4
- flowfile_frame/__init__.py +107 -50
- flowfile_frame/adapters.py +2 -9
- flowfile_frame/adding_expr.py +73 -32
- flowfile_frame/cloud_storage/frame_helpers.py +27 -23
- flowfile_frame/cloud_storage/secret_manager.py +12 -26
- flowfile_frame/config.py +2 -5
- flowfile_frame/expr.py +311 -218
- flowfile_frame/expr.pyi +160 -159
- flowfile_frame/expr_name.py +23 -23
- flowfile_frame/flow_frame.py +581 -489
- flowfile_frame/flow_frame.pyi +123 -104
- flowfile_frame/flow_frame_methods.py +236 -252
- flowfile_frame/group_frame.py +50 -20
- flowfile_frame/join.py +2 -2
- flowfile_frame/lazy.py +129 -87
- flowfile_frame/lazy_methods.py +83 -30
- flowfile_frame/list_name_space.py +55 -50
- flowfile_frame/selectors.py +148 -68
- flowfile_frame/series.py +9 -7
- flowfile_frame/utils.py +19 -21
- flowfile_worker/__init__.py +12 -4
- flowfile_worker/configs.py +11 -19
- flowfile_worker/create/__init__.py +14 -27
- flowfile_worker/create/funcs.py +143 -94
- flowfile_worker/create/models.py +139 -68
- flowfile_worker/create/pl_types.py +14 -15
- flowfile_worker/create/read_excel_tables.py +34 -41
- flowfile_worker/create/utils.py +22 -19
- flowfile_worker/external_sources/s3_source/main.py +18 -51
- flowfile_worker/external_sources/s3_source/models.py +34 -27
- flowfile_worker/external_sources/sql_source/main.py +8 -5
- flowfile_worker/external_sources/sql_source/models.py +13 -9
- flowfile_worker/flow_logger.py +10 -8
- flowfile_worker/funcs.py +214 -155
- flowfile_worker/main.py +11 -17
- flowfile_worker/models.py +35 -28
- flowfile_worker/process_manager.py +2 -3
- flowfile_worker/routes.py +121 -93
- flowfile_worker/secrets.py +9 -6
- flowfile_worker/spawner.py +80 -49
- flowfile_worker/utils.py +3 -2
- shared/__init__.py +2 -7
- shared/storage_config.py +25 -13
- test_utils/postgres/commands.py +3 -2
- test_utils/postgres/fixtures.py +9 -9
- test_utils/s3/commands.py +1 -1
- test_utils/s3/data_generator.py +3 -4
- test_utils/s3/demo_data_generator.py +4 -7
- test_utils/s3/fixtures.py +7 -5
- tools/migrate/README.md +56 -0
- tools/migrate/__init__.py +12 -0
- tools/migrate/__main__.py +118 -0
- tools/migrate/legacy_schemas.py +682 -0
- tools/migrate/migrate.py +610 -0
- tools/migrate/tests/__init__.py +0 -0
- tools/migrate/tests/conftest.py +21 -0
- tools/migrate/tests/test_migrate.py +622 -0
- tools/migrate/tests/test_migration_e2e.py +1009 -0
- tools/migrate/tests/test_node_migrations.py +843 -0
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
- flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
- flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
- flowfile/web/static/assets/Filter-812dcbca.js +0 -164
- flowfile/web/static/assets/Filter-f62091b3.css +0 -20
- flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
- flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
- flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
- flowfile/web/static/assets/secretApi-538058f3.js +0 -46
- flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
- flowfile-0.4.1.dist-info/RECORD +0 -376
- flowfile_core/flowfile/manage/open_flowfile.py +0 -143
- {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
- /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
|
@@ -1,21 +1,21 @@
|
|
|
1
1
|
import io
|
|
2
2
|
import os
|
|
3
|
+
from collections.abc import Callable
|
|
3
4
|
from pathlib import Path
|
|
4
|
-
from typing import Any,
|
|
5
|
+
from typing import Any, Literal
|
|
5
6
|
|
|
6
7
|
import polars as pl
|
|
7
|
-
from polars._typing import
|
|
8
|
-
Sequence, CsvEncoding)
|
|
8
|
+
from polars._typing import IO, CsvEncoding, PolarsDataType, SchemaDict, Sequence
|
|
9
9
|
|
|
10
10
|
from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine
|
|
11
11
|
from flowfile_core.flowfile.flow_graph import FlowGraph
|
|
12
|
-
from flowfile_core.schemas import input_schema, transform_schema
|
|
12
|
+
from flowfile_core.schemas import cloud_storage_schemas, input_schema, transform_schema
|
|
13
|
+
from flowfile_frame.cloud_storage.secret_manager import get_current_user_id
|
|
13
14
|
from flowfile_frame.config import logger
|
|
14
15
|
from flowfile_frame.expr import col
|
|
15
16
|
from flowfile_frame.flow_frame import FlowFrame
|
|
16
|
-
from flowfile_frame.utils import create_flow_graph
|
|
17
|
-
|
|
18
|
-
from flowfile_frame.utils import generate_node_id
|
|
17
|
+
from flowfile_frame.utils import create_flow_graph, generate_node_id
|
|
18
|
+
|
|
19
19
|
|
|
20
20
|
def sum(expr):
|
|
21
21
|
"""Sum aggregation function."""
|
|
@@ -53,42 +53,42 @@ def count(expr):
|
|
|
53
53
|
|
|
54
54
|
|
|
55
55
|
def read_csv(
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
56
|
+
source: str | Path | IO[bytes] | bytes | list[str | Path | IO[bytes] | bytes],
|
|
57
|
+
*,
|
|
58
|
+
flow_graph: Any | None = None, # Using Any for FlowGraph placeholder
|
|
59
|
+
separator: str = ",",
|
|
60
|
+
convert_to_absolute_path: bool = True,
|
|
61
|
+
description: str | None = None,
|
|
62
|
+
has_header: bool = True,
|
|
63
|
+
new_columns: list[str] | None = None,
|
|
64
|
+
comment_prefix: str | None = None,
|
|
65
|
+
quote_char: str | None = '"',
|
|
66
|
+
skip_rows: int = 0,
|
|
67
|
+
skip_lines: int = 0,
|
|
68
|
+
schema: SchemaDict | None = None,
|
|
69
|
+
schema_overrides: SchemaDict | Sequence[PolarsDataType] | None = None,
|
|
70
|
+
null_values: str | list[str] | dict[str, str] | None = None,
|
|
71
|
+
missing_utf8_is_empty_string: bool = False,
|
|
72
|
+
ignore_errors: bool = False,
|
|
73
|
+
try_parse_dates: bool = False,
|
|
74
|
+
infer_schema: bool = True,
|
|
75
|
+
infer_schema_length: int | None = 100,
|
|
76
|
+
n_rows: int | None = None,
|
|
77
|
+
encoding: CsvEncoding = "utf8",
|
|
78
|
+
low_memory: bool = False,
|
|
79
|
+
rechunk: bool = False,
|
|
80
|
+
storage_options: dict[str, Any] | None = None,
|
|
81
|
+
skip_rows_after_header: int = 0,
|
|
82
|
+
row_index_name: str | None = None,
|
|
83
|
+
row_index_offset: int = 0,
|
|
84
|
+
eol_char: str = "\n",
|
|
85
|
+
raise_if_empty: bool = True,
|
|
86
|
+
truncate_ragged_lines: bool = False,
|
|
87
|
+
decimal_comma: bool = False,
|
|
88
|
+
glob: bool = True,
|
|
89
|
+
cache: bool = True,
|
|
90
|
+
with_column_names: Callable[[list[str]], list[str]] | None = None,
|
|
91
|
+
**other_options: Any,
|
|
92
92
|
) -> FlowFrame:
|
|
93
93
|
"""
|
|
94
94
|
Read a CSV file into a FlowFrame.
|
|
@@ -146,55 +146,57 @@ def read_csv(
|
|
|
146
146
|
current_source_path_for_native = None
|
|
147
147
|
if isinstance(source, (str, os.PathLike)):
|
|
148
148
|
current_source_path_for_native = str(source)
|
|
149
|
-
if
|
|
149
|
+
if "~" in current_source_path_for_native:
|
|
150
150
|
current_source_path_for_native = os.path.expanduser(current_source_path_for_native)
|
|
151
151
|
elif isinstance(source, list) and all(isinstance(s, (str, os.PathLike)) for s in source):
|
|
152
152
|
current_source_path_for_native = str(source[0]) if source else None
|
|
153
|
-
if current_source_path_for_native and
|
|
154
|
-
|
|
153
|
+
if current_source_path_for_native and "~" in current_source_path_for_native:
|
|
154
|
+
current_source_path_for_native = os.path.expanduser(current_source_path_for_native)
|
|
155
155
|
elif isinstance(source, (io.BytesIO, io.StringIO)):
|
|
156
156
|
logger.warning("Read from bytes io from csv not supported, converting data to raw data")
|
|
157
157
|
return from_dict(pl.read_csv(source), flow_graph=flow_graph, description=description)
|
|
158
|
-
actual_infer_schema_length:
|
|
158
|
+
actual_infer_schema_length: int | None
|
|
159
159
|
if not infer_schema:
|
|
160
160
|
actual_infer_schema_length = 0
|
|
161
161
|
else:
|
|
162
162
|
actual_infer_schema_length = infer_schema_length
|
|
163
163
|
can_use_native = (
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
164
|
+
current_source_path_for_native is not None
|
|
165
|
+
and comment_prefix is None
|
|
166
|
+
and skip_lines == 0
|
|
167
|
+
and schema is None
|
|
168
|
+
and schema_overrides is None
|
|
169
|
+
and null_values is None
|
|
170
|
+
and not missing_utf8_is_empty_string
|
|
171
|
+
and not try_parse_dates
|
|
172
|
+
and n_rows is None
|
|
173
|
+
and not low_memory
|
|
174
|
+
and not rechunk
|
|
175
|
+
and storage_options is None
|
|
176
|
+
and skip_rows_after_header == 0
|
|
177
|
+
and row_index_name is None
|
|
178
|
+
and row_index_offset == 0
|
|
179
|
+
and eol_char == "\n"
|
|
180
|
+
and not decimal_comma
|
|
181
|
+
and new_columns is None
|
|
182
|
+
and glob is True
|
|
183
183
|
)
|
|
184
184
|
if can_use_native and current_source_path_for_native:
|
|
185
185
|
received_table = input_schema.ReceivedTable(
|
|
186
|
-
file_type=
|
|
186
|
+
file_type="csv",
|
|
187
187
|
path=current_source_path_for_native,
|
|
188
188
|
name=Path(current_source_path_for_native).name,
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
189
|
+
table_settings=input_schema.InputCsvTable(
|
|
190
|
+
delimiter=separator,
|
|
191
|
+
has_headers=has_header,
|
|
192
|
+
encoding=encoding,
|
|
193
|
+
starting_from_line=skip_rows,
|
|
194
|
+
quote_char=quote_char if quote_char is not None else '"',
|
|
195
|
+
infer_schema_length=actual_infer_schema_length if actual_infer_schema_length is not None else 10000,
|
|
196
|
+
truncate_ragged_lines=truncate_ragged_lines,
|
|
197
|
+
ignore_errors=ignore_errors,
|
|
198
|
+
row_delimiter=eol_char,
|
|
199
|
+
),
|
|
198
200
|
)
|
|
199
201
|
if convert_to_absolute_path:
|
|
200
202
|
try:
|
|
@@ -211,15 +213,13 @@ def read_csv(
|
|
|
211
213
|
pos_x=100,
|
|
212
214
|
pos_y=100,
|
|
213
215
|
is_setup=True,
|
|
214
|
-
description=read_node_description
|
|
216
|
+
description=read_node_description,
|
|
215
217
|
)
|
|
216
218
|
flow_graph.add_read(read_node)
|
|
217
219
|
flow_graph.get_node(1)
|
|
218
220
|
|
|
219
221
|
result_frame = FlowFrame(
|
|
220
|
-
data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
|
|
221
|
-
flow_graph=flow_graph,
|
|
222
|
-
node_id=node_id
|
|
222
|
+
data=flow_graph.get_node(node_id).get_resulting_data().data_frame, flow_graph=flow_graph, node_id=node_id
|
|
223
223
|
)
|
|
224
224
|
flow_graph.get_node(1)
|
|
225
225
|
return result_frame
|
|
@@ -256,13 +256,15 @@ def read_csv(
|
|
|
256
256
|
glob=glob,
|
|
257
257
|
cache=cache,
|
|
258
258
|
with_column_names=with_column_names,
|
|
259
|
-
**other_options
|
|
259
|
+
**other_options,
|
|
260
260
|
)
|
|
261
261
|
polars_code_node_description = description or "Read CSV with Polars scan_csv"
|
|
262
262
|
if isinstance(source, (str, os.PathLike)):
|
|
263
263
|
polars_code_node_description = description or f"Read CSV with Polars scan_csv from {Path(source).name}"
|
|
264
264
|
elif isinstance(source, list) and source and isinstance(source[0], (str, os.PathLike)):
|
|
265
|
-
polars_code_node_description =
|
|
265
|
+
polars_code_node_description = (
|
|
266
|
+
description or f"Read CSV with Polars scan_csv from {Path(source[0]).name} (and possibly others)"
|
|
267
|
+
)
|
|
266
268
|
|
|
267
269
|
# Assuming input_schema.NodePolarsCode, transform_schema.PolarsCodeInput are defined
|
|
268
270
|
polars_code_settings = input_schema.NodePolarsCode(
|
|
@@ -270,7 +272,7 @@ def read_csv(
|
|
|
270
272
|
node_id=node_id,
|
|
271
273
|
polars_code_input=transform_schema.PolarsCodeInput(polars_code=polars_code),
|
|
272
274
|
is_setup=True,
|
|
273
|
-
description=polars_code_node_description
|
|
275
|
+
description=polars_code_node_description,
|
|
274
276
|
)
|
|
275
277
|
flow_graph.add_polars_code(polars_code_settings)
|
|
276
278
|
return FlowFrame(
|
|
@@ -281,28 +283,28 @@ def read_csv(
|
|
|
281
283
|
|
|
282
284
|
|
|
283
285
|
def _build_polars_code_args(
|
|
284
|
-
source:
|
|
286
|
+
source: str | Path | IO[bytes] | bytes | list[str | Path | IO[bytes] | bytes],
|
|
285
287
|
separator: str,
|
|
286
288
|
has_header: bool,
|
|
287
|
-
new_columns:
|
|
288
|
-
comment_prefix:
|
|
289
|
-
quote_char:
|
|
289
|
+
new_columns: list[str] | None,
|
|
290
|
+
comment_prefix: str | None,
|
|
291
|
+
quote_char: str | None,
|
|
290
292
|
skip_rows: int,
|
|
291
293
|
skip_lines: int,
|
|
292
|
-
schema:
|
|
293
|
-
schema_overrides:
|
|
294
|
-
null_values:
|
|
294
|
+
schema: SchemaDict | None,
|
|
295
|
+
schema_overrides: SchemaDict | Sequence[PolarsDataType] | None,
|
|
296
|
+
null_values: str | list[str] | dict[str, str] | None,
|
|
295
297
|
missing_utf8_is_empty_string: bool,
|
|
296
298
|
ignore_errors: bool,
|
|
297
299
|
try_parse_dates: bool,
|
|
298
|
-
infer_schema_length:
|
|
299
|
-
n_rows:
|
|
300
|
+
infer_schema_length: int | None,
|
|
301
|
+
n_rows: int | None,
|
|
300
302
|
encoding: CsvEncoding,
|
|
301
303
|
low_memory: bool,
|
|
302
304
|
rechunk: bool,
|
|
303
|
-
storage_options:
|
|
305
|
+
storage_options: dict[str, Any] | None,
|
|
304
306
|
skip_rows_after_header: int,
|
|
305
|
-
row_index_name:
|
|
307
|
+
row_index_name: str | None,
|
|
306
308
|
row_index_offset: int,
|
|
307
309
|
eol_char: str,
|
|
308
310
|
raise_if_empty: bool,
|
|
@@ -310,8 +312,8 @@ def _build_polars_code_args(
|
|
|
310
312
|
decimal_comma: bool,
|
|
311
313
|
glob: bool,
|
|
312
314
|
cache: bool,
|
|
313
|
-
with_column_names:
|
|
314
|
-
**other_options: Any
|
|
315
|
+
with_column_names: Callable[[list[str]], list[str]] | None,
|
|
316
|
+
**other_options: Any,
|
|
315
317
|
) -> str:
|
|
316
318
|
source_repr: str
|
|
317
319
|
if isinstance(source, (str, Path)):
|
|
@@ -320,41 +322,41 @@ def _build_polars_code_args(
|
|
|
320
322
|
source_repr = repr([str(p) for p in source])
|
|
321
323
|
elif isinstance(source, bytes):
|
|
322
324
|
source_repr = "source_bytes_obj"
|
|
323
|
-
elif hasattr(source,
|
|
325
|
+
elif hasattr(source, "read"):
|
|
324
326
|
source_repr = "source_file_like_obj"
|
|
325
327
|
else:
|
|
326
328
|
source_repr = repr(source)
|
|
327
329
|
|
|
328
330
|
param_mapping = {
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
331
|
+
"has_header": (True, lambda x: str(x)),
|
|
332
|
+
"separator": (",", lambda x: repr(str(x))),
|
|
333
|
+
"comment_prefix": (None, lambda x: repr(str(x)) if x is not None else "None"),
|
|
334
|
+
"quote_char": ('"', lambda x: repr(str(x)) if x is not None else "None"),
|
|
335
|
+
"skip_rows": (0, str),
|
|
336
|
+
"skip_lines": (0, str),
|
|
337
|
+
"schema": (None, lambda x: repr(x) if x is not None else "None"),
|
|
338
|
+
"schema_overrides": (None, lambda x: repr(x) if x is not None else "None"),
|
|
339
|
+
"null_values": (None, lambda x: repr(x) if x is not None else "None"),
|
|
340
|
+
"missing_utf8_is_empty_string": (False, str),
|
|
341
|
+
"ignore_errors": (False, str),
|
|
342
|
+
"cache": (True, str),
|
|
343
|
+
"with_column_names": (None, lambda x: repr(x) if x is not None else "None"),
|
|
344
|
+
"infer_schema_length": (100, lambda x: str(x) if x is not None else "None"),
|
|
345
|
+
"n_rows": (None, lambda x: str(x) if x is not None else "None"),
|
|
346
|
+
"encoding": ("utf8", lambda x: repr(str(x))),
|
|
347
|
+
"low_memory": (False, str),
|
|
348
|
+
"rechunk": (False, str),
|
|
349
|
+
"skip_rows_after_header": (0, str),
|
|
350
|
+
"row_index_name": (None, lambda x: repr(str(x)) if x is not None else "None"),
|
|
351
|
+
"row_index_offset": (0, str),
|
|
352
|
+
"try_parse_dates": (False, str),
|
|
353
|
+
"eol_char": ("\n", lambda x: repr(str(x))),
|
|
354
|
+
"new_columns": (None, lambda x: repr(x) if x is not None else "None"),
|
|
355
|
+
"raise_if_empty": (True, str),
|
|
356
|
+
"truncate_ragged_lines": (False, str),
|
|
357
|
+
"decimal_comma": (False, str),
|
|
358
|
+
"glob": (True, str),
|
|
359
|
+
"storage_options": (None, lambda x: repr(x) if x is not None else "None"),
|
|
358
360
|
}
|
|
359
361
|
|
|
360
362
|
all_vars = locals()
|
|
@@ -379,8 +381,9 @@ def _build_polars_code_args(
|
|
|
379
381
|
return polars_code
|
|
380
382
|
|
|
381
383
|
|
|
382
|
-
def read_parquet(
|
|
383
|
-
|
|
384
|
+
def read_parquet(
|
|
385
|
+
source, *, flow_graph: FlowGraph = None, description: str = None, convert_to_absolute_path: bool = True, **options
|
|
386
|
+
) -> FlowFrame:
|
|
384
387
|
"""
|
|
385
388
|
Read a Parquet file into a FlowFrame.
|
|
386
389
|
|
|
@@ -394,7 +397,7 @@ def read_parquet(source, *, flow_graph: FlowGraph = None, description: str = Non
|
|
|
394
397
|
Returns:
|
|
395
398
|
A FlowFrame with the Parquet data
|
|
396
399
|
"""
|
|
397
|
-
if
|
|
400
|
+
if "~" in source:
|
|
398
401
|
file_path = os.path.expanduser(source)
|
|
399
402
|
node_id = generate_node_id()
|
|
400
403
|
|
|
@@ -404,9 +407,7 @@ def read_parquet(source, *, flow_graph: FlowGraph = None, description: str = Non
|
|
|
404
407
|
flow_id = flow_graph.flow_id
|
|
405
408
|
|
|
406
409
|
received_table = input_schema.ReceivedTable(
|
|
407
|
-
file_type=
|
|
408
|
-
path=source,
|
|
409
|
-
name=Path(source).name,
|
|
410
|
+
file_type="parquet", path=source, name=Path(source).name, table_settings=input_schema.InputParquetTable()
|
|
410
411
|
)
|
|
411
412
|
if convert_to_absolute_path:
|
|
412
413
|
received_table.path = received_table.abs_file_path
|
|
@@ -418,15 +419,13 @@ def read_parquet(source, *, flow_graph: FlowGraph = None, description: str = Non
|
|
|
418
419
|
pos_x=100,
|
|
419
420
|
pos_y=100,
|
|
420
421
|
is_setup=True,
|
|
421
|
-
description=description
|
|
422
|
+
description=description,
|
|
422
423
|
)
|
|
423
424
|
|
|
424
425
|
flow_graph.add_read(read_node)
|
|
425
426
|
|
|
426
427
|
return FlowFrame(
|
|
427
|
-
data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
|
|
428
|
-
flow_graph=flow_graph,
|
|
429
|
-
node_id=node_id
|
|
428
|
+
data=flow_graph.get_node(node_id).get_resulting_data().data_frame, flow_graph=flow_graph, node_id=node_id
|
|
430
429
|
)
|
|
431
430
|
|
|
432
431
|
|
|
@@ -455,7 +454,7 @@ def from_dict(data, *, flow_graph: FlowGraph = None, description: str = None) ->
|
|
|
455
454
|
pos_x=100,
|
|
456
455
|
pos_y=100,
|
|
457
456
|
is_setup=True,
|
|
458
|
-
description=description
|
|
457
|
+
description=description,
|
|
459
458
|
)
|
|
460
459
|
|
|
461
460
|
# Add to graph
|
|
@@ -463,17 +462,17 @@ def from_dict(data, *, flow_graph: FlowGraph = None, description: str = None) ->
|
|
|
463
462
|
|
|
464
463
|
# Return new frame
|
|
465
464
|
return FlowFrame(
|
|
466
|
-
data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
|
|
467
|
-
flow_graph=flow_graph,
|
|
468
|
-
node_id=node_id
|
|
465
|
+
data=flow_graph.get_node(node_id).get_resulting_data().data_frame, flow_graph=flow_graph, node_id=node_id
|
|
469
466
|
)
|
|
470
467
|
|
|
471
468
|
|
|
472
|
-
def concat(
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
469
|
+
def concat(
|
|
470
|
+
frames: list["FlowFrame"],
|
|
471
|
+
how: str = "vertical",
|
|
472
|
+
rechunk: bool = False,
|
|
473
|
+
parallel: bool = True,
|
|
474
|
+
description: str = None,
|
|
475
|
+
) -> "FlowFrame":
|
|
477
476
|
"""
|
|
478
477
|
Concatenate multiple FlowFrames into one.
|
|
479
478
|
|
|
@@ -503,48 +502,46 @@ def concat(frames: List['FlowFrame'],
|
|
|
503
502
|
first_frame = frames[0]
|
|
504
503
|
remaining_frames = frames[1:]
|
|
505
504
|
|
|
506
|
-
return first_frame.concat(remaining_frames, how=how,
|
|
507
|
-
rechunk=rechunk, parallel=parallel,
|
|
508
|
-
description=description)
|
|
505
|
+
return first_frame.concat(remaining_frames, how=how, rechunk=rechunk, parallel=parallel, description=description)
|
|
509
506
|
|
|
510
507
|
|
|
511
508
|
def scan_csv(
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
509
|
+
source: str | Path | IO[bytes] | bytes | list[str | Path | IO[bytes] | bytes],
|
|
510
|
+
*,
|
|
511
|
+
flow_graph: Any | None = None, # Using Any for FlowGraph placeholder
|
|
512
|
+
separator: str = ",",
|
|
513
|
+
convert_to_absolute_path: bool = True,
|
|
514
|
+
description: str | None = None,
|
|
515
|
+
has_header: bool = True,
|
|
516
|
+
new_columns: list[str] | None = None,
|
|
517
|
+
comment_prefix: str | None = None,
|
|
518
|
+
quote_char: str | None = '"',
|
|
519
|
+
skip_rows: int = 0,
|
|
520
|
+
skip_lines: int = 0,
|
|
521
|
+
schema: SchemaDict | None = None,
|
|
522
|
+
schema_overrides: SchemaDict | Sequence[PolarsDataType] | None = None,
|
|
523
|
+
null_values: str | list[str] | dict[str, str] | None = None,
|
|
524
|
+
missing_utf8_is_empty_string: bool = False,
|
|
525
|
+
ignore_errors: bool = False,
|
|
526
|
+
try_parse_dates: bool = False,
|
|
527
|
+
infer_schema: bool = True,
|
|
528
|
+
infer_schema_length: int | None = 100,
|
|
529
|
+
n_rows: int | None = None,
|
|
530
|
+
encoding: CsvEncoding = "utf8",
|
|
531
|
+
low_memory: bool = False,
|
|
532
|
+
rechunk: bool = False,
|
|
533
|
+
storage_options: dict[str, Any] | None = None,
|
|
534
|
+
skip_rows_after_header: int = 0,
|
|
535
|
+
row_index_name: str | None = None,
|
|
536
|
+
row_index_offset: int = 0,
|
|
537
|
+
eol_char: str = "\n",
|
|
538
|
+
raise_if_empty: bool = True,
|
|
539
|
+
truncate_ragged_lines: bool = False,
|
|
540
|
+
decimal_comma: bool = False,
|
|
541
|
+
glob: bool = True,
|
|
542
|
+
cache: bool = True,
|
|
543
|
+
with_column_names: Callable[[list[str]], list[str]] | None = None,
|
|
544
|
+
**other_options: Any,
|
|
548
545
|
) -> FlowFrame:
|
|
549
546
|
"""
|
|
550
547
|
Scan a CSV file into a FlowFrame. This function is an alias for read_csv.
|
|
@@ -589,17 +586,12 @@ def scan_csv(
|
|
|
589
586
|
glob=glob,
|
|
590
587
|
cache=cache,
|
|
591
588
|
with_column_names=with_column_names,
|
|
592
|
-
**other_options
|
|
589
|
+
**other_options,
|
|
593
590
|
)
|
|
594
591
|
|
|
595
592
|
|
|
596
593
|
def scan_parquet(
|
|
597
|
-
|
|
598
|
-
*,
|
|
599
|
-
flow_graph: FlowGraph = None,
|
|
600
|
-
description: str = None,
|
|
601
|
-
convert_to_absolute_path: bool = True,
|
|
602
|
-
**options
|
|
594
|
+
source, *, flow_graph: FlowGraph = None, description: str = None, convert_to_absolute_path: bool = True, **options
|
|
603
595
|
) -> FlowFrame:
|
|
604
596
|
"""
|
|
605
597
|
Scan a Parquet file into a FlowFrame. This function is an alias for read_parquet.
|
|
@@ -614,17 +606,17 @@ def scan_parquet(
|
|
|
614
606
|
flow_graph=flow_graph,
|
|
615
607
|
description=description,
|
|
616
608
|
convert_to_absolute_path=convert_to_absolute_path,
|
|
617
|
-
**options
|
|
609
|
+
**options,
|
|
618
610
|
)
|
|
619
611
|
|
|
620
612
|
|
|
621
613
|
def scan_parquet_from_cloud_storage(
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
614
|
+
source: str,
|
|
615
|
+
*,
|
|
616
|
+
flow_graph: FlowGraph | None = None,
|
|
617
|
+
connection_name: str | None = None,
|
|
618
|
+
scan_mode: Literal["single_file", "directory", None] = None,
|
|
619
|
+
description: str | None = None,
|
|
628
620
|
) -> FlowFrame:
|
|
629
621
|
node_id = generate_node_id()
|
|
630
622
|
|
|
@@ -641,29 +633,28 @@ def scan_parquet_from_cloud_storage(
|
|
|
641
633
|
settings = input_schema.NodeCloudStorageReader(
|
|
642
634
|
flow_id=flow_id,
|
|
643
635
|
node_id=node_id,
|
|
644
|
-
cloud_storage_settings=cloud_storage_schemas.CloudStorageReadSettings(
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
file_format="parquet"),
|
|
636
|
+
cloud_storage_settings=cloud_storage_schemas.CloudStorageReadSettings(
|
|
637
|
+
resource_path=source, scan_mode=scan_mode, connection_name=connection_name, file_format="parquet"
|
|
638
|
+
),
|
|
648
639
|
user_id=get_current_user_id(),
|
|
649
|
-
description=description
|
|
640
|
+
description=description,
|
|
641
|
+
)
|
|
650
642
|
flow_graph.add_cloud_storage_reader(settings)
|
|
651
643
|
return FlowFrame(
|
|
652
|
-
data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
|
|
653
|
-
flow_graph=flow_graph,
|
|
654
|
-
node_id=node_id
|
|
644
|
+
data=flow_graph.get_node(node_id).get_resulting_data().data_frame, flow_graph=flow_graph, node_id=node_id
|
|
655
645
|
)
|
|
656
646
|
|
|
657
647
|
|
|
658
648
|
def scan_csv_from_cloud_storage(
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
649
|
+
source: str,
|
|
650
|
+
*,
|
|
651
|
+
flow_graph: FlowGraph | None = None,
|
|
652
|
+
connection_name: str | None = None,
|
|
653
|
+
scan_mode: Literal["single_file", "directory", None] = None,
|
|
654
|
+
delimiter: str = ";",
|
|
655
|
+
has_header: bool | None = True,
|
|
656
|
+
encoding: CsvEncoding | None = "utf8",
|
|
657
|
+
) -> FlowFrame:
|
|
667
658
|
node_id = generate_node_id()
|
|
668
659
|
|
|
669
660
|
if scan_mode is None:
|
|
@@ -678,28 +669,26 @@ def scan_csv_from_cloud_storage(
|
|
|
678
669
|
settings = input_schema.NodeCloudStorageReader(
|
|
679
670
|
flow_id=flow_id,
|
|
680
671
|
node_id=node_id,
|
|
681
|
-
cloud_storage_settings=cloud_storage_schemas.CloudStorageReadSettings(
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
672
|
+
cloud_storage_settings=cloud_storage_schemas.CloudStorageReadSettings(
|
|
673
|
+
resource_path=source,
|
|
674
|
+
scan_mode=scan_mode,
|
|
675
|
+
connection_name=connection_name,
|
|
676
|
+
csv_delimiter=delimiter,
|
|
677
|
+
csv_encoding=encoding,
|
|
678
|
+
csv_has_header=has_header,
|
|
679
|
+
file_format="csv",
|
|
680
|
+
),
|
|
681
|
+
user_id=get_current_user_id(),
|
|
682
|
+
)
|
|
689
683
|
flow_graph.add_cloud_storage_reader(settings)
|
|
690
684
|
return FlowFrame(
|
|
691
|
-
data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
|
|
692
|
-
flow_graph=flow_graph,
|
|
693
|
-
node_id=node_id
|
|
685
|
+
data=flow_graph.get_node(node_id).get_resulting_data().data_frame, flow_graph=flow_graph, node_id=node_id
|
|
694
686
|
)
|
|
695
687
|
|
|
696
688
|
|
|
697
689
|
def scan_delta(
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
flow_graph: Optional[FlowGraph] = None,
|
|
701
|
-
connection_name: Optional[str] = None,
|
|
702
|
-
version: int = None) -> FlowFrame:
|
|
690
|
+
source: str, *, flow_graph: FlowGraph | None = None, connection_name: str | None = None, version: int = None
|
|
691
|
+
) -> FlowFrame:
|
|
703
692
|
node_id = generate_node_id()
|
|
704
693
|
if flow_graph is None:
|
|
705
694
|
flow_graph = create_flow_graph()
|
|
@@ -707,25 +696,23 @@ def scan_delta(
|
|
|
707
696
|
settings = input_schema.NodeCloudStorageReader(
|
|
708
697
|
flow_id=flow_id,
|
|
709
698
|
node_id=node_id,
|
|
710
|
-
cloud_storage_settings=cloud_storage_schemas.CloudStorageReadSettings(
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
699
|
+
cloud_storage_settings=cloud_storage_schemas.CloudStorageReadSettings(
|
|
700
|
+
resource_path=source, connection_name=connection_name, file_format="delta", delta_version=version
|
|
701
|
+
),
|
|
702
|
+
user_id=get_current_user_id(),
|
|
703
|
+
)
|
|
715
704
|
flow_graph.add_cloud_storage_reader(settings)
|
|
716
705
|
return FlowFrame(
|
|
717
|
-
data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
|
|
718
|
-
flow_graph=flow_graph,
|
|
719
|
-
node_id=node_id
|
|
706
|
+
data=flow_graph.get_node(node_id).get_resulting_data().data_frame, flow_graph=flow_graph, node_id=node_id
|
|
720
707
|
)
|
|
721
708
|
|
|
722
709
|
|
|
723
710
|
def scan_json_from_cloud_storage(
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
711
|
+
source: str,
|
|
712
|
+
*,
|
|
713
|
+
flow_graph: FlowGraph | None = None,
|
|
714
|
+
connection_name: str | None = None,
|
|
715
|
+
scan_mode: Literal["single_file", "directory", None] = None,
|
|
729
716
|
) -> FlowFrame:
|
|
730
717
|
node_id = generate_node_id()
|
|
731
718
|
|
|
@@ -741,15 +728,12 @@ def scan_json_from_cloud_storage(
|
|
|
741
728
|
settings = input_schema.NodeCloudStorageReader(
|
|
742
729
|
flow_id=flow_id,
|
|
743
730
|
node_id=node_id,
|
|
744
|
-
cloud_storage_settings=cloud_storage_schemas.CloudStorageReadSettings(
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
731
|
+
cloud_storage_settings=cloud_storage_schemas.CloudStorageReadSettings(
|
|
732
|
+
resource_path=source, scan_mode=scan_mode, connection_name=connection_name, file_format="json"
|
|
733
|
+
),
|
|
734
|
+
user_id=get_current_user_id(),
|
|
735
|
+
)
|
|
749
736
|
flow_graph.add_cloud_storage_reader(settings)
|
|
750
737
|
return FlowFrame(
|
|
751
|
-
data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
|
|
752
|
-
flow_graph=flow_graph,
|
|
753
|
-
node_id=node_id
|
|
738
|
+
data=flow_graph.get_node(node_id).get_resulting_data().data_frame, flow_graph=flow_graph, node_id=node_id
|
|
754
739
|
)
|
|
755
|
-
|