Flowfile 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backends/main.py +25 -22
- build_backends/main_prd.py +10 -19
- flowfile/__init__.py +178 -74
- flowfile/__main__.py +10 -7
- flowfile/api.py +51 -57
- flowfile/web/__init__.py +14 -9
- flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
- flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
- flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
- flowfile/web/static/assets/{CloudConnectionManager-0dfba9f2.js → CloudConnectionView-f13f202b.js} +11 -11
- flowfile/web/static/assets/{CloudStorageReader-d5b1b6c9.js → CloudStorageReader-0023d4a5.js} +10 -8
- flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
- flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
- flowfile/web/static/assets/{CloudStorageWriter-00d87aad.js → CloudStorageWriter-8e781e11.js} +10 -8
- flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
- flowfile/web/static/assets/{ColumnSelector-4685e75d.js → ColumnSelector-8ad68ea9.js} +3 -5
- flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-23e909da.js → ContextMenu-31ee57f0.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-70ae0c79.js → ContextMenu-69a74055.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-f149cf7c.js → ContextMenu-8e2051c6.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
- flowfile/web/static/assets/{CrossJoin-702a3edd.js → CrossJoin-03df6938.js} +12 -10
- flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
- flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
- flowfile/web/static/assets/{CustomNode-b1519993.js → CustomNode-8479239b.js} +36 -24
- flowfile/web/static/assets/{DatabaseConnectionSettings-6f3e4ea5.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
- flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
- flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
- flowfile/web/static/assets/{DatabaseReader-d38c7295.js → DatabaseReader-c58b9552.js} +25 -15
- flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
- flowfile/web/static/assets/{DatabaseManager-cf5ef661.js → DatabaseView-d26a9140.js} +11 -11
- flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
- flowfile/web/static/assets/{DatabaseWriter-b04ef46a.js → DatabaseWriter-4d05ddc7.js} +17 -10
- flowfile/web/static/assets/{designer-8da3ba3a.css → DesignerView-a6d0ee84.css} +614 -546
- flowfile/web/static/assets/{designer-9633482a.js → DesignerView-e6f5c0e8.js} +1107 -3170
- flowfile/web/static/assets/{documentation-ca400224.js → DocumentationView-2e78ef1b.js} +5 -5
- flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
- flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
- flowfile/web/static/assets/{ExploreData-5fa10ed8.js → ExploreData-7b54caca.js} +18 -9
- flowfile/web/static/assets/{ExternalSource-d39af878.js → ExternalSource-3fa399b2.js} +9 -7
- flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
- flowfile/web/static/assets/Filter-7494ea97.css +48 -0
- flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
- flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
- flowfile/web/static/assets/{Formula-6b04fb1d.js → Formula-aac42b1e.js} +13 -11
- flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
- flowfile/web/static/assets/{FuzzyMatch-999521f4.js → FuzzyMatch-cd9bbfca.js} +12 -10
- flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
- flowfile/web/static/assets/{GraphSolver-17dd2198.js → GraphSolver-c7e6780e.js} +13 -11
- flowfile/web/static/assets/{GroupBy-6b039e18.js → GroupBy-93c5d22b.js} +9 -7
- flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
- flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
- flowfile/web/static/assets/{Join-24d0f113.js → Join-a19b2de2.js} +13 -11
- flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
- flowfile/web/static/assets/LoginView-d325d632.css +172 -0
- flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
- flowfile/web/static/assets/{ManualInput-34639209.js → ManualInput-8d3374b2.js} +170 -116
- flowfile/web/static/assets/{MultiSelect-0e8724a3.js → MultiSelect-ad1b6243.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
- flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
- flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
- flowfile/web/static/assets/{NumericInput-3d63a470.js → NumericInput-7100234c.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
- flowfile/web/static/assets/{Output-283fe388.css → Output-35e97000.css} +6 -6
- flowfile/web/static/assets/{Output-edea9802.js → Output-f5efd2aa.js} +12 -9
- flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
- flowfile/web/static/assets/{Pivot-61d19301.js → Pivot-d981d23c.js} +11 -9
- flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
- flowfile/web/static/assets/{PivotValidation-f97fec5b.js → PivotValidation-39386e95.js} +3 -3
- flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
- flowfile/web/static/assets/{PivotValidation-de9f43fe.js → PivotValidation-63de1f73.js} +3 -3
- flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
- flowfile/web/static/assets/{PolarsCode-bc3c9984.js → PolarsCode-f9d69217.js} +18 -9
- flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
- flowfile/web/static/assets/PopOver-d96599db.css +33 -0
- flowfile/web/static/assets/{Read-e808b239.css → Read-36e7bd51.css} +12 -12
- flowfile/web/static/assets/{Read-64a3f259.js → Read-aec2e377.js} +14 -11
- flowfile/web/static/assets/{RecordCount-3d5039be.js → RecordCount-78ed6845.js} +6 -4
- flowfile/web/static/assets/{RecordId-597510e0.js → RecordId-2156e890.js} +8 -6
- flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
- flowfile/web/static/assets/{SQLQueryComponent-df51adbe.js → SQLQueryComponent-48c72f5b.js} +3 -3
- flowfile/web/static/assets/{Sample-4be0a507.js → Sample-1352ca74.js} +6 -4
- flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
- flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
- flowfile/web/static/assets/{SecretManager-4839be57.js → SecretsView-17df66ee.js} +35 -36
- flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
- flowfile/web/static/assets/{Select-9b72f201.js → Select-0aee4c54.js} +9 -7
- flowfile/web/static/assets/{SettingsSection-f0f75a42.js → SettingsSection-0784e157.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-e1e9c953.js → SettingsSection-cd341bb6.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-7ded385d.js → SettingsSection-f2002a6d.js} +3 -3
- flowfile/web/static/assets/{SingleSelect-6c777aac.js → SingleSelect-460cc0ea.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
- flowfile/web/static/assets/{SliderInput-7cb93e62.js → SliderInput-5d926864.js} +7 -4
- flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
- flowfile/web/static/assets/{Sort-6cbde21a.js → Sort-3cdc971b.js} +9 -7
- flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
- flowfile/web/static/assets/{TextInput-d9a40c11.js → TextInput-a2d0bfbd.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-5896c375.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
- flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
- flowfile/web/static/assets/{TextToRows-c4fcbf4d.js → TextToRows-918945f7.js} +11 -10
- flowfile/web/static/assets/{ToggleSwitch-4ef91d19.js → ToggleSwitch-f0ef5196.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
- flowfile/web/static/assets/{UnavailableFields-a03f512c.js → UnavailableFields-bdad6144.js} +4 -4
- flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
- flowfile/web/static/assets/{Union-bfe9b996.js → Union-e8ab8c86.js} +8 -6
- flowfile/web/static/assets/{Unique-5d023a27.js → Unique-8cd4f976.js} +13 -10
- flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
- flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
- flowfile/web/static/assets/{Unpivot-91cc5354.js → Unpivot-8da14095.js} +10 -8
- flowfile/web/static/assets/{UnpivotValidation-7ee2de44.js → UnpivotValidation-6f7d89ff.js} +3 -3
- flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
- flowfile/web/static/assets/{VueGraphicWalker-e51b9924.js → VueGraphicWalker-3fb312e1.js} +4 -4
- flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
- flowfile/web/static/assets/{api-cf1221f0.js → api-24483f0d.js} +1 -1
- flowfile/web/static/assets/{api-c1bad5ca.js → api-8b81fa73.js} +1 -1
- flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
- flowfile/web/static/assets/{dropDown-614b998d.js → dropDown-ac0fda9d.js} +3 -3
- flowfile/web/static/assets/{fullEditor-f7971590.js → fullEditor-5497a84a.js} +11 -10
- flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
- flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
- flowfile/web/static/assets/{genericNodeSettings-4fe5f36b.js → genericNodeSettings-99014e1d.js} +5 -5
- flowfile/web/static/assets/index-07dda503.js +38 -0
- flowfile/web/static/assets/index-3ba44389.js +2696 -0
- flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
- flowfile/web/static/assets/{index-5429bbf8.js → index-fb6493ae.js} +41626 -40867
- flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
- flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
- flowfile/web/static/assets/{outputCsv-076b85ab.js → outputCsv-8f8ba42d.js} +3 -3
- flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
- flowfile/web/static/assets/{outputExcel-0fd17dbe.js → outputExcel-393f4fef.js} +3 -3
- flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
- flowfile/web/static/assets/{outputParquet-b61e0847.js → outputParquet-07c81f65.js} +4 -4
- flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
- flowfile/web/static/assets/{readCsv-a8bb8b61.js → readCsv-07f6d9ad.js} +3 -3
- flowfile/web/static/assets/{readCsv-c767cb37.css → readCsv-3bfac4c3.css} +15 -15
- flowfile/web/static/assets/{readExcel-806d2826.css → readExcel-3db6b763.css} +13 -13
- flowfile/web/static/assets/{readExcel-67b4aee0.js → readExcel-ed69bc8f.js} +5 -5
- flowfile/web/static/assets/{readParquet-48c81530.css → readParquet-c5244ad5.css} +4 -4
- flowfile/web/static/assets/{readParquet-92ce1dbc.js → readParquet-e3ed4528.js} +3 -3
- flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
- flowfile/web/static/assets/{selectDynamic-92e25ee3.js → selectDynamic-80b92899.js} +5 -5
- flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
- flowfile/web/static/assets/{vue-codemirror.esm-41b0e0d7.js → vue-codemirror.esm-0965f39f.js} +31 -640
- flowfile/web/static/assets/{vue-content-loader.es-2c8e608f.js → vue-content-loader.es-c506ad97.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +2 -3
- flowfile-0.5.3.dist-info/RECORD +402 -0
- flowfile_core/__init__.py +13 -6
- flowfile_core/auth/jwt.py +51 -16
- flowfile_core/auth/models.py +32 -7
- flowfile_core/auth/password.py +89 -0
- flowfile_core/auth/secrets.py +8 -6
- flowfile_core/configs/__init__.py +9 -7
- flowfile_core/configs/flow_logger.py +15 -14
- flowfile_core/configs/node_store/__init__.py +72 -4
- flowfile_core/configs/node_store/nodes.py +155 -172
- flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
- flowfile_core/configs/settings.py +28 -15
- flowfile_core/database/connection.py +7 -6
- flowfile_core/database/init_db.py +96 -2
- flowfile_core/database/models.py +3 -1
- flowfile_core/fileExplorer/__init__.py +17 -0
- flowfile_core/fileExplorer/funcs.py +123 -57
- flowfile_core/fileExplorer/utils.py +10 -11
- flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
- flowfile_core/flowfile/analytics/analytics_processor.py +26 -24
- flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
- flowfile_core/flowfile/analytics/utils.py +1 -1
- flowfile_core/flowfile/code_generator/code_generator.py +358 -244
- flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
- flowfile_core/flowfile/database_connection_manager/models.py +1 -1
- flowfile_core/flowfile/extensions.py +17 -12
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +115 -83
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +481 -423
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +31 -20
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +14 -15
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
- flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +190 -127
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
- flowfile_core/flowfile/flow_data_engine/utils.py +99 -67
- flowfile_core/flowfile/flow_graph.py +918 -571
- flowfile_core/flowfile/flow_graph_utils.py +31 -49
- flowfile_core/flowfile/flow_node/flow_node.py +330 -233
- flowfile_core/flowfile/flow_node/models.py +53 -41
- flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
- flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
- flowfile_core/flowfile/handler.py +80 -30
- flowfile_core/flowfile/manage/compatibility_enhancements.py +209 -126
- flowfile_core/flowfile/manage/io_flowfile.py +54 -57
- flowfile_core/flowfile/node_designer/__init__.py +15 -13
- flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
- flowfile_core/flowfile/node_designer/custom_node.py +162 -36
- flowfile_core/flowfile/node_designer/ui_components.py +135 -34
- flowfile_core/flowfile/schema_callbacks.py +71 -51
- flowfile_core/flowfile/setting_generator/__init__.py +0 -1
- flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
- flowfile_core/flowfile/setting_generator/settings.py +64 -53
- flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
- flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
- flowfile_core/flowfile/util/calculate_layout.py +9 -13
- flowfile_core/flowfile/util/execution_orderer.py +25 -17
- flowfile_core/flowfile/util/node_skipper.py +4 -4
- flowfile_core/flowfile/utils.py +19 -21
- flowfile_core/main.py +26 -19
- flowfile_core/routes/auth.py +284 -11
- flowfile_core/routes/cloud_connections.py +25 -25
- flowfile_core/routes/logs.py +21 -29
- flowfile_core/routes/public.py +3 -3
- flowfile_core/routes/routes.py +70 -34
- flowfile_core/routes/secrets.py +25 -27
- flowfile_core/routes/user_defined_components.py +483 -4
- flowfile_core/run_lock.py +0 -1
- flowfile_core/schemas/__init__.py +4 -6
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
- flowfile_core/schemas/cloud_storage_schemas.py +59 -53
- flowfile_core/schemas/input_schema.py +231 -144
- flowfile_core/schemas/output_model.py +49 -34
- flowfile_core/schemas/schemas.py +116 -89
- flowfile_core/schemas/transform_schema.py +518 -263
- flowfile_core/schemas/yaml_types.py +21 -7
- flowfile_core/secret_manager/secret_manager.py +17 -13
- flowfile_core/types.py +29 -9
- flowfile_core/utils/arrow_reader.py +7 -6
- flowfile_core/utils/excel_file_manager.py +3 -3
- flowfile_core/utils/fileManager.py +7 -7
- flowfile_core/utils/fl_executor.py +8 -10
- flowfile_core/utils/utils.py +4 -4
- flowfile_core/utils/validate_setup.py +5 -4
- flowfile_frame/__init__.py +106 -51
- flowfile_frame/adapters.py +2 -9
- flowfile_frame/adding_expr.py +73 -32
- flowfile_frame/cloud_storage/frame_helpers.py +27 -23
- flowfile_frame/cloud_storage/secret_manager.py +12 -26
- flowfile_frame/config.py +2 -5
- flowfile_frame/expr.py +311 -218
- flowfile_frame/expr.pyi +160 -159
- flowfile_frame/expr_name.py +23 -23
- flowfile_frame/flow_frame.py +571 -476
- flowfile_frame/flow_frame.pyi +123 -104
- flowfile_frame/flow_frame_methods.py +227 -246
- flowfile_frame/group_frame.py +50 -20
- flowfile_frame/join.py +2 -2
- flowfile_frame/lazy.py +129 -87
- flowfile_frame/lazy_methods.py +83 -30
- flowfile_frame/list_name_space.py +55 -50
- flowfile_frame/selectors.py +148 -68
- flowfile_frame/series.py +9 -7
- flowfile_frame/utils.py +19 -21
- flowfile_worker/__init__.py +12 -7
- flowfile_worker/configs.py +11 -19
- flowfile_worker/create/__init__.py +14 -9
- flowfile_worker/create/funcs.py +114 -77
- flowfile_worker/create/models.py +46 -43
- flowfile_worker/create/pl_types.py +14 -15
- flowfile_worker/create/read_excel_tables.py +34 -41
- flowfile_worker/create/utils.py +22 -19
- flowfile_worker/external_sources/s3_source/main.py +18 -51
- flowfile_worker/external_sources/s3_source/models.py +34 -27
- flowfile_worker/external_sources/sql_source/main.py +8 -5
- flowfile_worker/external_sources/sql_source/models.py +13 -9
- flowfile_worker/flow_logger.py +10 -8
- flowfile_worker/funcs.py +214 -155
- flowfile_worker/main.py +11 -17
- flowfile_worker/models.py +35 -28
- flowfile_worker/process_manager.py +2 -3
- flowfile_worker/routes.py +121 -90
- flowfile_worker/secrets.py +9 -6
- flowfile_worker/spawner.py +80 -49
- flowfile_worker/utils.py +3 -2
- shared/__init__.py +2 -7
- shared/storage_config.py +25 -13
- test_utils/postgres/commands.py +3 -2
- test_utils/postgres/fixtures.py +9 -9
- test_utils/s3/commands.py +1 -1
- test_utils/s3/data_generator.py +3 -4
- test_utils/s3/demo_data_generator.py +4 -7
- test_utils/s3/fixtures.py +7 -5
- tools/migrate/__init__.py +1 -1
- tools/migrate/__main__.py +16 -29
- tools/migrate/legacy_schemas.py +251 -190
- tools/migrate/migrate.py +193 -181
- tools/migrate/tests/conftest.py +1 -3
- tools/migrate/tests/test_migrate.py +36 -41
- tools/migrate/tests/test_migration_e2e.py +28 -29
- tools/migrate/tests/test_node_migrations.py +50 -20
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
- flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
- flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
- flowfile/web/static/assets/Filter-9b6d08db.js +0 -164
- flowfile/web/static/assets/Filter-f62091b3.css +0 -20
- flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
- flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
- flowfile/web/static/assets/nodeInput-5d0d6b79.js +0 -41
- flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
- flowfile/web/static/assets/secretApi-68435402.js +0 -46
- flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
- flowfile-0.5.1.dist-info/RECORD +0 -388
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
flowfile_worker/create/funcs.py
CHANGED
|
@@ -1,9 +1,16 @@
|
|
|
1
|
-
import polars as pl
|
|
2
1
|
import os
|
|
3
2
|
|
|
4
|
-
|
|
3
|
+
import polars as pl
|
|
4
|
+
|
|
5
|
+
from flowfile_worker.create.models import (
|
|
6
|
+
InputCsvTable,
|
|
7
|
+
InputExcelTable,
|
|
8
|
+
InputJsonTable,
|
|
9
|
+
InputParquetTable,
|
|
10
|
+
ReceivedTable,
|
|
11
|
+
)
|
|
12
|
+
from flowfile_worker.create.read_excel_tables import df_from_calamine_xlsx, df_from_openpyxl
|
|
5
13
|
from flowfile_worker.create.utils import create_fake_data
|
|
6
|
-
from flowfile_worker.create.read_excel_tables import df_from_openpyxl, df_from_calamine_xlsx
|
|
7
14
|
|
|
8
15
|
|
|
9
16
|
def create_from_path_json(received_table: ReceivedTable):
|
|
@@ -13,42 +20,53 @@ def create_from_path_json(received_table: ReceivedTable):
|
|
|
13
20
|
f = received_table.abs_file_path
|
|
14
21
|
gbs_to_load = os.path.getsize(f) / 1024 / 1000 / 1000
|
|
15
22
|
low_mem = gbs_to_load > 10
|
|
16
|
-
if input_table_settings.encoding.upper() ==
|
|
23
|
+
if input_table_settings.encoding.upper() == "UTF8" or input_table_settings.encoding.upper() == "UTF-8":
|
|
17
24
|
try:
|
|
18
|
-
df = pl.scan_csv(
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
25
|
+
df = pl.scan_csv(
|
|
26
|
+
f,
|
|
27
|
+
low_memory=low_mem,
|
|
28
|
+
try_parse_dates=True,
|
|
29
|
+
separator=input_table_settings.delimiter,
|
|
30
|
+
has_header=input_table_settings.has_headers,
|
|
31
|
+
skip_rows=input_table_settings.starting_from_line,
|
|
32
|
+
encoding="utf8",
|
|
33
|
+
infer_schema_length=input_table_settings.infer_schema_length,
|
|
34
|
+
)
|
|
26
35
|
df.head(1).collect()
|
|
27
36
|
return df
|
|
28
37
|
except:
|
|
29
38
|
try:
|
|
30
|
-
df = pl.scan_csv(
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
39
|
+
df = pl.scan_csv(
|
|
40
|
+
f,
|
|
41
|
+
low_memory=low_mem,
|
|
42
|
+
separator=input_table_settings.delimiter,
|
|
43
|
+
has_header=input_table_settings.has_headers,
|
|
44
|
+
skip_rows=input_table_settings.starting_from_line,
|
|
45
|
+
encoding="utf8-lossy",
|
|
46
|
+
ignore_errors=True,
|
|
47
|
+
)
|
|
36
48
|
return df
|
|
37
49
|
except:
|
|
38
|
-
df = pl.scan_csv(
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
50
|
+
df = pl.scan_csv(
|
|
51
|
+
f,
|
|
52
|
+
low_memory=low_mem,
|
|
53
|
+
separator=input_table_settings.delimiter,
|
|
54
|
+
has_header=input_table_settings.has_headers,
|
|
55
|
+
skip_rows=input_table_settings.starting_from_line,
|
|
56
|
+
encoding="utf8",
|
|
57
|
+
ignore_errors=True,
|
|
58
|
+
)
|
|
44
59
|
return df
|
|
45
60
|
else:
|
|
46
|
-
df = pl.read_csv(
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
61
|
+
df = pl.read_csv(
|
|
62
|
+
f,
|
|
63
|
+
low_memory=low_mem,
|
|
64
|
+
separator=input_table_settings.delimiter,
|
|
65
|
+
has_header=input_table_settings.has_headers,
|
|
66
|
+
skip_rows=input_table_settings.starting_from_line,
|
|
67
|
+
encoding=input_table_settings.encoding,
|
|
68
|
+
ignore_errors=True,
|
|
69
|
+
)
|
|
52
70
|
return df
|
|
53
71
|
|
|
54
72
|
|
|
@@ -59,43 +77,53 @@ def create_from_path_csv(received_table: ReceivedTable) -> pl.DataFrame:
|
|
|
59
77
|
input_table_settings: InputCsvTable = received_table.table_settings
|
|
60
78
|
gbs_to_load = os.path.getsize(f) / 1024 / 1000 / 1000
|
|
61
79
|
low_mem = gbs_to_load > 10
|
|
62
|
-
if input_table_settings.encoding.upper() ==
|
|
80
|
+
if input_table_settings.encoding.upper() == "UTF8" or input_table_settings.encoding.upper() == "UTF-8":
|
|
63
81
|
try:
|
|
64
|
-
df = pl.scan_csv(
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
82
|
+
df = pl.scan_csv(
|
|
83
|
+
f,
|
|
84
|
+
low_memory=low_mem,
|
|
85
|
+
try_parse_dates=True,
|
|
86
|
+
separator=input_table_settings.delimiter,
|
|
87
|
+
has_header=input_table_settings.has_headers,
|
|
88
|
+
skip_rows=input_table_settings.starting_from_line,
|
|
89
|
+
encoding="utf8",
|
|
90
|
+
infer_schema_length=input_table_settings.infer_schema_length,
|
|
91
|
+
)
|
|
72
92
|
df.head(1).collect()
|
|
73
93
|
return df
|
|
74
94
|
except:
|
|
75
95
|
try:
|
|
76
|
-
df = pl.scan_csv(
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
96
|
+
df = pl.scan_csv(
|
|
97
|
+
f,
|
|
98
|
+
low_memory=low_mem,
|
|
99
|
+
separator=input_table_settings.delimiter,
|
|
100
|
+
has_header=input_table_settings.has_headers,
|
|
101
|
+
skip_rows=input_table_settings.starting_from_line,
|
|
102
|
+
encoding="utf8-lossy",
|
|
103
|
+
ignore_errors=True,
|
|
104
|
+
)
|
|
82
105
|
return df
|
|
83
106
|
except:
|
|
84
|
-
df = pl.scan_csv(
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
107
|
+
df = pl.scan_csv(
|
|
108
|
+
f,
|
|
109
|
+
low_memory=low_mem,
|
|
110
|
+
separator=input_table_settings.delimiter,
|
|
111
|
+
has_header=input_table_settings.has_headers,
|
|
112
|
+
skip_rows=input_table_settings.starting_from_line,
|
|
113
|
+
encoding="utf8",
|
|
114
|
+
ignore_errors=True,
|
|
115
|
+
)
|
|
90
116
|
return df
|
|
91
117
|
else:
|
|
92
|
-
df = pl.read_csv(
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
118
|
+
df = pl.read_csv(
|
|
119
|
+
f,
|
|
120
|
+
low_memory=low_mem,
|
|
121
|
+
separator=input_table_settings.delimiter,
|
|
122
|
+
has_header=input_table_settings.has_headers,
|
|
123
|
+
skip_rows=input_table_settings.starting_from_line,
|
|
124
|
+
encoding=input_table_settings.encoding,
|
|
125
|
+
ignore_errors=True,
|
|
126
|
+
)
|
|
99
127
|
return df
|
|
100
128
|
|
|
101
129
|
|
|
@@ -116,30 +144,36 @@ def create_from_path_excel(received_table: ReceivedTable):
|
|
|
116
144
|
input_table_settings: InputExcelTable = received_table.table_settings
|
|
117
145
|
|
|
118
146
|
if input_table_settings.type_inference:
|
|
119
|
-
engine =
|
|
147
|
+
engine = "openpyxl"
|
|
120
148
|
elif input_table_settings.start_row > 0 and input_table_settings.start_column == 0:
|
|
121
|
-
engine =
|
|
149
|
+
engine = "calamine" if input_table_settings.has_headers else "xlsx2csv"
|
|
122
150
|
elif input_table_settings.start_column > 0 or input_table_settings.start_row > 0:
|
|
123
|
-
engine =
|
|
151
|
+
engine = "openpyxl"
|
|
124
152
|
else:
|
|
125
|
-
engine =
|
|
153
|
+
engine = "calamine"
|
|
126
154
|
|
|
127
155
|
sheet_name = input_table_settings.sheet_name
|
|
128
156
|
|
|
129
|
-
if engine ==
|
|
130
|
-
df = df_from_calamine_xlsx(
|
|
131
|
-
|
|
157
|
+
if engine == "calamine":
|
|
158
|
+
df = df_from_calamine_xlsx(
|
|
159
|
+
file_path=received_table.abs_file_path,
|
|
160
|
+
sheet_name=sheet_name,
|
|
161
|
+
start_row=input_table_settings.start_row,
|
|
162
|
+
end_row=input_table_settings.end_row,
|
|
163
|
+
)
|
|
132
164
|
if input_table_settings.end_column > 0:
|
|
133
165
|
end_col_index = input_table_settings.end_column
|
|
134
166
|
cols_to_select = [df.columns[i] for i in range(input_table_settings.start_column, end_col_index)]
|
|
135
167
|
df = df.select(cols_to_select)
|
|
136
168
|
|
|
137
|
-
elif engine ==
|
|
138
|
-
csv_options = {
|
|
139
|
-
df = pl.read_excel(
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
169
|
+
elif engine == "xlsx2csv":
|
|
170
|
+
csv_options = {"has_header": input_table_settings.has_headers, "skip_rows": input_table_settings.start_row}
|
|
171
|
+
df = pl.read_excel(
|
|
172
|
+
source=received_table.abs_file_path,
|
|
173
|
+
read_options=csv_options,
|
|
174
|
+
engine="xlsx2csv",
|
|
175
|
+
sheet_name=input_table_settings.sheet_name,
|
|
176
|
+
)
|
|
143
177
|
end_col_index = input_table_settings.end_column if input_table_settings.end_column > 0 else len(df.columns)
|
|
144
178
|
cols_to_select = [df.columns[i] for i in range(input_table_settings.start_column, end_col_index)]
|
|
145
179
|
df = df.select(cols_to_select)
|
|
@@ -149,10 +183,13 @@ def create_from_path_excel(received_table: ReceivedTable):
|
|
|
149
183
|
else:
|
|
150
184
|
max_col = input_table_settings.end_column if input_table_settings.end_column > 0 else None
|
|
151
185
|
max_row = input_table_settings.end_row + 1 if input_table_settings.end_row > 0 else None
|
|
152
|
-
df = df_from_openpyxl(
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
186
|
+
df = df_from_openpyxl(
|
|
187
|
+
file_path=received_table.abs_file_path,
|
|
188
|
+
sheet_name=input_table_settings.sheet_name,
|
|
189
|
+
min_row=input_table_settings.start_row + 1,
|
|
190
|
+
min_col=input_table_settings.start_column + 1,
|
|
191
|
+
max_row=max_row,
|
|
192
|
+
max_col=max_col,
|
|
193
|
+
has_headers=input_table_settings.has_headers,
|
|
194
|
+
)
|
|
158
195
|
return df
|
flowfile_worker/create/models.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
from pydantic import BaseModel, Field, model_validator, field_validator
|
|
2
|
-
from typing import List, Optional, Literal, Annotated
|
|
3
1
|
import os
|
|
4
2
|
from pathlib import Path
|
|
3
|
+
from typing import Annotated, Literal
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field, field_validator, model_validator
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
class MinimalFieldInfo(BaseModel):
|
|
@@ -11,19 +12,21 @@ class MinimalFieldInfo(BaseModel):
|
|
|
11
12
|
|
|
12
13
|
class InputTableBase(BaseModel):
|
|
13
14
|
"""Base settings for input file operations."""
|
|
15
|
+
|
|
14
16
|
file_type: str # Will be overridden with Literal in subclasses
|
|
15
17
|
|
|
16
18
|
|
|
17
19
|
class InputCsvTable(InputTableBase):
|
|
18
20
|
"""Defines settings for reading a CSV file."""
|
|
19
|
-
|
|
20
|
-
|
|
21
|
+
|
|
22
|
+
file_type: Literal["csv"] = "csv"
|
|
23
|
+
reference: str = ""
|
|
21
24
|
starting_from_line: int = 0
|
|
22
|
-
delimiter: str =
|
|
25
|
+
delimiter: str = ","
|
|
23
26
|
has_headers: bool = True
|
|
24
|
-
encoding: str =
|
|
25
|
-
parquet_ref:
|
|
26
|
-
row_delimiter: str =
|
|
27
|
+
encoding: str = "utf-8"
|
|
28
|
+
parquet_ref: str | None = None
|
|
29
|
+
row_delimiter: str = "\n"
|
|
27
30
|
quote_char: str = '"'
|
|
28
31
|
infer_schema_length: int = 10_000
|
|
29
32
|
truncate_ragged_lines: bool = False
|
|
@@ -32,18 +35,21 @@ class InputCsvTable(InputTableBase):
|
|
|
32
35
|
|
|
33
36
|
class InputJsonTable(InputCsvTable):
|
|
34
37
|
"""Defines settings for reading a JSON file."""
|
|
35
|
-
|
|
38
|
+
|
|
39
|
+
file_type: Literal["json"] = "json"
|
|
36
40
|
|
|
37
41
|
|
|
38
42
|
class InputParquetTable(InputTableBase):
|
|
39
43
|
"""Defines settings for reading a Parquet file."""
|
|
40
|
-
|
|
44
|
+
|
|
45
|
+
file_type: Literal["parquet"] = "parquet"
|
|
41
46
|
|
|
42
47
|
|
|
43
48
|
class InputExcelTable(InputTableBase):
|
|
44
49
|
"""Defines settings for reading an Excel file."""
|
|
45
|
-
|
|
46
|
-
|
|
50
|
+
|
|
51
|
+
file_type: Literal["excel"] = "excel"
|
|
52
|
+
sheet_name: str | None = None
|
|
47
53
|
start_row: int = 0
|
|
48
54
|
start_column: int = 0
|
|
49
55
|
end_row: int = 0
|
|
@@ -51,60 +57,58 @@ class InputExcelTable(InputTableBase):
|
|
|
51
57
|
has_headers: bool = True
|
|
52
58
|
type_inference: bool = False
|
|
53
59
|
|
|
54
|
-
@model_validator(mode=
|
|
60
|
+
@model_validator(mode="after")
|
|
55
61
|
def validate_range_values(self):
|
|
56
62
|
"""Validates that the Excel cell range is logical."""
|
|
57
63
|
for attribute in [self.start_row, self.start_column, self.end_row, self.end_column]:
|
|
58
64
|
if not isinstance(attribute, int) or attribute < 0:
|
|
59
65
|
raise ValueError("Row and column indices must be non-negative integers")
|
|
60
|
-
if (self.end_row > 0 and self.start_row > self.end_row) or
|
|
61
|
-
|
|
66
|
+
if (self.end_row > 0 and self.start_row > self.end_row) or (
|
|
67
|
+
self.end_column > 0 and self.start_column > self.end_column
|
|
68
|
+
):
|
|
62
69
|
raise ValueError("Start row/column must not be greater than end row/column")
|
|
63
70
|
return self
|
|
64
71
|
|
|
65
72
|
|
|
66
73
|
# Create the discriminated union (similar to OutputTableSettings)
|
|
67
74
|
InputTableSettings = Annotated[
|
|
68
|
-
InputCsvTable | InputJsonTable | InputParquetTable | InputExcelTable,
|
|
69
|
-
Field(discriminator='file_type')
|
|
75
|
+
InputCsvTable | InputJsonTable | InputParquetTable | InputExcelTable, Field(discriminator="file_type")
|
|
70
76
|
]
|
|
71
77
|
|
|
72
78
|
|
|
73
79
|
# Now create the main ReceivedTable model
|
|
74
80
|
class ReceivedTable(BaseModel):
|
|
75
81
|
"""Model for defining a table received from an external source."""
|
|
82
|
+
|
|
76
83
|
# Metadata fields
|
|
77
|
-
id:
|
|
78
|
-
name:
|
|
84
|
+
id: int | None = None
|
|
85
|
+
name: str | None = None
|
|
79
86
|
path: str # This can be an absolute or relative path
|
|
80
|
-
directory:
|
|
87
|
+
directory: str | None = None
|
|
81
88
|
analysis_file_available: bool = False
|
|
82
|
-
status:
|
|
83
|
-
fields:
|
|
84
|
-
abs_file_path:
|
|
89
|
+
status: str | None = None
|
|
90
|
+
fields: list[MinimalFieldInfo] = Field(default_factory=list)
|
|
91
|
+
abs_file_path: str | None = None
|
|
85
92
|
|
|
86
|
-
file_type: Literal[
|
|
93
|
+
file_type: Literal["csv", "json", "parquet", "excel"]
|
|
87
94
|
|
|
88
95
|
table_settings: InputTableSettings
|
|
89
96
|
|
|
90
97
|
@classmethod
|
|
91
|
-
def create_from_path(cls, path: str, file_type: Literal[
|
|
98
|
+
def create_from_path(cls, path: str, file_type: Literal["csv", "json", "parquet", "excel"] = "csv"):
|
|
92
99
|
"""Creates an instance from a file path string."""
|
|
93
100
|
filename = Path(path).name
|
|
94
101
|
|
|
95
102
|
# Create appropriate table_settings based on file_type
|
|
96
103
|
settings_map = {
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
104
|
+
"csv": InputCsvTable(),
|
|
105
|
+
"json": InputJsonTable(),
|
|
106
|
+
"parquet": InputParquetTable(),
|
|
107
|
+
"excel": InputExcelTable(),
|
|
101
108
|
}
|
|
102
109
|
|
|
103
110
|
return cls(
|
|
104
|
-
name=filename,
|
|
105
|
-
path=path,
|
|
106
|
-
file_type=file_type,
|
|
107
|
-
table_settings=settings_map.get(file_type, InputCsvTable())
|
|
111
|
+
name=filename, path=path, file_type=file_type, table_settings=settings_map.get(file_type, InputCsvTable())
|
|
108
112
|
)
|
|
109
113
|
|
|
110
114
|
@property
|
|
@@ -124,31 +128,30 @@ class ReceivedTable(BaseModel):
|
|
|
124
128
|
base_path = base_path / self.name
|
|
125
129
|
self.abs_file_path = str(base_path.resolve())
|
|
126
130
|
|
|
127
|
-
@field_validator(
|
|
131
|
+
@field_validator("table_settings", mode="before")
|
|
128
132
|
@classmethod
|
|
129
133
|
def validate_table_settings(cls, v, info):
|
|
130
134
|
"""Ensures table_settings matches the file_type."""
|
|
131
135
|
if v is None:
|
|
132
|
-
file_type = info.data.get(
|
|
136
|
+
file_type = info.data.get("file_type", "csv")
|
|
133
137
|
# Create default based on file_type
|
|
134
138
|
settings_map = {
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
+
"csv": InputCsvTable(),
|
|
140
|
+
"json": InputJsonTable(),
|
|
141
|
+
"parquet": InputParquetTable(),
|
|
142
|
+
"excel": InputExcelTable(),
|
|
139
143
|
}
|
|
140
144
|
return settings_map.get(file_type, InputCsvTable())
|
|
141
145
|
|
|
142
146
|
# If it's a dict, add file_type if missing
|
|
143
|
-
if isinstance(v, dict) and
|
|
144
|
-
v[
|
|
147
|
+
if isinstance(v, dict) and "file_type" not in v:
|
|
148
|
+
v["file_type"] = info.data.get("file_type", "csv")
|
|
145
149
|
|
|
146
150
|
return v
|
|
147
151
|
|
|
148
|
-
@model_validator(mode=
|
|
152
|
+
@model_validator(mode="after")
|
|
149
153
|
def populate_abs_file_path(self):
|
|
150
154
|
"""Ensures the absolute file path is populated after validation."""
|
|
151
155
|
if not self.abs_file_path:
|
|
152
156
|
self.set_absolute_filepath()
|
|
153
157
|
return self
|
|
154
|
-
|
|
@@ -1,21 +1,20 @@
|
|
|
1
1
|
import polars as pl
|
|
2
2
|
|
|
3
|
-
|
|
4
3
|
dtype_to_pl = {
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
4
|
+
"int": pl.Int64,
|
|
5
|
+
"integer": pl.Int64,
|
|
6
|
+
"char": pl.String,
|
|
7
|
+
"fixed decimal": pl.Float32,
|
|
8
|
+
"double": pl.Float64,
|
|
9
|
+
"float": pl.Float64,
|
|
10
|
+
"bool": pl.Boolean,
|
|
11
|
+
"byte": pl.UInt8,
|
|
12
|
+
"bit": pl.Binary,
|
|
13
|
+
"date": pl.Date,
|
|
14
|
+
"datetime": pl.Datetime,
|
|
15
|
+
"string": pl.String,
|
|
16
|
+
"str": pl.String,
|
|
17
|
+
"time": pl.Time,
|
|
19
18
|
}
|
|
20
19
|
|
|
21
20
|
dtype_to_pl_str = {k: v.__name__ for k, v in dtype_to_pl.items()}
|
|
@@ -1,59 +1,55 @@
|
|
|
1
|
-
from typing import Generator, List
|
|
2
|
-
from openpyxl import Workbook, load_workbook
|
|
3
|
-
from openpyxl.worksheet.worksheet import Worksheet
|
|
4
1
|
import gc
|
|
2
|
+
from collections.abc import Generator
|
|
3
|
+
|
|
5
4
|
import polars as pl
|
|
5
|
+
from openpyxl import Workbook, load_workbook
|
|
6
|
+
from openpyxl.worksheet.worksheet import Worksheet
|
|
7
|
+
|
|
6
8
|
from flowfile_worker.create.utils import create_pl_df_type_save
|
|
7
9
|
|
|
8
10
|
|
|
9
|
-
def raw_data_openpyxl(
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
11
|
+
def raw_data_openpyxl(
|
|
12
|
+
file_path: str,
|
|
13
|
+
sheet_name: str = None,
|
|
14
|
+
min_row: int = None,
|
|
15
|
+
max_row: int = None,
|
|
16
|
+
min_col: int = None,
|
|
17
|
+
max_col: int = None,
|
|
18
|
+
) -> Generator[list, None, None]:
|
|
16
19
|
workbook: Workbook = load_workbook(file_path, data_only=True, read_only=True)
|
|
17
20
|
sheet_name = workbook.sheetnames[0] if sheet_name is None else sheet_name
|
|
18
21
|
sheet: Worksheet = workbook[sheet_name]
|
|
19
|
-
|
|
20
|
-
max_row=max_row,
|
|
21
|
-
min_col=min_col,
|
|
22
|
-
max_col=max_col,
|
|
23
|
-
values_only=True):
|
|
24
|
-
yield row
|
|
22
|
+
yield from sheet.iter_rows(min_row=min_row, max_row=max_row, min_col=min_col, max_col=max_col, values_only=True)
|
|
25
23
|
workbook.close()
|
|
26
24
|
del workbook
|
|
27
25
|
gc.collect()
|
|
28
26
|
|
|
29
27
|
|
|
30
|
-
def df_from_openpyxl(
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
min_col=min_col,
|
|
43
|
-
max_col=max_col)
|
|
28
|
+
def df_from_openpyxl(
|
|
29
|
+
file_path: str,
|
|
30
|
+
sheet_name: str = None,
|
|
31
|
+
min_row: int = None,
|
|
32
|
+
max_row: int = None,
|
|
33
|
+
min_col: int = None,
|
|
34
|
+
max_col: int = None,
|
|
35
|
+
has_headers: bool = True,
|
|
36
|
+
) -> pl.DataFrame:
|
|
37
|
+
data_iterator = raw_data_openpyxl(
|
|
38
|
+
file_path=file_path, sheet_name=sheet_name, min_row=min_row, max_row=max_row, min_col=min_col, max_col=max_col
|
|
39
|
+
)
|
|
44
40
|
raw_data = list(data_iterator)
|
|
45
41
|
if len(raw_data) > 0:
|
|
46
42
|
if has_headers:
|
|
47
43
|
columns = []
|
|
48
44
|
for i, col in enumerate(raw_data[0]):
|
|
49
45
|
if col is None:
|
|
50
|
-
col = f
|
|
46
|
+
col = f"_unnamed_column_{i}"
|
|
51
47
|
elif not isinstance(col, str):
|
|
52
48
|
col = str(col)
|
|
53
49
|
columns.append(col)
|
|
54
50
|
columns = ensure_unique(columns)
|
|
55
51
|
df = create_pl_df_type_save(raw_data[1:])
|
|
56
|
-
renames = {o: n for o, n in zip(df.columns, columns)}
|
|
52
|
+
renames = {o: n for o, n in zip(df.columns, columns, strict=False)}
|
|
57
53
|
df = df.rename(renames)
|
|
58
54
|
|
|
59
55
|
else:
|
|
@@ -63,7 +59,7 @@ def df_from_openpyxl(file_path: str,
|
|
|
63
59
|
return pl.DataFrame()
|
|
64
60
|
|
|
65
61
|
|
|
66
|
-
def ensure_unique(lst:
|
|
62
|
+
def ensure_unique(lst: list[str]) -> list[str]:
|
|
67
63
|
"""
|
|
68
64
|
Ensures that all elements in the input list are unique by appending
|
|
69
65
|
a version number (e.g., '_v1') to duplicates. It continues adding
|
|
@@ -99,12 +95,9 @@ def ensure_unique(lst: List[str]) -> List[str]:
|
|
|
99
95
|
def df_from_calamine_xlsx(file_path: str, sheet_name: str, start_row: int = 0, end_row: int = 0) -> pl.DataFrame:
|
|
100
96
|
read_options = {}
|
|
101
97
|
if start_row > 0:
|
|
102
|
-
read_options[
|
|
98
|
+
read_options["header_row"] = start_row
|
|
103
99
|
if end_row > 0:
|
|
104
|
-
read_options[
|
|
105
|
-
return pl.read_excel(
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
read_options=read_options,
|
|
109
|
-
raise_if_empty=False
|
|
110
|
-
)
|
|
100
|
+
read_options["n_rows"] = end_row - start_row
|
|
101
|
+
return pl.read_excel(
|
|
102
|
+
source=file_path, engine="calamine", sheet_name=sheet_name, read_options=read_options, raise_if_empty=False
|
|
103
|
+
)
|