Flowfile 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backends/main.py +25 -22
- build_backends/main_prd.py +10 -19
- flowfile/__init__.py +178 -74
- flowfile/__main__.py +10 -7
- flowfile/api.py +51 -57
- flowfile/web/__init__.py +14 -9
- flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
- flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
- flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
- flowfile/web/static/assets/{CloudConnectionManager-0dfba9f2.js → CloudConnectionView-f13f202b.js} +11 -11
- flowfile/web/static/assets/{CloudStorageReader-d5b1b6c9.js → CloudStorageReader-0023d4a5.js} +10 -8
- flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
- flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
- flowfile/web/static/assets/{CloudStorageWriter-00d87aad.js → CloudStorageWriter-8e781e11.js} +10 -8
- flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
- flowfile/web/static/assets/{ColumnSelector-4685e75d.js → ColumnSelector-8ad68ea9.js} +3 -5
- flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-23e909da.js → ContextMenu-31ee57f0.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-70ae0c79.js → ContextMenu-69a74055.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-f149cf7c.js → ContextMenu-8e2051c6.js} +3 -3
- flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
- flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
- flowfile/web/static/assets/{CrossJoin-702a3edd.js → CrossJoin-03df6938.js} +12 -10
- flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
- flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
- flowfile/web/static/assets/{CustomNode-b1519993.js → CustomNode-8479239b.js} +36 -24
- flowfile/web/static/assets/{DatabaseConnectionSettings-6f3e4ea5.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
- flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
- flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
- flowfile/web/static/assets/{DatabaseReader-d38c7295.js → DatabaseReader-c58b9552.js} +25 -15
- flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
- flowfile/web/static/assets/{DatabaseManager-cf5ef661.js → DatabaseView-d26a9140.js} +11 -11
- flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
- flowfile/web/static/assets/{DatabaseWriter-b04ef46a.js → DatabaseWriter-4d05ddc7.js} +17 -10
- flowfile/web/static/assets/{designer-8da3ba3a.css → DesignerView-a6d0ee84.css} +614 -546
- flowfile/web/static/assets/{designer-9633482a.js → DesignerView-e6f5c0e8.js} +1107 -3170
- flowfile/web/static/assets/{documentation-ca400224.js → DocumentationView-2e78ef1b.js} +5 -5
- flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
- flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
- flowfile/web/static/assets/{ExploreData-5fa10ed8.js → ExploreData-7b54caca.js} +18 -9
- flowfile/web/static/assets/{ExternalSource-d39af878.js → ExternalSource-3fa399b2.js} +9 -7
- flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
- flowfile/web/static/assets/Filter-7494ea97.css +48 -0
- flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
- flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
- flowfile/web/static/assets/{Formula-6b04fb1d.js → Formula-aac42b1e.js} +13 -11
- flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
- flowfile/web/static/assets/{FuzzyMatch-999521f4.js → FuzzyMatch-cd9bbfca.js} +12 -10
- flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
- flowfile/web/static/assets/{GraphSolver-17dd2198.js → GraphSolver-c7e6780e.js} +13 -11
- flowfile/web/static/assets/{GroupBy-6b039e18.js → GroupBy-93c5d22b.js} +9 -7
- flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
- flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
- flowfile/web/static/assets/{Join-24d0f113.js → Join-a19b2de2.js} +13 -11
- flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
- flowfile/web/static/assets/LoginView-d325d632.css +172 -0
- flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
- flowfile/web/static/assets/{ManualInput-34639209.js → ManualInput-8d3374b2.js} +170 -116
- flowfile/web/static/assets/{MultiSelect-0e8724a3.js → MultiSelect-ad1b6243.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
- flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
- flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
- flowfile/web/static/assets/{NumericInput-3d63a470.js → NumericInput-7100234c.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
- flowfile/web/static/assets/{Output-283fe388.css → Output-35e97000.css} +6 -6
- flowfile/web/static/assets/{Output-edea9802.js → Output-f5efd2aa.js} +12 -9
- flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
- flowfile/web/static/assets/{Pivot-61d19301.js → Pivot-d981d23c.js} +11 -9
- flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
- flowfile/web/static/assets/{PivotValidation-f97fec5b.js → PivotValidation-39386e95.js} +3 -3
- flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
- flowfile/web/static/assets/{PivotValidation-de9f43fe.js → PivotValidation-63de1f73.js} +3 -3
- flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
- flowfile/web/static/assets/{PolarsCode-bc3c9984.js → PolarsCode-f9d69217.js} +18 -9
- flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
- flowfile/web/static/assets/PopOver-d96599db.css +33 -0
- flowfile/web/static/assets/{Read-e808b239.css → Read-36e7bd51.css} +12 -12
- flowfile/web/static/assets/{Read-64a3f259.js → Read-aec2e377.js} +14 -11
- flowfile/web/static/assets/{RecordCount-3d5039be.js → RecordCount-78ed6845.js} +6 -4
- flowfile/web/static/assets/{RecordId-597510e0.js → RecordId-2156e890.js} +8 -6
- flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
- flowfile/web/static/assets/{SQLQueryComponent-df51adbe.js → SQLQueryComponent-48c72f5b.js} +3 -3
- flowfile/web/static/assets/{Sample-4be0a507.js → Sample-1352ca74.js} +6 -4
- flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
- flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
- flowfile/web/static/assets/{SecretManager-4839be57.js → SecretsView-17df66ee.js} +35 -36
- flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
- flowfile/web/static/assets/{Select-9b72f201.js → Select-0aee4c54.js} +9 -7
- flowfile/web/static/assets/{SettingsSection-f0f75a42.js → SettingsSection-0784e157.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
- flowfile/web/static/assets/{SettingsSection-e1e9c953.js → SettingsSection-cd341bb6.js} +3 -3
- flowfile/web/static/assets/{SettingsSection-7ded385d.js → SettingsSection-f2002a6d.js} +3 -3
- flowfile/web/static/assets/{SingleSelect-6c777aac.js → SingleSelect-460cc0ea.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
- flowfile/web/static/assets/{SliderInput-7cb93e62.js → SliderInput-5d926864.js} +7 -4
- flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
- flowfile/web/static/assets/{Sort-6cbde21a.js → Sort-3cdc971b.js} +9 -7
- flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
- flowfile/web/static/assets/{TextInput-d9a40c11.js → TextInput-a2d0bfbd.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-5896c375.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
- flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
- flowfile/web/static/assets/{TextToRows-c4fcbf4d.js → TextToRows-918945f7.js} +11 -10
- flowfile/web/static/assets/{ToggleSwitch-4ef91d19.js → ToggleSwitch-f0ef5196.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
- flowfile/web/static/assets/{UnavailableFields-a03f512c.js → UnavailableFields-bdad6144.js} +4 -4
- flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
- flowfile/web/static/assets/{Union-bfe9b996.js → Union-e8ab8c86.js} +8 -6
- flowfile/web/static/assets/{Unique-5d023a27.js → Unique-8cd4f976.js} +13 -10
- flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
- flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
- flowfile/web/static/assets/{Unpivot-91cc5354.js → Unpivot-8da14095.js} +10 -8
- flowfile/web/static/assets/{UnpivotValidation-7ee2de44.js → UnpivotValidation-6f7d89ff.js} +3 -3
- flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
- flowfile/web/static/assets/{VueGraphicWalker-e51b9924.js → VueGraphicWalker-3fb312e1.js} +4 -4
- flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
- flowfile/web/static/assets/{api-cf1221f0.js → api-24483f0d.js} +1 -1
- flowfile/web/static/assets/{api-c1bad5ca.js → api-8b81fa73.js} +1 -1
- flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
- flowfile/web/static/assets/{dropDown-614b998d.js → dropDown-ac0fda9d.js} +3 -3
- flowfile/web/static/assets/{fullEditor-f7971590.js → fullEditor-5497a84a.js} +11 -10
- flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
- flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
- flowfile/web/static/assets/{genericNodeSettings-4fe5f36b.js → genericNodeSettings-99014e1d.js} +5 -5
- flowfile/web/static/assets/index-07dda503.js +38 -0
- flowfile/web/static/assets/index-3ba44389.js +2696 -0
- flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
- flowfile/web/static/assets/{index-5429bbf8.js → index-fb6493ae.js} +41626 -40867
- flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
- flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
- flowfile/web/static/assets/{outputCsv-076b85ab.js → outputCsv-8f8ba42d.js} +3 -3
- flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
- flowfile/web/static/assets/{outputExcel-0fd17dbe.js → outputExcel-393f4fef.js} +3 -3
- flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
- flowfile/web/static/assets/{outputParquet-b61e0847.js → outputParquet-07c81f65.js} +4 -4
- flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
- flowfile/web/static/assets/{readCsv-a8bb8b61.js → readCsv-07f6d9ad.js} +3 -3
- flowfile/web/static/assets/{readCsv-c767cb37.css → readCsv-3bfac4c3.css} +15 -15
- flowfile/web/static/assets/{readExcel-806d2826.css → readExcel-3db6b763.css} +13 -13
- flowfile/web/static/assets/{readExcel-67b4aee0.js → readExcel-ed69bc8f.js} +5 -5
- flowfile/web/static/assets/{readParquet-48c81530.css → readParquet-c5244ad5.css} +4 -4
- flowfile/web/static/assets/{readParquet-92ce1dbc.js → readParquet-e3ed4528.js} +3 -3
- flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
- flowfile/web/static/assets/{selectDynamic-92e25ee3.js → selectDynamic-80b92899.js} +5 -5
- flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
- flowfile/web/static/assets/{vue-codemirror.esm-41b0e0d7.js → vue-codemirror.esm-0965f39f.js} +31 -640
- flowfile/web/static/assets/{vue-content-loader.es-2c8e608f.js → vue-content-loader.es-c506ad97.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +2 -3
- flowfile-0.5.3.dist-info/RECORD +402 -0
- flowfile_core/__init__.py +13 -6
- flowfile_core/auth/jwt.py +51 -16
- flowfile_core/auth/models.py +32 -7
- flowfile_core/auth/password.py +89 -0
- flowfile_core/auth/secrets.py +8 -6
- flowfile_core/configs/__init__.py +9 -7
- flowfile_core/configs/flow_logger.py +15 -14
- flowfile_core/configs/node_store/__init__.py +72 -4
- flowfile_core/configs/node_store/nodes.py +155 -172
- flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
- flowfile_core/configs/settings.py +28 -15
- flowfile_core/database/connection.py +7 -6
- flowfile_core/database/init_db.py +96 -2
- flowfile_core/database/models.py +3 -1
- flowfile_core/fileExplorer/__init__.py +17 -0
- flowfile_core/fileExplorer/funcs.py +123 -57
- flowfile_core/fileExplorer/utils.py +10 -11
- flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
- flowfile_core/flowfile/analytics/analytics_processor.py +26 -24
- flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
- flowfile_core/flowfile/analytics/utils.py +1 -1
- flowfile_core/flowfile/code_generator/code_generator.py +358 -244
- flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
- flowfile_core/flowfile/database_connection_manager/models.py +1 -1
- flowfile_core/flowfile/extensions.py +17 -12
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +115 -83
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +481 -423
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +31 -20
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +14 -15
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
- flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +190 -127
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
- flowfile_core/flowfile/flow_data_engine/utils.py +99 -67
- flowfile_core/flowfile/flow_graph.py +918 -571
- flowfile_core/flowfile/flow_graph_utils.py +31 -49
- flowfile_core/flowfile/flow_node/flow_node.py +330 -233
- flowfile_core/flowfile/flow_node/models.py +53 -41
- flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
- flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
- flowfile_core/flowfile/handler.py +80 -30
- flowfile_core/flowfile/manage/compatibility_enhancements.py +209 -126
- flowfile_core/flowfile/manage/io_flowfile.py +54 -57
- flowfile_core/flowfile/node_designer/__init__.py +15 -13
- flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
- flowfile_core/flowfile/node_designer/custom_node.py +162 -36
- flowfile_core/flowfile/node_designer/ui_components.py +135 -34
- flowfile_core/flowfile/schema_callbacks.py +71 -51
- flowfile_core/flowfile/setting_generator/__init__.py +0 -1
- flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
- flowfile_core/flowfile/setting_generator/settings.py +64 -53
- flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
- flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
- flowfile_core/flowfile/util/calculate_layout.py +9 -13
- flowfile_core/flowfile/util/execution_orderer.py +25 -17
- flowfile_core/flowfile/util/node_skipper.py +4 -4
- flowfile_core/flowfile/utils.py +19 -21
- flowfile_core/main.py +26 -19
- flowfile_core/routes/auth.py +284 -11
- flowfile_core/routes/cloud_connections.py +25 -25
- flowfile_core/routes/logs.py +21 -29
- flowfile_core/routes/public.py +3 -3
- flowfile_core/routes/routes.py +70 -34
- flowfile_core/routes/secrets.py +25 -27
- flowfile_core/routes/user_defined_components.py +483 -4
- flowfile_core/run_lock.py +0 -1
- flowfile_core/schemas/__init__.py +4 -6
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
- flowfile_core/schemas/cloud_storage_schemas.py +59 -53
- flowfile_core/schemas/input_schema.py +231 -144
- flowfile_core/schemas/output_model.py +49 -34
- flowfile_core/schemas/schemas.py +116 -89
- flowfile_core/schemas/transform_schema.py +518 -263
- flowfile_core/schemas/yaml_types.py +21 -7
- flowfile_core/secret_manager/secret_manager.py +17 -13
- flowfile_core/types.py +29 -9
- flowfile_core/utils/arrow_reader.py +7 -6
- flowfile_core/utils/excel_file_manager.py +3 -3
- flowfile_core/utils/fileManager.py +7 -7
- flowfile_core/utils/fl_executor.py +8 -10
- flowfile_core/utils/utils.py +4 -4
- flowfile_core/utils/validate_setup.py +5 -4
- flowfile_frame/__init__.py +106 -51
- flowfile_frame/adapters.py +2 -9
- flowfile_frame/adding_expr.py +73 -32
- flowfile_frame/cloud_storage/frame_helpers.py +27 -23
- flowfile_frame/cloud_storage/secret_manager.py +12 -26
- flowfile_frame/config.py +2 -5
- flowfile_frame/expr.py +311 -218
- flowfile_frame/expr.pyi +160 -159
- flowfile_frame/expr_name.py +23 -23
- flowfile_frame/flow_frame.py +571 -476
- flowfile_frame/flow_frame.pyi +123 -104
- flowfile_frame/flow_frame_methods.py +227 -246
- flowfile_frame/group_frame.py +50 -20
- flowfile_frame/join.py +2 -2
- flowfile_frame/lazy.py +129 -87
- flowfile_frame/lazy_methods.py +83 -30
- flowfile_frame/list_name_space.py +55 -50
- flowfile_frame/selectors.py +148 -68
- flowfile_frame/series.py +9 -7
- flowfile_frame/utils.py +19 -21
- flowfile_worker/__init__.py +12 -7
- flowfile_worker/configs.py +11 -19
- flowfile_worker/create/__init__.py +14 -9
- flowfile_worker/create/funcs.py +114 -77
- flowfile_worker/create/models.py +46 -43
- flowfile_worker/create/pl_types.py +14 -15
- flowfile_worker/create/read_excel_tables.py +34 -41
- flowfile_worker/create/utils.py +22 -19
- flowfile_worker/external_sources/s3_source/main.py +18 -51
- flowfile_worker/external_sources/s3_source/models.py +34 -27
- flowfile_worker/external_sources/sql_source/main.py +8 -5
- flowfile_worker/external_sources/sql_source/models.py +13 -9
- flowfile_worker/flow_logger.py +10 -8
- flowfile_worker/funcs.py +214 -155
- flowfile_worker/main.py +11 -17
- flowfile_worker/models.py +35 -28
- flowfile_worker/process_manager.py +2 -3
- flowfile_worker/routes.py +121 -90
- flowfile_worker/secrets.py +9 -6
- flowfile_worker/spawner.py +80 -49
- flowfile_worker/utils.py +3 -2
- shared/__init__.py +2 -7
- shared/storage_config.py +25 -13
- test_utils/postgres/commands.py +3 -2
- test_utils/postgres/fixtures.py +9 -9
- test_utils/s3/commands.py +1 -1
- test_utils/s3/data_generator.py +3 -4
- test_utils/s3/demo_data_generator.py +4 -7
- test_utils/s3/fixtures.py +7 -5
- tools/migrate/__init__.py +1 -1
- tools/migrate/__main__.py +16 -29
- tools/migrate/legacy_schemas.py +251 -190
- tools/migrate/migrate.py +193 -181
- tools/migrate/tests/conftest.py +1 -3
- tools/migrate/tests/test_migrate.py +36 -41
- tools/migrate/tests/test_migration_e2e.py +28 -29
- tools/migrate/tests/test_node_migrations.py +50 -20
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
- flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
- flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
- flowfile/web/static/assets/Filter-9b6d08db.js +0 -164
- flowfile/web/static/assets/Filter-f62091b3.css +0 -20
- flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
- flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
- flowfile/web/static/assets/nodeInput-5d0d6b79.js +0 -41
- flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
- flowfile/web/static/assets/secretApi-68435402.js +0 -46
- flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
- flowfile-0.5.1.dist-info/RECORD +0 -388
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +0 -0
- {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,9 +1,11 @@
|
|
|
1
|
-
import polars as pl
|
|
2
|
-
from typing import Dict, Any, Callable
|
|
3
|
-
import textwrap
|
|
4
1
|
import ast
|
|
2
|
+
import textwrap
|
|
5
3
|
import time
|
|
4
|
+
from collections.abc import Callable
|
|
6
5
|
from io import BytesIO
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import polars as pl
|
|
7
9
|
|
|
8
10
|
|
|
9
11
|
def remove_comments_and_docstrings(source: str) -> str:
|
|
@@ -32,7 +34,7 @@ def remove_comments_and_docstrings(source: str) -> str:
|
|
|
32
34
|
# Handle string boundaries
|
|
33
35
|
if char in ('"', "'"):
|
|
34
36
|
# Check for escaped quotes
|
|
35
|
-
if i > 0 and line[i - 1] ==
|
|
37
|
+
if i > 0 and line[i - 1] == "\\":
|
|
36
38
|
result.append(char)
|
|
37
39
|
i += 1
|
|
38
40
|
continue
|
|
@@ -47,17 +49,17 @@ def remove_comments_and_docstrings(source: str) -> str:
|
|
|
47
49
|
string_char = None
|
|
48
50
|
|
|
49
51
|
# Only process comment characters outside strings
|
|
50
|
-
elif char ==
|
|
52
|
+
elif char == "#" and not in_string:
|
|
51
53
|
break
|
|
52
54
|
|
|
53
55
|
result.append(char)
|
|
54
56
|
i += 1
|
|
55
57
|
|
|
56
|
-
return
|
|
58
|
+
return "".join(result).rstrip()
|
|
57
59
|
|
|
58
60
|
# First pass: handle comments
|
|
59
61
|
lines = [remove_comments_from_line(line) for line in source.splitlines()]
|
|
60
|
-
source =
|
|
62
|
+
source = "\n".join(line for line in lines if line.strip())
|
|
61
63
|
|
|
62
64
|
# Second pass: handle docstrings using AST
|
|
63
65
|
try:
|
|
@@ -68,39 +70,51 @@ def remove_comments_and_docstrings(source: str) -> str:
|
|
|
68
70
|
class DocstringRemover(ast.NodeTransformer):
|
|
69
71
|
def visit_Module(self, node):
|
|
70
72
|
# Remove module-level docstrings
|
|
71
|
-
while (
|
|
72
|
-
|
|
73
|
-
|
|
73
|
+
while (
|
|
74
|
+
node.body
|
|
75
|
+
and isinstance(node.body[0], ast.Expr)
|
|
76
|
+
and isinstance(node.body[0].value, ast.Constant)
|
|
77
|
+
and isinstance(node.body[0].value.value, str)
|
|
78
|
+
):
|
|
74
79
|
node.body.pop(0)
|
|
75
80
|
return self.generic_visit(node)
|
|
76
81
|
|
|
77
82
|
def visit_FunctionDef(self, node):
|
|
78
83
|
# Remove function docstrings
|
|
79
|
-
if (
|
|
80
|
-
|
|
81
|
-
|
|
84
|
+
if (
|
|
85
|
+
node.body
|
|
86
|
+
and isinstance(node.body[0], ast.Expr)
|
|
87
|
+
and isinstance(node.body[0].value, ast.Constant)
|
|
88
|
+
and isinstance(node.body[0].value.value, str)
|
|
89
|
+
):
|
|
82
90
|
node.body.pop(0)
|
|
83
91
|
return self.generic_visit(node)
|
|
84
92
|
|
|
85
93
|
def visit_ClassDef(self, node):
|
|
86
94
|
# Remove class docstrings
|
|
87
|
-
if (
|
|
88
|
-
|
|
89
|
-
|
|
95
|
+
if (
|
|
96
|
+
node.body
|
|
97
|
+
and isinstance(node.body[0], ast.Expr)
|
|
98
|
+
and isinstance(node.body[0].value, ast.Constant)
|
|
99
|
+
and isinstance(node.body[0].value.value, str)
|
|
100
|
+
):
|
|
90
101
|
node.body.pop(0)
|
|
91
102
|
return self.generic_visit(node)
|
|
92
103
|
|
|
93
104
|
def visit_AsyncFunctionDef(self, node):
|
|
94
105
|
# Remove async function docstrings
|
|
95
|
-
if (
|
|
96
|
-
|
|
97
|
-
|
|
106
|
+
if (
|
|
107
|
+
node.body
|
|
108
|
+
and isinstance(node.body[0], ast.Expr)
|
|
109
|
+
and isinstance(node.body[0].value, ast.Constant)
|
|
110
|
+
and isinstance(node.body[0].value.value, str)
|
|
111
|
+
):
|
|
98
112
|
node.body.pop(0)
|
|
99
113
|
return self.generic_visit(node)
|
|
100
114
|
|
|
101
115
|
def visit_Expr(self, node):
|
|
102
116
|
# Remove standalone string literals
|
|
103
|
-
if isinstance(node.value, (ast.Str, ast.Constant)) and isinstance(getattr(node.value,
|
|
117
|
+
if isinstance(node.value, (ast.Str, ast.Constant)) and isinstance(getattr(node.value, "value", None), str):
|
|
104
118
|
return None
|
|
105
119
|
return self.generic_visit(node)
|
|
106
120
|
|
|
@@ -109,7 +123,7 @@ def remove_comments_and_docstrings(source: str) -> str:
|
|
|
109
123
|
ast.fix_missing_locations(tree)
|
|
110
124
|
result = ast.unparse(tree)
|
|
111
125
|
# Remove empty lines
|
|
112
|
-
return
|
|
126
|
+
return "\n".join(line for line in result.splitlines() if line.strip())
|
|
113
127
|
except Exception:
|
|
114
128
|
return source
|
|
115
129
|
|
|
@@ -122,62 +136,61 @@ class PolarsCodeParser:
|
|
|
122
136
|
|
|
123
137
|
def __init__(self):
|
|
124
138
|
import datetime
|
|
139
|
+
|
|
125
140
|
self.safe_globals = {
|
|
126
141
|
# Polars functionality
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
142
|
+
"pl": pl,
|
|
143
|
+
"col": pl.col,
|
|
144
|
+
"lit": pl.lit,
|
|
145
|
+
"expr": pl.expr,
|
|
132
146
|
# Polars datatypes - added directly
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
147
|
+
"Int8": pl.Int8,
|
|
148
|
+
"Int16": pl.Int16,
|
|
149
|
+
"Int32": pl.Int32,
|
|
150
|
+
"Int64": pl.Int64,
|
|
151
|
+
"Int128": pl.Int128,
|
|
152
|
+
"UInt8": pl.UInt8,
|
|
153
|
+
"UInt16": pl.UInt16,
|
|
154
|
+
"UInt32": pl.UInt32,
|
|
155
|
+
"UInt64": pl.UInt64,
|
|
156
|
+
"Float32": pl.Float32,
|
|
157
|
+
"Float64": pl.Float64,
|
|
158
|
+
"Boolean": pl.Boolean,
|
|
159
|
+
"String": pl.String,
|
|
160
|
+
"Utf8": pl.Utf8,
|
|
161
|
+
"Binary": pl.Binary,
|
|
162
|
+
"Null": pl.Null,
|
|
163
|
+
"List": pl.List,
|
|
164
|
+
"Array": pl.Array,
|
|
165
|
+
"Struct": pl.Struct,
|
|
166
|
+
"Object": pl.Object,
|
|
167
|
+
"Date": pl.Date,
|
|
168
|
+
"Time": pl.Time,
|
|
169
|
+
"Datetime": pl.Datetime,
|
|
170
|
+
"Duration": pl.Duration,
|
|
171
|
+
"Categorical": pl.Categorical,
|
|
172
|
+
"Decimal": pl.Decimal,
|
|
173
|
+
"Enum": pl.Enum,
|
|
174
|
+
"Unknown": pl.Unknown,
|
|
162
175
|
# Basic Python built-ins
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
176
|
+
"print": print,
|
|
177
|
+
"len": len,
|
|
178
|
+
"range": range,
|
|
179
|
+
"enumerate": enumerate,
|
|
180
|
+
"zip": zip,
|
|
181
|
+
"list": list,
|
|
182
|
+
"dict": dict,
|
|
183
|
+
"set": set,
|
|
184
|
+
"str": str,
|
|
185
|
+
"int": int,
|
|
186
|
+
"float": float,
|
|
187
|
+
"bool": bool,
|
|
188
|
+
"True": True,
|
|
189
|
+
"False": False,
|
|
190
|
+
"None": None,
|
|
191
|
+
"time": time,
|
|
192
|
+
"BytesIO": BytesIO,
|
|
193
|
+
"datetime": datetime,
|
|
181
194
|
}
|
|
182
195
|
|
|
183
196
|
@staticmethod
|
|
@@ -195,12 +208,12 @@ class PolarsCodeParser:
|
|
|
195
208
|
# Block exec/eval
|
|
196
209
|
if isinstance(node, ast.Call):
|
|
197
210
|
if isinstance(node.func, ast.Name):
|
|
198
|
-
if node.func.id in {
|
|
211
|
+
if node.func.id in {"exec", "eval", "compile", "__import__"}:
|
|
199
212
|
raise ValueError(f"Function '{node.func.id}' is not allowed")
|
|
200
213
|
|
|
201
214
|
# Block access to system attributes
|
|
202
215
|
if isinstance(node, ast.Attribute):
|
|
203
|
-
if node.attr.startswith(
|
|
216
|
+
if node.attr.startswith("__"):
|
|
204
217
|
raise ValueError(f"Access to '{node.attr}' is not allowed")
|
|
205
218
|
|
|
206
219
|
except SyntaxError as e:
|
|
@@ -230,17 +243,17 @@ class PolarsCodeParser:
|
|
|
230
243
|
function_def = f"def _transform({params}):\n"
|
|
231
244
|
|
|
232
245
|
# Handle single line expressions
|
|
233
|
-
if
|
|
246
|
+
if "\n" not in code:
|
|
234
247
|
# For expressions that should return directly
|
|
235
|
-
if any(code.startswith(prefix) for prefix in [
|
|
248
|
+
if any(code.startswith(prefix) for prefix in ["pl.", "col(", "input_df", "expr("]):
|
|
236
249
|
return function_def + f" return {code}"
|
|
237
250
|
# For assignments
|
|
238
251
|
else:
|
|
239
252
|
return function_def + f" {code}\n return output_df"
|
|
240
253
|
|
|
241
254
|
# For multi-line code
|
|
242
|
-
indented_code =
|
|
243
|
-
return function_def + indented_code +
|
|
255
|
+
indented_code = "\n".join(f" {line}" for line in code.split("\n"))
|
|
256
|
+
return function_def + indented_code + "\n return output_df"
|
|
244
257
|
|
|
245
258
|
def get_executable(self, code: str, num_inputs: int = 1) -> Callable:
|
|
246
259
|
"""
|
|
@@ -262,11 +275,11 @@ class PolarsCodeParser:
|
|
|
262
275
|
wrapped_code = self._wrap_in_function(code, num_inputs)
|
|
263
276
|
try:
|
|
264
277
|
# Create namespace for execution
|
|
265
|
-
local_namespace:
|
|
278
|
+
local_namespace: dict[str, Any] = {}
|
|
266
279
|
|
|
267
280
|
exec(wrapped_code, self.safe_globals, local_namespace)
|
|
268
281
|
|
|
269
|
-
transform_func = local_namespace[
|
|
282
|
+
transform_func = local_namespace["_transform"]
|
|
270
283
|
return transform_func
|
|
271
284
|
except Exception as e:
|
|
272
285
|
raise ValueError(f"Error executing code: {str(e)}")
|
|
@@ -280,4 +293,4 @@ class PolarsCodeParser:
|
|
|
280
293
|
self._validate_code(code)
|
|
281
294
|
|
|
282
295
|
|
|
283
|
-
polars_code_parser = PolarsCodeParser()
|
|
296
|
+
polars_code_parser = PolarsCodeParser()
|
|
@@ -1,80 +1,77 @@
|
|
|
1
|
-
from typing import Generator, List
|
|
2
|
-
from openpyxl import Workbook, load_workbook
|
|
3
|
-
from openpyxl.worksheet.worksheet import Worksheet
|
|
4
1
|
import gc
|
|
2
|
+
from collections.abc import Generator
|
|
3
|
+
|
|
5
4
|
import polars as pl
|
|
6
|
-
from
|
|
5
|
+
from openpyxl import Workbook, load_workbook
|
|
6
|
+
from openpyxl.worksheet.worksheet import Worksheet
|
|
7
|
+
|
|
7
8
|
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
|
|
8
9
|
from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import dtype_to_pl_str
|
|
10
|
+
from flowfile_core.flowfile.flow_data_engine.utils import create_pl_df_type_save, get_data_type
|
|
9
11
|
|
|
10
12
|
|
|
11
|
-
def raw_data_openpyxl(
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
13
|
+
def raw_data_openpyxl(
|
|
14
|
+
file_path: str,
|
|
15
|
+
sheet_name: str = None,
|
|
16
|
+
min_row: int = None,
|
|
17
|
+
max_row: int = None,
|
|
18
|
+
min_col: int = None,
|
|
19
|
+
max_col: int = None,
|
|
20
|
+
) -> Generator[list, None, None]:
|
|
18
21
|
workbook: Workbook = load_workbook(file_path, data_only=True, read_only=True)
|
|
19
22
|
sheet_name = workbook.sheetnames[0] if sheet_name is None else sheet_name
|
|
20
23
|
sheet: Worksheet = workbook[sheet_name]
|
|
21
|
-
for row in sheet.iter_rows(min_row=min_row,
|
|
22
|
-
max_row=max_row,
|
|
23
|
-
min_col=min_col,
|
|
24
|
-
max_col=max_col,
|
|
25
|
-
values_only=True):
|
|
24
|
+
for row in sheet.iter_rows(min_row=min_row, max_row=max_row, min_col=min_col, max_col=max_col, values_only=True):
|
|
26
25
|
yield row
|
|
27
26
|
workbook.close()
|
|
28
27
|
del workbook
|
|
29
28
|
gc.collect()
|
|
30
29
|
|
|
31
30
|
|
|
32
|
-
def get_calamine_xlsx_data_types(file_path: str, sheet_name: str, start_row: int=0, end_row: int=0):
|
|
31
|
+
def get_calamine_xlsx_data_types(file_path: str, sheet_name: str, start_row: int = 0, end_row: int = 0):
|
|
33
32
|
df = df_from_calamine_xlsx(file_path, sheet_name, start_row, end_row)
|
|
34
|
-
return [
|
|
33
|
+
return [
|
|
34
|
+
FlowfileColumn.from_input(n, str(dt), col_index=i)
|
|
35
|
+
for i, (n, dt) in enumerate(zip(df.columns, df.dtypes, strict=False))
|
|
36
|
+
]
|
|
35
37
|
|
|
36
38
|
|
|
37
39
|
def df_from_calamine_xlsx(file_path: str, sheet_name: str, start_row: int = 0, end_row: int = 0) -> pl.DataFrame:
|
|
38
40
|
read_options = {}
|
|
39
41
|
if start_row > 0:
|
|
40
|
-
read_options[
|
|
42
|
+
read_options["header_row"] = start_row
|
|
41
43
|
if end_row > 0:
|
|
42
|
-
read_options[
|
|
43
|
-
return pl.read_excel(
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
sheet_name=sheet_name,
|
|
61
|
-
min_row=min_row,
|
|
62
|
-
max_row=max_row,
|
|
63
|
-
min_col=min_col,
|
|
64
|
-
max_col=max_col)
|
|
44
|
+
read_options["n_rows"] = end_row - start_row
|
|
45
|
+
return pl.read_excel(
|
|
46
|
+
source=file_path, engine="calamine", sheet_name=sheet_name, read_options=read_options, raise_if_empty=False
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def df_from_openpyxl(
|
|
51
|
+
file_path: str,
|
|
52
|
+
sheet_name: str = None,
|
|
53
|
+
min_row: int = None,
|
|
54
|
+
max_row: int = None,
|
|
55
|
+
min_col: int = None,
|
|
56
|
+
max_col: int = None,
|
|
57
|
+
has_headers: bool = True,
|
|
58
|
+
) -> pl.DataFrame:
|
|
59
|
+
data_iterator = raw_data_openpyxl(
|
|
60
|
+
file_path=file_path, sheet_name=sheet_name, min_row=min_row, max_row=max_row, min_col=min_col, max_col=max_col
|
|
61
|
+
)
|
|
65
62
|
raw_data = list(data_iterator)
|
|
66
63
|
if len(raw_data) > 0:
|
|
67
64
|
if has_headers:
|
|
68
65
|
columns = []
|
|
69
66
|
for i, col in enumerate(raw_data[0]):
|
|
70
67
|
if col is None:
|
|
71
|
-
col = f
|
|
68
|
+
col = f"_unnamed_column_{i}"
|
|
72
69
|
elif not isinstance(col, str):
|
|
73
70
|
col = str(col)
|
|
74
71
|
columns.append(col)
|
|
75
72
|
columns = ensure_unique(columns)
|
|
76
73
|
df = create_pl_df_type_save(raw_data[1:])
|
|
77
|
-
renames = {o: n for o, n in zip(df.columns, columns)}
|
|
74
|
+
renames = {o: n for o, n in zip(df.columns, columns, strict=False)}
|
|
78
75
|
df = df.rename(renames)
|
|
79
76
|
|
|
80
77
|
else:
|
|
@@ -84,32 +81,37 @@ def df_from_openpyxl(file_path: str,
|
|
|
84
81
|
return pl.DataFrame()
|
|
85
82
|
|
|
86
83
|
|
|
87
|
-
def get_open_xlsx_datatypes(
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
max_col=max_col)
|
|
84
|
+
def get_open_xlsx_datatypes(
|
|
85
|
+
file_path: str,
|
|
86
|
+
sheet_name: str = None,
|
|
87
|
+
min_row: int = None,
|
|
88
|
+
max_row: int = None,
|
|
89
|
+
min_col: int = None,
|
|
90
|
+
max_col: int = None,
|
|
91
|
+
has_headers: bool = True,
|
|
92
|
+
) -> list[FlowfileColumn]:
|
|
93
|
+
data_iterator = raw_data_openpyxl(
|
|
94
|
+
file_path=file_path, sheet_name=sheet_name, min_row=min_row, max_row=max_row, min_col=min_col, max_col=max_col
|
|
95
|
+
)
|
|
100
96
|
raw_data = data_iterator
|
|
101
97
|
if has_headers:
|
|
102
|
-
columns = (f
|
|
103
|
-
data_types = (dtype_to_pl_str.get(get_data_type(vals),
|
|
104
|
-
schema = [
|
|
98
|
+
columns = (f"_unnamed_column_{i}" if col is None else col for i, col in enumerate(next(raw_data)))
|
|
99
|
+
data_types = (dtype_to_pl_str.get(get_data_type(vals), "String") for vals in zip(*raw_data, strict=False))
|
|
100
|
+
schema = [
|
|
101
|
+
FlowfileColumn.from_input(n, d, col_index=i)
|
|
102
|
+
for i, (n, d) in enumerate(zip(columns, data_types, strict=False))
|
|
103
|
+
]
|
|
105
104
|
else:
|
|
106
|
-
columns = (f
|
|
107
|
-
data_types = (dtype_to_pl_str.get(get_data_type(vals),
|
|
108
|
-
schema = [
|
|
105
|
+
columns = (f"column_{i}" for i in range(len(next(raw_data))))
|
|
106
|
+
data_types = (dtype_to_pl_str.get(get_data_type(vals), "String") for vals in zip(*raw_data, strict=False))
|
|
107
|
+
schema = [
|
|
108
|
+
FlowfileColumn.from_input(n, d, col_index=i)
|
|
109
|
+
for i, (n, d) in enumerate(zip(columns, data_types, strict=False))
|
|
110
|
+
]
|
|
109
111
|
return schema
|
|
110
112
|
|
|
111
113
|
|
|
112
|
-
def ensure_unique(lst:
|
|
114
|
+
def ensure_unique(lst: list[str]) -> list[str]:
|
|
113
115
|
"""
|
|
114
116
|
Ensures that all elements in the input list are unique by appending
|
|
115
117
|
a version number (e.g., '_v1') to duplicates. It continues adding
|
|
@@ -140,4 +142,3 @@ def ensure_unique(lst: List[str]) -> List[str]:
|
|
|
140
142
|
seen[item] = 1 # First occurrence of the item
|
|
141
143
|
|
|
142
144
|
return result
|
|
143
|
-
|
|
@@ -1,9 +1,11 @@
|
|
|
1
|
-
from
|
|
1
|
+
from collections.abc import Generator
|
|
2
2
|
from functools import partial
|
|
3
3
|
from math import ceil
|
|
4
4
|
from random import randint
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
5
7
|
import polars as pl
|
|
6
|
-
from
|
|
8
|
+
from faker import Faker
|
|
7
9
|
|
|
8
10
|
|
|
9
11
|
def create_fake_data(n_records: int = 1000, optimized: bool = True) -> pl.DataFrame:
|
|
@@ -45,22 +47,25 @@ def create_fake_data(n_records: int = 1000, optimized: bool = True) -> pl.DataFr
|
|
|
45
47
|
|
|
46
48
|
def generate_phone_number():
|
|
47
49
|
return fake.phone_number()
|
|
50
|
+
|
|
48
51
|
data = []
|
|
49
52
|
for i in range(max_n_records):
|
|
50
53
|
name = generate_name()
|
|
51
|
-
data.append(
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
54
|
+
data.append(
|
|
55
|
+
dict(
|
|
56
|
+
ID=randint(1, 1000000),
|
|
57
|
+
Name=name,
|
|
58
|
+
Address=generate_address(),
|
|
59
|
+
City=cities[selector(min_range(7000)) - 1],
|
|
60
|
+
Email=generate_email(name),
|
|
61
|
+
Phone=generate_phone_number(),
|
|
62
|
+
DOB=dob[selector(min_range(100_000)) - 1],
|
|
63
|
+
Work=companies[selector(min_range(100_000)) - 1],
|
|
64
|
+
Zipcode=zipcodes[selector(min_range(200_000)) - 1],
|
|
65
|
+
Country=countries[selector(min_range(50)) - 1],
|
|
66
|
+
sales_data=sales_data[selector(max_n_records) - 1],
|
|
67
|
+
)
|
|
68
|
+
)
|
|
64
69
|
if max_n_records < n_records:
|
|
65
70
|
n_duplicates: int = ceil(n_records / max_n_records)
|
|
66
71
|
output = []
|
|
@@ -71,8 +76,9 @@ def create_fake_data(n_records: int = 1000, optimized: bool = True) -> pl.DataFr
|
|
|
71
76
|
return pl.DataFrame(data)
|
|
72
77
|
|
|
73
78
|
|
|
74
|
-
|
|
75
|
-
|
|
79
|
+
def create_fake_data_raw(
|
|
80
|
+
n_records: int = 1000, col_selection: list[str] = None
|
|
81
|
+
) -> Generator[dict[str, Any], None, None]:
|
|
76
82
|
fake = Faker()
|
|
77
83
|
selector = partial(randint, 0)
|
|
78
84
|
|
|
@@ -112,7 +118,7 @@ def create_fake_data_raw(n_records: int = 1000, col_selection: List[str] = None)
|
|
|
112
118
|
"Work": companies,
|
|
113
119
|
"Zipcode": zipcodes,
|
|
114
120
|
"Country": countries,
|
|
115
|
-
"sales_data": sales_data
|
|
121
|
+
"sales_data": sales_data,
|
|
116
122
|
}
|
|
117
123
|
|
|
118
124
|
# Filter the available columns based on col_selection
|
|
@@ -133,6 +139,6 @@ def create_fake_data_raw(n_records: int = 1000, col_selection: List[str] = None)
|
|
|
133
139
|
|
|
134
140
|
def write_fake_data():
|
|
135
141
|
df = create_fake_data()
|
|
136
|
-
df.write_parquet(
|
|
137
|
-
df.write_csv(
|
|
138
|
-
df.write_excel(
|
|
142
|
+
df.write_parquet("backend/tests/data/fake_data.parquet")
|
|
143
|
+
df.write_csv("backend/tests/data/fake_data.csv")
|
|
144
|
+
df.write_excel("backend/tests/data/fake_data.xlsx")
|
|
@@ -1 +1 @@
|
|
|
1
|
-
from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import *
|
|
1
|
+
from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import *
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
from typing import Any,
|
|
2
|
-
|
|
1
|
+
from typing import Any, Literal
|
|
2
|
+
|
|
3
3
|
from pl_fuzzy_frame_match.models import FuzzyMapping
|
|
4
|
+
from pydantic import BaseModel
|
|
4
5
|
|
|
5
|
-
OperationType = Literal[
|
|
6
|
+
OperationType = Literal["store", "calculate_schema", "calculate_number_of_records", "write_output", "store_sample"]
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
class PolarsOperation(BaseModel):
|
|
@@ -10,14 +11,14 @@ class PolarsOperation(BaseModel):
|
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
class PolarsScript(PolarsOperation):
|
|
13
|
-
task_id:
|
|
14
|
-
cache_dir:
|
|
14
|
+
task_id: str | None = None
|
|
15
|
+
cache_dir: str | None = None
|
|
15
16
|
operation_type: OperationType
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
class FuzzyJoinInput(BaseModel):
|
|
19
|
-
task_id:
|
|
20
|
-
cache_dir:
|
|
20
|
+
task_id: str | None = None
|
|
21
|
+
cache_dir: str | None = None
|
|
21
22
|
left_df_operation: PolarsOperation
|
|
22
23
|
right_df_operation: PolarsOperation
|
|
23
24
|
fuzzy_maps: list[FuzzyMapping]
|
|
@@ -27,10 +28,11 @@ class FuzzyJoinInput(BaseModel):
|
|
|
27
28
|
|
|
28
29
|
class Status(BaseModel):
|
|
29
30
|
background_task_id: str
|
|
30
|
-
status: Literal[
|
|
31
|
+
status: Literal[
|
|
32
|
+
"Processing", "Completed", "Error", "Unknown Error", "Starting", "Cancelled"
|
|
33
|
+
] # Type alias for status
|
|
31
34
|
file_ref: str
|
|
32
35
|
progress: int = 0
|
|
33
|
-
error_message:
|
|
36
|
+
error_message: str | None = None # Add error_message field
|
|
34
37
|
results: Any
|
|
35
|
-
result_type: Literal[
|
|
36
|
-
|
|
38
|
+
result_type: Literal["polars", "other"] = "polars"
|