Flowfile 0.5.6-py3-none-any.whl → 0.6.1-py3-none-any.whl
This diff shows the content of publicly released package versions as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their public registries.
- flowfile/api.py +8 -6
- flowfile/web/static/assets/{AdminView-c2c7942b.js → AdminView-C4K1DdHI.js} +28 -33
- flowfile/web/static/assets/{CloudConnectionView-7a3042c6.js → CloudConnectionView-BZbPvPUL.js} +39 -50
- flowfile/web/static/assets/{CloudStorageReader-24c54524.css → CloudStorageReader-BDByiqPI.css} +25 -25
- flowfile/web/static/assets/{CloudStorageReader-709c4037.js → CloudStorageReader-DLVukNJ7.js} +30 -35
- flowfile/web/static/assets/{CloudStorageWriter-604c51a8.js → CloudStorageWriter-Bfi-C1QW.js} +32 -37
- flowfile/web/static/assets/{CloudStorageWriter-60547855.css → CloudStorageWriter-y8jL8yjG.css} +24 -24
- flowfile/web/static/assets/{ColumnActionInput-d63d6746.js → ColumnActionInput-BpiCApw9.js} +7 -12
- flowfile/web/static/assets/{ColumnSelector-0c8cd1cd.js → ColumnSelector-CEAwedI7.js} +1 -2
- flowfile/web/static/assets/ContextMenu-CdojQu0w.js +9 -0
- flowfile/web/static/assets/ContextMenu-D12mhsy1.js +9 -0
- flowfile/web/static/assets/ContextMenu-EWUR98va.js +9 -0
- flowfile/web/static/assets/{ContextMenu.vue_vue_type_script_setup_true_lang-774c517c.js → ContextMenu.vue_vue_type_script_setup_true_lang-I4rXXd6G.js} +4 -5
- flowfile/web/static/assets/{CrossJoin-38e5b99a.js → CrossJoin-BOFfxkJO.js} +19 -18
- flowfile/web/static/assets/{CrossJoin-71b4cc10.css → CrossJoin-Cmbyt9im.css} +18 -18
- flowfile/web/static/assets/{CustomNode-76e8f3f5.js → CustomNode-Bhpezobq.js} +12 -17
- flowfile/web/static/assets/{DatabaseConnectionSettings-38155669.js → DatabaseConnectionSettings-Dw3bSJKB.js} +10 -11
- flowfile/web/static/assets/{DatabaseReader-5bf8c75b.css → DatabaseReader-D6pUNUCs.css} +21 -21
- flowfile/web/static/assets/{DatabaseReader-2e549c8f.js → DatabaseReader-m87ghlw0.js} +36 -34
- flowfile/web/static/assets/{DatabaseView-dc877c29.js → DatabaseView-CisSAtpe.js} +30 -38
- flowfile/web/static/assets/{DatabaseWriter-ffb91864.js → DatabaseWriter-Bbj9JLdL.js} +33 -35
- flowfile/web/static/assets/{DatabaseWriter-bdcf2c8b.css → DatabaseWriter-RBqdFLj8.css} +17 -17
- flowfile/web/static/assets/{DesignerView-a4466dab.js → DesignerView-DemDevTQ.js} +1752 -2054
- flowfile/web/static/assets/{DesignerView-71d4e9a1.css → DesignerView-Dm6OzlIc.css} +209 -168
- flowfile/web/static/assets/{DocumentationView-979afc84.js → DocumentationView-BrC1ZR3H.js} +3 -4
- flowfile/web/static/assets/{ExploreData-e4b92aaf.js → ExploreData-BMKcDuRb.js} +8 -10
- flowfile/web/static/assets/{ExternalSource-d08e7227.js → ExternalSource-BXrNNS-f.js} +40 -42
- flowfile/web/static/assets/{ExternalSource-7ac7373f.css → ExternalSource-NB6WVl5R.css} +14 -14
- flowfile/web/static/assets/{Filter-7add806d.js → Filter-C2MjsN6P.js} +36 -33
- flowfile/web/static/assets/{Filter-7494ea97.css → Filter-DCMGGuGC.css} +9 -9
- flowfile/web/static/assets/{Formula-53d58c43.css → Formula-BYafbDj8.css} +4 -4
- flowfile/web/static/assets/{Formula-36ab24d2.js → Formula-ufuy4mVD.js} +27 -26
- flowfile/web/static/assets/{FuzzyMatch-ad6361d6.css → FuzzyMatch-BGJAwgd0.css} +42 -42
- flowfile/web/static/assets/{FuzzyMatch-cc01bb04.js → FuzzyMatch-BOHODq3h.js} +36 -38
- flowfile/web/static/assets/{GraphSolver-4fb98f3b.js → GraphSolver-B6ZzpNGO.js} +23 -21
- flowfile/web/static/assets/{GraphSolver-4b4d7db9.css → GraphSolver-DFN83sj3.css} +4 -4
- flowfile/web/static/assets/{GroupBy-b3c8f429.js → GroupBy-B9BRNcfe.js} +30 -29
- flowfile/web/static/assets/{Sort-4abb7fae.css → GroupBy-x4ooP5np.css} +1 -1
- flowfile/web/static/assets/Join-Bx_g5bZz.css +118 -0
- flowfile/web/static/assets/{Join-096b7b26.js → Join-DsBEy1IH.js} +48 -43
- flowfile/web/static/assets/{LoginView-c33a246a.js → LoginView-Ct0rhdcO.js} +1 -2
- flowfile/web/static/assets/{ManualInput-39111f19.css → ManualInput-DlZmtMdt.css} +48 -48
- flowfile/web/static/assets/{ManualInput-7307e9b1.js → ManualInput-bC4BUgnG.js} +40 -41
- flowfile/web/static/assets/{MultiSelect-14822c48.js → MultiSelect-DIQ8PuTC.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-90c4d340.js → MultiSelect.vue_vue_type_script_setup_true_lang-BefHfqTI.js} +1 -1
- flowfile/web/static/assets/{NodeDesigner-5036c392.js → NodeDesigner-D39yzr2k.js} +178 -208
- flowfile/web/static/assets/{NodeDesigner-94cd4dd3.css → NodeDesigner-R0l6sYyY.css} +76 -76
- flowfile/web/static/assets/{NumericInput-15cf3b72.js → NumericInput-DMSX3oOr.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-91e679d7.js → NumericInput.vue_vue_type_script_setup_true_lang-d0YlVHAl.js} +1 -1
- flowfile/web/static/assets/{Output-1f8ed42c.js → Output-D0VoXGcW.js} +26 -34
- flowfile/web/static/assets/{Output-692dd25d.css → Output-DsmglIDy.css} +5 -5
- flowfile/web/static/assets/{Pivot-0e153f4e.js → Pivot-BnMB4sEe.js} +26 -26
- flowfile/web/static/assets/{Pivot-0eda81b4.css → Pivot-qKTyWxop.css} +4 -4
- flowfile/web/static/assets/{PivotValidation-81ec2a33.js → PivotValidation-B2lWvugt.js} +7 -9
- flowfile/web/static/assets/{PivotValidation-5a4f7c79.js → PivotValidation-BPlhRjpL.js} +7 -9
- flowfile/web/static/assets/{PolarsCode-a39f15ac.js → PolarsCode-5h0tHnWR.js} +22 -20
- flowfile/web/static/assets/{PopOver-ddcfe4f6.js → PopOver-BHpt5rsj.js} +5 -9
- flowfile/web/static/assets/{PopOver-d96599db.css → PopOver-CyYM4-rV.css} +1 -1
- flowfile/web/static/assets/{Read-90f366bc.css → Read-DJxkrTb_.css} +10 -10
- flowfile/web/static/assets/Read-TsLEFh3B.js +227 -0
- flowfile/web/static/assets/{RecordCount-e9048ccd.js → RecordCount-DkVixq9v.js} +18 -17
- flowfile/web/static/assets/{RecordId-ad02521d.js → RecordId-C2UEGlCf.js} +42 -39
- flowfile/web/static/assets/{SQLQueryComponent-2eeecf0b.js → SQLQueryComponent-Dr5KMoD3.js} +2 -3
- flowfile/web/static/assets/{Sample-9a68c23d.js → Sample-Cb3eQNmd.js} +30 -30
- flowfile/web/static/assets/{SecretSelector-2429f35a.js → SecretSelector-De2L2bSx.js} +3 -4
- flowfile/web/static/assets/{SecretsView-c6afc915.js → SecretsView-CheC9BPV.js} +13 -16
- flowfile/web/static/assets/{Select-fcd002b6.js → Select-CI8TloRs.js} +41 -36
- flowfile/web/static/assets/{SettingsSection-5ce15962.js → SettingsSection-B39ulIiI.js} +1 -2
- flowfile/web/static/assets/{SettingsSection-c6b1362c.js → SettingsSection-BiCc7S9h.js} +1 -2
- flowfile/web/static/assets/{SettingsSection-cebb91d5.js → SettingsSection-CITK_R7o.js} +2 -3
- flowfile/web/static/assets/{SettingsSection-26fe48d4.css → SettingsSection-D2GgY-Aq.css} +4 -4
- flowfile/web/static/assets/{SetupView-2d12e01f.js → SetupView-C1aXRDvp.js} +1 -2
- flowfile/web/static/assets/{SingleSelect-b67de4eb.js → SingleSelect-Kr_hz90m.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-eedb70eb.js → SingleSelect.vue_vue_type_script_setup_true_lang-Rxht5Z5N.js} +1 -1
- flowfile/web/static/assets/{SliderInput-fd8134ac.js → SliderInput-CLqpCxCb.js} +1 -2
- flowfile/web/static/assets/{GroupBy-5792782d.css → Sort-BIt2kc_p.css} +1 -1
- flowfile/web/static/assets/{Sort-c005a573.js → Sort-Dnw_J6Qi.js} +25 -25
- flowfile/web/static/assets/{TextInput-1bb31dab.js → TextInput-wdlunIZC.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-a51fe730.js → TextInput.vue_vue_type_script_setup_true_lang-Bcj3ywzv.js} +1 -1
- flowfile/web/static/assets/{TextToRows-4f363753.js → TextToRows-BhtyGWPq.js} +42 -49
- flowfile/web/static/assets/{TextToRows-12afb4f4.css → TextToRows-DivDOLDx.css} +9 -9
- flowfile/web/static/assets/{ToggleSwitch-ca0f2e5e.js → ToggleSwitch-B-6WzfFf.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-49aa41d8.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-Cj8LqT-b.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-f6147968.js → UnavailableFields-Yf6XSqFB.js} +2 -3
- flowfile/web/static/assets/{Union-c65f17b7.js → Union-CwpjeKYC.js} +20 -23
- flowfile/web/static/assets/{Unpivot-b6ad6427.css → Union-DQJcpp3-.css} +6 -6
- flowfile/web/static/assets/{Unique-a1d96fb2.js → Unique-25v3urqH.js} +75 -74
- flowfile/web/static/assets/{Union-d6a8d7d5.css → Unpivot-Deqh1gtI.css} +6 -6
- flowfile/web/static/assets/{Unpivot-c2657ff3.js → Unpivot-sYcTTXrq.js} +34 -27
- flowfile/web/static/assets/{UnpivotValidation-28e29a3b.js → UnpivotValidation-C5DDEKY2.js} +5 -7
- flowfile/web/static/assets/VueGraphicWalker-B8l1_Z92.js +131 -0
- flowfile/web/static/assets/VueGraphicWalker-Da_1-3me.css +21 -0
- flowfile/web/static/assets/{api-df48ec50.js → api-C0LvF-0C.js} +1 -1
- flowfile/web/static/assets/{api-ee542cf7.js → api-DaC83EO_.js} +1 -1
- flowfile/web/static/assets/client-C8Ygr6Gb.js +42 -0
- flowfile/web/static/assets/{dropDown-7576a76a.js → dropDown-D5YXaPRR.js} +7 -12
- flowfile/web/static/assets/{fullEditor-7583bef5.js → fullEditor-BVYnWm05.js} +300 -18
- flowfile/web/static/assets/genericNodeSettings-2wAu-QKn.css +75 -0
- flowfile/web/static/assets/genericNodeSettings-BBtW_Cpz.js +590 -0
- flowfile/web/static/assets/{VueGraphicWalker-2fc3ddd4.js → graphic-walker.es-VrK6vdGE.js} +92305 -89751
- flowfile/web/static/assets/index-BCJxPfM5.js +6693 -0
- flowfile/web/static/assets/{index-057d770d.js → index-CHPMUR0d.js} +150 -170
- flowfile/web/static/assets/index-DPkoZWq8.js +32 -0
- flowfile/web/static/assets/index-DnW_KC_I.js +277 -0
- flowfile/web/static/assets/index-UFXyfirV.css +10797 -0
- flowfile/web/static/assets/index-bcuE0Z0p.js +87456 -0
- flowfile/web/static/assets/{node.types-2c15bb7e.js → node.types-Dl4gtSW9.js} +2 -2
- flowfile/web/static/assets/{outputCsv-c492b15e.js → outputCsv-BELuBiJZ.js} +1 -2
- flowfile/web/static/assets/outputCsv-CdGkv-fN.css +2581 -0
- flowfile/web/static/assets/{outputExcel-13bfa10f.js → outputExcel-D0TTNM79.js} +1 -2
- flowfile/web/static/assets/{outputParquet-9be1523a.js → outputParquet-Cz9EbRHj.js} +1 -2
- flowfile/web/static/assets/{readCsv-5a49a8c9.js → readCsv-7bd3kUMI.js} +1 -2
- flowfile/web/static/assets/{readExcel-27c30ad8.js → readExcel-Cq8CCwIv.js} +3 -4
- flowfile/web/static/assets/{readParquet-c5244ad5.css → readParquet-CRDmBrsp.css} +4 -4
- flowfile/web/static/assets/{readParquet-446bde68.js → readParquet-DjR4mRaj.js} +4 -5
- flowfile/web/static/assets/{secrets.api-34431884.js → secrets.api-C9o2KE5V.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-5754a2b1.js → selectDynamic-Bl5FVsME.js} +5 -7
- flowfile/web/static/assets/useNodeSettings-dMS9zmh_.js +69 -0
- flowfile/web/static/assets/{vue-codemirror.esm-8f46fb36.js → vue-codemirror.esm-CwaYwln0.js} +3469 -3064
- flowfile/web/static/assets/{vue-content-loader.es-808fe33a.js → vue-content-loader.es-CMoRXo7N.js} +3 -3
- flowfile/web/static/index.html +2 -3
- {flowfile-0.5.6.dist-info → flowfile-0.6.1.dist-info}/METADATA +2 -1
- flowfile-0.6.1.dist-info/RECORD +417 -0
- {flowfile-0.5.6.dist-info → flowfile-0.6.1.dist-info}/WHEEL +1 -1
- flowfile_core/auth/password.py +1 -0
- flowfile_core/database/init_db.py +7 -5
- flowfile_core/fileExplorer/funcs.py +2 -2
- flowfile_core/flowfile/code_generator/code_generator.py +13 -11
- flowfile_core/flowfile/filter_expressions.py +327 -0
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +61 -59
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +3 -29
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +45 -14
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +20 -3
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/streaming.py +206 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +146 -24
- flowfile_core/flowfile/flow_graph.py +504 -190
- flowfile_core/flowfile/flow_node/__init__.py +32 -0
- flowfile_core/flowfile/flow_node/executor.py +404 -0
- flowfile_core/flowfile/flow_node/flow_node.py +207 -106
- flowfile_core/flowfile/flow_node/models.py +40 -0
- flowfile_core/flowfile/flow_node/output_field_config_applier.py +217 -0
- flowfile_core/flowfile/flow_node/schema_utils.py +78 -0
- flowfile_core/flowfile/flow_node/state.py +155 -0
- flowfile_core/flowfile/history_manager.py +401 -0
- flowfile_core/flowfile/manage/compatibility_enhancements.py +9 -0
- flowfile_core/flowfile/manage/io_flowfile.py +3 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +20 -4
- flowfile_core/flowfile/util/execution_orderer.py +89 -36
- flowfile_core/routes/auth.py +8 -9
- flowfile_core/routes/routes.py +320 -101
- flowfile_core/routes/user_defined_components.py +18 -16
- flowfile_core/schemas/history_schema.py +220 -0
- flowfile_core/schemas/input_schema.py +130 -6
- flowfile_core/schemas/schemas.py +9 -0
- flowfile_core/schemas/transform_schema.py +27 -5
- flowfile_core/schemas/yaml_types.py +23 -5
- flowfile_frame/adding_expr.py +18 -126
- flowfile_frame/callable_utils.py +261 -0
- flowfile_frame/database/connection_manager.py +0 -1
- flowfile_frame/expr.py +8 -4
- flowfile_frame/flow_frame.py +41 -41
- flowfile_frame/lazy.py +3 -12
- flowfile_frame/lazy_methods.py +5 -64
- flowfile_frame/utils.py +13 -32
- flowfile_worker/funcs.py +6 -4
- flowfile_worker/main.py +2 -0
- flowfile_worker/models.py +31 -11
- flowfile_worker/routes.py +60 -35
- flowfile_worker/spawner.py +7 -1
- flowfile_worker/streaming.py +335 -0
- flowfile/web/static/assets/ContextMenu-366bf1b4.js +0 -9
- flowfile/web/static/assets/ContextMenu-85cf5b44.js +0 -9
- flowfile/web/static/assets/ContextMenu-9d28ae6d.js +0 -9
- flowfile/web/static/assets/Join-28b5e18f.css +0 -109
- flowfile/web/static/assets/Read-39b63932.js +0 -222
- flowfile/web/static/assets/VueGraphicWalker-430f0b86.css +0 -6
- flowfile/web/static/assets/database_reader-ce1e55f3.svg +0 -24
- flowfile/web/static/assets/database_writer-b4ad0753.svg +0 -23
- flowfile/web/static/assets/element-icons-9c88a535.woff +0 -0
- flowfile/web/static/assets/element-icons-de5eb258.ttf +0 -0
- flowfile/web/static/assets/genericNodeSettings-0155288b.js +0 -136
- flowfile/web/static/assets/genericNodeSettings-3b2507ea.css +0 -46
- flowfile/web/static/assets/index-aeec439d.js +0 -38
- flowfile/web/static/assets/index-ca6799de.js +0 -62760
- flowfile/web/static/assets/index-d60c9dd4.css +0 -10777
- flowfile/web/static/assets/nodeInput-d478b9ac.js +0 -2
- flowfile/web/static/assets/outputCsv-cc84e09f.css +0 -2499
- flowfile-0.5.6.dist-info/RECORD +0 -407
- /flowfile/web/static/assets/{AdminView-f53bad23.css → AdminView-B2Dthl3u.css} +0 -0
- /flowfile/web/static/assets/{CloudConnectionView-cf85f943.css → CloudConnectionView-BdFYGWV7.css} +0 -0
- /flowfile/web/static/assets/{ColumnActionInput-c44b7aee.css → ColumnActionInput-dCasSIC9.css} +0 -0
- /flowfile/web/static/assets/{ColumnSelector-371637fb.css → ColumnSelector-j6sEOjo1.css} +0 -0
- /flowfile/web/static/assets/{CustomNode-edb9b939.css → CustomNode-VPlajG0j.css} +0 -0
- /flowfile/web/static/assets/{DatabaseConnectionSettings-c20a1e16.css → DatabaseConnectionSettings-B78hXYgu.css} +0 -0
- /flowfile/web/static/assets/{DatabaseView-6655afd6.css → DatabaseView-B-_adk1s.css} +0 -0
- /flowfile/web/static/assets/{DocumentationView-9ea6e871.css → DocumentationView-CL7iipFL.css} +0 -0
- /flowfile/web/static/assets/{ExploreData-10c5acc8.css → ExploreData-DHjv0Plr.css} +0 -0
- /flowfile/web/static/assets/{LoginView-d325d632.css → LoginView-DN1BXY3e.css} +0 -0
- /flowfile/web/static/assets/{PivotValidation-0e905b1a.css → PivotValidation-DK-FARWe.css} +0 -0
- /flowfile/web/static/assets/{PivotValidation-41b57ad6.css → PivotValidation-FUa9F47u.css} +0 -0
- /flowfile/web/static/assets/{PolarsCode-2b1f1f23.css → PolarsCode-G-gRSrSc.css} +0 -0
- /flowfile/web/static/assets/{SQLQueryComponent-edb90b98.css → SQLQueryComponent-oAbWw0r-.css} +0 -0
- /flowfile/web/static/assets/{SecretSelector-6329f743.css → SecretSelector-CJSadIZx.css} +0 -0
- /flowfile/web/static/assets/{SecretsView-aa291340.css → SecretsView-DbzIRAba.css} +0 -0
- /flowfile/web/static/assets/{SettingsSection-8f980839.css → SettingsSection-BGcJnH6q.css} +0 -0
- /flowfile/web/static/assets/{SettingsSection-07fbbc39.css → SettingsSection-DDWn_EGW.css} +0 -0
- /flowfile/web/static/assets/{SetupView-ec26f76a.css → SetupView-CI1nd-5Z.css} +0 -0
- /flowfile/web/static/assets/{SliderInput-f2e4f23c.css → SliderInput-BRk-q_Dk.css} +0 -0
- /flowfile/web/static/assets/{UnavailableFields-394a1f78.css → UnavailableFields-DRKDImKe.css} +0 -0
- /flowfile/web/static/assets/{Unique-2b705521.css → Unique-Absb0aON.css} +0 -0
- /flowfile/web/static/assets/{UnpivotValidation-d5ca3b7b.css → UnpivotValidation-DSBkFgS-.css} +0 -0
- /flowfile/web/static/assets/{airbyte-292aa232.png → airbyte-W0xvIXwZ.png} +0 -0
- /flowfile/web/static/assets/{cloud_storage_reader-aa1415d6.png → cloud_storage_reader-3GpSCk90.png} +0 -0
- /flowfile/web/static/assets/{cross_join-d30c0290.png → cross_join-B0qpgYoV.png} +0 -0
- /flowfile/web/static/assets/{dropDown-1d6acbd9.css → dropDown-CE0VF5_P.css} +0 -0
- /flowfile/web/static/assets/{explore_data-8a0a2861.png → explore_data-tX6olPPL.png} +0 -0
- /flowfile/web/static/assets/{fa-brands-400-808443ae.ttf → fa-brands-400-D1LuMI3I.ttf} +0 -0
- /flowfile/web/static/assets/{fa-brands-400-d7236a19.woff2 → fa-brands-400-D_cYUPeE.woff2} +0 -0
- /flowfile/web/static/assets/{fa-regular-400-e3456d12.woff2 → fa-regular-400-BjRzuEpd.woff2} +0 -0
- /flowfile/web/static/assets/{fa-regular-400-54cf6086.ttf → fa-regular-400-DZaxPHgR.ttf} +0 -0
- /flowfile/web/static/assets/{fa-solid-900-aa759986.woff2 → fa-solid-900-CTAAxXor.woff2} +0 -0
- /flowfile/web/static/assets/{fa-solid-900-d2f05935.ttf → fa-solid-900-D0aA9rwL.ttf} +0 -0
- /flowfile/web/static/assets/{fa-v4compatibility-0ce9033c.woff2 → fa-v4compatibility-C9RhG_FT.woff2} +0 -0
- /flowfile/web/static/assets/{fa-v4compatibility-30f6abf6.ttf → fa-v4compatibility-CCth-dXg.ttf} +0 -0
- /flowfile/web/static/assets/{filter-d7708bda.png → filter-WRdZyUOw.png} +0 -0
- /flowfile/web/static/assets/{formula-eeeb1611.png → formula-CgM7uHVI.png} +0 -0
- /flowfile/web/static/assets/{fullEditor-fe9f7e18.css → fullEditor-CmDI7T9F.css} +0 -0
- /flowfile/web/static/assets/{fuzzy_match-40c161b2.png → fuzzy_match-Yon3k5Tc.png} +0 -0
- /flowfile/web/static/assets/{graph_solver-8b7888b8.png → graph_solver-BlMrBttD.png} +0 -0
- /flowfile/web/static/assets/{group_by-80561fc3.png → group_by-Gici0CSS.png} +0 -0
- /flowfile/web/static/assets/{input_data-ab2eb678.png → input_data-BRdGecLc.png} +0 -0
- /flowfile/web/static/assets/{join-349043ae.png → join-BITWRu73.png} +0 -0
- /flowfile/web/static/assets/{manual_input-ae98f31d.png → manual_input-CFvo_EUS.png} +0 -0
- /flowfile/web/static/assets/{old_join-5d0eb604.png → old_join-B9bkpPqv.png} +0 -0
- /flowfile/web/static/assets/{output-06ec0371.png → output-Dp7-ZpC4.png} +0 -0
- /flowfile/web/static/assets/{outputExcel-f5d272b2.css → outputExcel-CKgRe2iT.css} +0 -0
- /flowfile/web/static/assets/{outputParquet-54597c3c.css → outputParquet-d7j407cK.css} +0 -0
- /flowfile/web/static/assets/{pivot-9660df51.png → pivot-DSxKhNlD.png} +0 -0
- /flowfile/web/static/assets/{polars_code-05ce5dc6.png → polars_code-DxiztZ1c.png} +0 -0
- /flowfile/web/static/assets/{readCsv-3bfac4c3.css → readCsv-BG-1Jilp.css} +0 -0
- /flowfile/web/static/assets/{readExcel-3db6b763.css → readExcel-DBQXKPtC.css} +0 -0
- /flowfile/web/static/assets/{record_count-dab44eb5.png → record_count-DCeaLtpS.png} +0 -0
- /flowfile/web/static/assets/{record_id-0b15856b.png → record_id-FeUjyIFh.png} +0 -0
- /flowfile/web/static/assets/{sample-693a88b5.png → sample-DeqfRiB-.png} +0 -0
- /flowfile/web/static/assets/{select-b0d0437a.png → select-D4JjbdjS.png} +0 -0
- /flowfile/web/static/assets/{selectDynamic-f2fb394f.css → selectDynamic-CjeTPUUo.css} +0 -0
- /flowfile/web/static/assets/{sort-2aa579f0.png → sort-DGwUG9WS.png} +0 -0
- /flowfile/web/static/assets/{summarize-2a099231.png → summarize-DFaNHpfp.png} +0 -0
- /flowfile/web/static/assets/{text_to_rows-859b29ea.png → text_to_rows-BdiAewrN.png} +0 -0
- /flowfile/web/static/assets/{union-2d8609f4.png → union-DCK-LSMq.png} +0 -0
- /flowfile/web/static/assets/{unique-1958b98a.png → unique-CdP3zZIq.png} +0 -0
- /flowfile/web/static/assets/{unpivot-d3cb4b5b.png → unpivot-CHttrEt8.png} +0 -0
- /flowfile/web/static/assets/{user-defined-icon-0ae16c90.png → user-defined-icon-BcIp2Vzo.png} +0 -0
- /flowfile/web/static/assets/{view-7a0f0be1.png → view-DUSRwjvq.png} +0 -0
- {flowfile-0.5.6.dist-info → flowfile-0.6.1.dist-info}/entry_points.txt +0 -0
- {flowfile-0.5.6.dist-info → flowfile-0.6.1.dist-info}/licenses/LICENSE +0 -0
flowfile_core/flowfile/flow_data_engine/flow_data_engine.py

@@ -7,7 +7,7 @@ from collections.abc import Callable, Generator, Iterable
 from copy import deepcopy
 from dataclasses import dataclass
 from math import ceil
-from typing import Any, Literal, TypeVar
+from typing import Any, Literal, TypeVar
 
 import polars as pl
 
@@ -37,7 +37,11 @@ from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import (
     assert_if_flowfile_schema,
     convert_stats_to_column_info,
 )
-from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import
+from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import (
+    cast_str_to_polars_type,
+    get_polars_type,
+    safe_eval_pl_type,
+)
 from flowfile_core.flowfile.flow_data_engine.fuzzy_matching.prepare_for_fuzzy_match import prepare_for_fuzzy_match
 from flowfile_core.flowfile.flow_data_engine.join import (
     get_col_name_to_delete,
@@ -171,7 +175,7 @@ class FlowDataEngine:
     name: str = None
     number_of_records: int = None
     errors: list = None
-    _schema: list[
+    _schema: list[FlowfileColumn] | None = None
 
     # Configuration attributes
     _optimize_memory: bool = False
@@ -204,13 +208,11 @@ class FlowDataEngine:
 
     def __init__(
         self,
-        raw_data:
-            list[dict], list[Any], dict[str, Any], "ParquetFile", pl.DataFrame, pl.LazyFrame, input_schema.RawData
-        ] = None,
+        raw_data: list[dict] | list[Any] | dict[str, Any] | ParquetFile | pl.DataFrame | pl.LazyFrame | input_schema.RawData = None,
         path_ref: str = None,
         name: str = None,
         optimize_memory: bool = True,
-        schema: list[
+        schema: list[FlowfileColumn] | list[str] | pl.Schema = None,
         number_of_records: int = None,
         calculate_schema_stats: bool = False,
         streamable: bool = True,
@@ -523,7 +525,7 @@ class FlowDataEngine:
     @classmethod
     def from_cloud_storage_obj(
         cls, settings: cloud_storage_schemas.CloudStorageReadSettingsInternal
-    ) ->
+    ) -> FlowDataEngine:
         """Creates a FlowDataEngine from an object in cloud storage.
 
         This method supports reading from various cloud storage providers like AWS S3,
@@ -607,7 +609,7 @@ class FlowDataEngine:
         storage_options: dict[str, Any],
         credential_provider: Callable | None,
         read_settings: cloud_storage_schemas.CloudStorageReadSettings,
-    ) ->
+    ) -> FlowDataEngine:
         """Reads Iceberg table(s) from cloud storage."""
         raise NotImplementedError("Failed to read Iceberg table from cloud storage: Not yet implemented")
 
@@ -618,7 +620,7 @@ class FlowDataEngine:
         storage_options: dict[str, Any],
         credential_provider: Callable | None,
         is_directory: bool,
-    ) ->
+    ) -> FlowDataEngine:
         """Reads Parquet file(s) from cloud storage."""
         try:
             # Use scan_parquet for lazy evaluation
@@ -656,7 +658,7 @@ class FlowDataEngine:
         storage_options: dict[str, Any],
         credential_provider: Callable | None,
         read_settings: cloud_storage_schemas.CloudStorageReadSettings,
-    ) ->
+    ) -> FlowDataEngine:
         """Reads a Delta Lake table from cloud storage."""
         try:
             logger.info("Reading Delta file from cloud storage...")
@@ -687,7 +689,7 @@ class FlowDataEngine:
         storage_options: dict[str, Any],
         credential_provider: Callable | None,
         read_settings: cloud_storage_schemas.CloudStorageReadSettings,
-    ) ->
+    ) -> FlowDataEngine:
         """Reads CSV file(s) from cloud storage."""
         try:
             scan_kwargs = {
@@ -730,7 +732,7 @@ class FlowDataEngine:
         storage_options: dict[str, Any],
         credential_provider: Callable | None,
         is_directory: bool,
-    ) ->
+    ) -> FlowDataEngine:
         """Reads JSON file(s) from cloud storage."""
         try:
             if is_directory:
@@ -821,6 +823,7 @@ class FlowDataEngine:
         if self.lazy and isinstance(df, pl.DataFrame):
             raise Exception("Cannot set a non-lazy dataframe to a lazy flowfile")
         self._data_frame = df
+        self._schema = None
 
     @staticmethod
     def _create_schema_stats_from_pl_schema(pl_schema: pl.Schema) -> list[dict]:
@@ -968,7 +971,7 @@ class FlowDataEngine:
 
     def do_group_by(
         self, group_by_input: transform_schemas.GroupByInput, calculate_schema_stats: bool = True
-    ) ->
+    ) -> FlowDataEngine:
         """Performs a group-by operation on the DataFrame.
 
         Args:
@@ -1008,7 +1011,7 @@ class FlowDataEngine:
             calculate_schema_stats=calculate_schema_stats,
         )
 
-    def do_sort(self, sorts: list[transform_schemas.SortByInput]) ->
+    def do_sort(self, sorts: list[transform_schemas.SortByInput]) -> FlowDataEngine:
         """Sorts the DataFrame by one or more columns.
 
         Args:
@@ -1027,7 +1030,7 @@ class FlowDataEngine:
 
     def change_column_types(
         self, transforms: list[transform_schemas.SelectInput], calculate_schema: bool = False
-    ) ->
+    ) -> FlowDataEngine:
         """Changes the data type of one or more columns.
 
         Args:
@@ -1040,7 +1043,7 @@ class FlowDataEngine:
         """
         dtypes = [dtype.base_type() for dtype in self.data_frame.collect_schema().dtypes()]
         idx_mapping = list(
-            (transform.old_name, self.cols_idx.get(transform.old_name),
+            (transform.old_name, self.cols_idx.get(transform.old_name), get_polars_type(transform.polars_type))
             for transform in transforms
             if transform.data_type is not None
         )
@@ -1122,7 +1125,7 @@ class FlowDataEngine:
         return self.data_frame.to_dict(as_series=False)
 
     @classmethod
-    def create_from_external_source(cls, external_source: ExternalDataSource) ->
+    def create_from_external_source(cls, external_source: ExternalDataSource) -> FlowDataEngine:
         """Creates a FlowDataEngine from an external data source.
 
         Args:
@@ -1142,7 +1145,7 @@ class FlowDataEngine:
         return ff
 
     @classmethod
-    def create_from_sql(cls, sql: str, conn: Any) ->
+    def create_from_sql(cls, sql: str, conn: Any) -> FlowDataEngine:
         """Creates a FlowDataEngine by executing a SQL query.
 
         Args:
@@ -1155,7 +1158,7 @@ class FlowDataEngine:
         return cls(pl.read_sql(sql, conn))
 
     @classmethod
-    def create_from_schema(cls, schema: list[FlowfileColumn]) ->
+    def create_from_schema(cls, schema: list[FlowfileColumn]) -> FlowDataEngine:
         """Creates an empty FlowDataEngine from a schema definition.
 
         Args:
@@ -1172,7 +1175,7 @@ class FlowDataEngine:
         return cls(df, schema=schema, calculate_schema_stats=False, number_of_records=0)
 
     @classmethod
-    def create_from_path(cls, received_table: input_schema.ReceivedTable) ->
+    def create_from_path(cls, received_table: input_schema.ReceivedTable) -> FlowDataEngine:
         """Creates a FlowDataEngine from a local file path.
 
         Supports various file types like CSV, Parquet, and Excel.
@@ -1200,7 +1203,7 @@ class FlowDataEngine:
         return flow_file
 
     @classmethod
-    def create_random(cls, number_of_records: int = 1000) ->
+    def create_random(cls, number_of_records: int = 1000) -> FlowDataEngine:
         """Creates a FlowDataEngine with randomly generated data.
 
         Useful for testing and examples.
@@ -1214,7 +1217,7 @@ class FlowDataEngine:
         return cls(create_fake_data(number_of_records))
 
     @classmethod
-    def generate_enumerator(cls, length: int = 1000, output_name: str = "output_column") ->
+    def generate_enumerator(cls, length: int = 1000, output_name: str = "output_column") -> FlowDataEngine:
         """Generates a FlowDataEngine with a single column containing a sequence of integers.
 
         Args:
@@ -1277,7 +1280,7 @@ class FlowDataEngine:
 
         return flow_file_columns
 
-    def split(self, split_input: transform_schemas.TextToRowsInput) ->
+    def split(self, split_input: transform_schemas.TextToRowsInput) -> FlowDataEngine:
         """Splits a column's text values into multiple rows based on a delimiter.
 
         This operation is often referred to as "exploding" the DataFrame, as it
@@ -1304,7 +1307,7 @@ class FlowDataEngine:
 
         return FlowDataEngine(df)
 
-    def unpivot(self, unpivot_input: transform_schemas.UnpivotInput) ->
+    def unpivot(self, unpivot_input: transform_schemas.UnpivotInput) -> FlowDataEngine:
         """Converts the DataFrame from a wide to a long format.
 
         This is the inverse of a pivot operation, taking columns and transforming
@@ -1328,7 +1331,7 @@ class FlowDataEngine:
 
         return FlowDataEngine(result)
 
-    def do_pivot(self, pivot_input: transform_schemas.PivotInput, node_logger: NodeLogger = None) ->
+    def do_pivot(self, pivot_input: transform_schemas.PivotInput, node_logger: NodeLogger = None) -> FlowDataEngine:
         """Converts the DataFrame from a long to a wide format, aggregating values.
 
         Args:
@@ -1399,7 +1402,7 @@ class FlowDataEngine:
 
         return FlowDataEngine(df, calculate_schema_stats=False)
 
-    def do_filter(self, predicate: str) ->
+    def do_filter(self, predicate: str) -> FlowDataEngine:
         """Filters rows based on a predicate expression.
 
         Args:
@@ -1418,7 +1421,7 @@ class FlowDataEngine:
         df = self.data_frame.filter(f)
         return FlowDataEngine(df, schema=self.schema, streamable=self._streamable)
 
-    def add_record_id(self, record_id_settings: transform_schemas.RecordIdInput) ->
+    def add_record_id(self, record_id_settings: transform_schemas.RecordIdInput) -> FlowDataEngine:
         """Adds a record ID (row number) column to the DataFrame.
 
         Can generate a simple sequential ID or a grouped ID that resets for
@@ -1435,7 +1438,7 @@ class FlowDataEngine:
             return self._add_grouped_record_id(record_id_settings)
         return self._add_simple_record_id(record_id_settings)
 
-    def _add_grouped_record_id(self, record_id_settings: transform_schemas.RecordIdInput) ->
+    def _add_grouped_record_id(self, record_id_settings: transform_schemas.RecordIdInput) -> FlowDataEngine:
         """Adds a record ID column with grouping."""
         select_cols = [pl.col(record_id_settings.output_column_name)] + [pl.col(c) for c in self.columns]
 
@@ -1456,7 +1459,7 @@ class FlowDataEngine:
 
         return FlowDataEngine(df, schema=output_schema)
 
-    def _add_simple_record_id(self, record_id_settings: transform_schemas.RecordIdInput) ->
+    def _add_simple_record_id(self, record_id_settings: transform_schemas.RecordIdInput) -> FlowDataEngine:
         """Adds a simple sequential record ID column."""
         df = self.data_frame.with_row_index(record_id_settings.output_column_name, record_id_settings.offset)
 
@@ -1494,7 +1497,7 @@ class FlowDataEngine:
         """Returns a string representation of the FlowDataEngine."""
         return f"flow data engine\n{self.data_frame.__repr__()}"
 
-    def __call__(self) ->
+    def __call__(self) -> FlowDataEngine:
         """Makes the class instance callable, returning itself."""
         return self
 
@@ -1502,7 +1505,7 @@ class FlowDataEngine:
         """Returns the number of records in the table."""
         return self.number_of_records if self.number_of_records >= 0 else self.get_number_of_records()
 
-    def cache(self) ->
+    def cache(self) -> FlowDataEngine:
         """Caches the current DataFrame to disk and updates the internal reference.
 
         This triggers a background process to write the current LazyFrame's result
@@ -1557,7 +1560,7 @@ class FlowDataEngine:
         df = self.collect()
         return df.to_dicts()
 
-    def __get_sample__(self, n_rows: int = 100, streamable: bool = True) ->
+    def __get_sample__(self, n_rows: int = 100, streamable: bool = True) -> FlowDataEngine:
         """Internal method to get a sample of the data."""
         if not self.lazy:
             df = self.data_frame.lazy()
@@ -1581,7 +1584,7 @@ class FlowDataEngine:
         shuffle: bool = False,
         seed: int = None,
         execution_location: ExecutionLocationsLiteral | None = None,
-    ) ->
+    ) -> FlowDataEngine:
         """Gets a sample of rows from the DataFrame.
 
         Args:
@@ -1620,7 +1623,7 @@ class FlowDataEngine:
 
         return FlowDataEngine(sample_df, schema=self.schema)
 
-    def get_subset(self, n_rows: int = 100) ->
+    def get_subset(self, n_rows: int = 100) -> FlowDataEngine:
         """Gets the first `n_rows` from the DataFrame.
 
         Args:
@@ -1636,7 +1639,7 @@ class FlowDataEngine:
 
     def iter_batches(
         self, batch_size: int = 1000, columns: list | tuple | str = None
-    ) -> Generator[
+    ) -> Generator[FlowDataEngine, None, None]:
         """Iterates over the DataFrame in batches.
 
         Args:
@@ -1657,7 +1660,7 @@ class FlowDataEngine:
     def start_fuzzy_join(
         self,
         fuzzy_match_input: transform_schemas.FuzzyMatchInput,
-        other:
+        other: FlowDataEngine,
         file_ref: str,
         flow_id: int = -1,
         node_id: int | str = -1,
@@ -1696,7 +1699,7 @@ class FlowDataEngine:
     def fuzzy_join_external(
         self,
         fuzzy_match_input: transform_schemas.FuzzyMatchInput,
-        other:
+        other: FlowDataEngine,
         file_ref: str = None,
         flow_id: int = -1,
         node_id: int = -1,
@@ -1722,9 +1725,9 @@ class FlowDataEngine:
     def fuzzy_join(
         self,
         fuzzy_match_input: transform_schemas.FuzzyMatchInput,
-        other:
+        other: FlowDataEngine,
         node_logger: NodeLogger = None,
-    ) ->
+    ) -> FlowDataEngine:
         fuzzy_match_input_manager = transform_schemas.FuzzyMatchInputManager(fuzzy_match_input)
         left_df, right_df = prepare_for_fuzzy_match(
             left=self, right=other, fuzzy_match_input_manager=fuzzy_match_input_manager
@@ -1741,8 +1744,8 @@ class FlowDataEngine:
         cross_join_input: transform_schemas.CrossJoinInput,
         auto_generate_selection: bool,
         verify_integrity: bool,
-        other:
-    ) ->
+        other: FlowDataEngine,
+    ) -> FlowDataEngine:
         """Performs a cross join with another DataFrame.
 
         A cross join produces the Cartesian product of the two DataFrames.
@@ -1796,8 +1799,8 @@ class FlowDataEngine:
         join_input: transform_schemas.JoinInput,
         auto_generate_selection: bool,
         verify_integrity: bool,
-        other:
-    ) ->
+        other: FlowDataEngine,
+    ) -> FlowDataEngine:
         """Performs a standard SQL-style join with another DataFrame."""
         # Create manager from input
         join_manager = transform_schemas.JoinInputManager(join_input)
@@ -1864,7 +1867,7 @@ class FlowDataEngine:
 
         return FlowDataEngine(joined_df, calculate_schema_stats=False, number_of_records=0, streamable=False)
 
-    def solve_graph(self, graph_solver_input: transform_schemas.GraphSolverInput) ->
+    def solve_graph(self, graph_solver_input: transform_schemas.GraphSolverInput) -> FlowDataEngine:
         """Solves a graph problem represented by 'from' and 'to' columns.
 
         This is used for operations like finding connected components in a graph.
@@ -1883,7 +1886,7 @@ class FlowDataEngine:
         )
         return FlowDataEngine(lf)
 
-    def add_new_values(self, values: Iterable, col_name: str = None) ->
+    def add_new_values(self, values: Iterable, col_name: str = None) -> FlowDataEngine:
         """Adds a new column with the provided values.
 
         Args:
@@ -1897,7 +1900,7 @@ class FlowDataEngine:
            col_name = "new_values"
         return FlowDataEngine(self.data_frame.with_columns(pl.Series(values).alias(col_name)))
 
-    def get_record_count(self) ->
+    def get_record_count(self) -> FlowDataEngine:
         """Returns a new FlowDataEngine with a single column 'number_of_records'
         containing the total number of records.
 
@@ -1906,7 +1909,7 @@ class FlowDataEngine:
         """
         return FlowDataEngine(self.data_frame.select(pl.len().alias("number_of_records")))
 
-    def assert_equal(self, other:
+    def assert_equal(self, other: FlowDataEngine, ordered: bool = True, strict_schema: bool = False):
         """Asserts that this DataFrame is equal to another.
 
         Useful for testing.
@@ -2075,7 +2078,7 @@ class FlowDataEngine:
             [transform_schemas.SelectInput(old_name=c.name, data_type=c.data_type) for c in self.schema]
         )
 
-    def select_columns(self, list_select: list[str] | tuple[str] | str) ->
+    def select_columns(self, list_select: list[str] | tuple[str] | str) -> FlowDataEngine:
         """Selects a subset of columns from the DataFrame.
 
         Args:
@@ -2098,7 +2101,7 @@ class FlowDataEngine:
             streamable=self._streamable,
         )
 
-    def drop_columns(self, columns: list[str]) ->
+    def drop_columns(self, columns: list[str]) -> FlowDataEngine:
         """Drops specified columns from the DataFrame.
 
         Args:
@@ -2115,7 +2118,7 @@ class FlowDataEngine:
             self.data_frame.select(cols_for_select), number_of_records=self.number_of_records, schema=new_schema
         )
 
-    def reorganize_order(self, column_order: list[str]) ->
+    def reorganize_order(self, column_order: list[str]) -> FlowDataEngine:
         """Reorganizes columns into a specified order.
 
         Args:
@@ -2130,7 +2133,7 @@ class FlowDataEngine:
 
     def apply_flowfile_formula(
         self, func: str, col_name: str, output_data_type: pl.DataType = None
-    ) ->
+    ) -> FlowDataEngine:
         """Applies a formula to create a new column or transform an existing one.
 
         Args:
@@ -2149,7 +2152,7 @@ class FlowDataEngine:
 
         return FlowDataEngine(df2, number_of_records=self.number_of_records)
 
-    def apply_sql_formula(self, func: str, col_name: str, output_data_type: pl.DataType = None) ->
+    def apply_sql_formula(self, func: str, col_name: str, output_data_type: pl.DataType = None) -> FlowDataEngine:
         """Applies an SQL-style formula using `pl.sql_expr`.
 
         Args:
@@ -2170,7 +2173,7 @@ class FlowDataEngine:
 
     def output(
         self, output_fs: input_schema.OutputSettings, flow_id: int, node_id: int | str, execute_remote: bool = True
-    ) ->
+    ) -> FlowDataEngine:
         """Writes the DataFrame to an output file.
 
         Can execute the write operation locally or in a remote worker process.
@@ -2214,7 +2217,7 @@ class FlowDataEngine:
         logger.info("Finished writing output")
         return self
 
-    def make_unique(self, unique_input: transform_schemas.UniqueInput = None) ->
+    def make_unique(self, unique_input: transform_schemas.UniqueInput = None) -> FlowDataEngine:
         """Gets the unique rows from the DataFrame.
 
         Args:
@@ -2228,7 +2231,7 @@ class FlowDataEngine:
            return FlowDataEngine(self.data_frame.unique())
         return FlowDataEngine(self.data_frame.unique(unique_input.columns, keep=unique_input.strategy))
 
-    def concat(self, other: Iterable[
+    def concat(self, other: Iterable[FlowDataEngine] | FlowDataEngine) -> FlowDataEngine:
         """Concatenates this DataFrame with one or more other DataFrames.
 
         Args:
@@ -2243,7 +2246,7 @@ class FlowDataEngine:
         dfs: list[pl.LazyFrame] | list[pl.DataFrame] = [self.data_frame] + [flt.data_frame for flt in other]
         return FlowDataEngine(pl.concat(dfs, how="diagonal_relaxed"))
 
-    def do_select(self, select_inputs: transform_schemas.SelectInputs, keep_missing: bool = True) ->
+    def do_select(self, select_inputs: transform_schemas.SelectInputs, keep_missing: bool = True) -> FlowDataEngine:
         """Performs a complex column selection, renaming, and reordering operation.
 
         Args:
@@ -2256,7 +2259,6 @@ class FlowDataEngine:
         """
         new_schema = deepcopy(self.schema)
         renames = [r for r in select_inputs.renames if r.is_available]
-
         if not keep_missing:
             drop_cols = set(self.data_frame.collect_schema().names()) - set(r.old_name for r in renames).union(
                 set(r.old_name for r in renames if not r.keep)
@@ -2322,7 +2324,7 @@ class FlowDataEngine:
         return cls(external_fetcher.get_result())
 
 
-def execute_polars_code(*flowfile_tables:
+def execute_polars_code(*flowfile_tables: FlowDataEngine, code: str) -> FlowDataEngine:
     """Executes arbitrary Polars code on one or more FlowDataEngine objects.
 
     This function takes a string of Python code that uses Polars and executes it.
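Most of the hunks above replace truncated `) ->` return types with explicit `-> FlowDataEngine` annotations inside the class's own body. As a general Python note (not necessarily how this package wires it up), referring to the enclosing class in its own annotations requires postponed annotation evaluation, e.g. `from __future__ import annotations`, or string annotations; a minimal, self-contained sketch of the pattern with a hypothetical class:

```python
from __future__ import annotations  # postpone annotation evaluation (PEP 563)


class Pipeline:
    """Hypothetical stand-in for a fluent, builder-style class like FlowDataEngine."""

    def __init__(self, steps: list[str] | None = None):
        self.steps = list(steps or [])

    def add_step(self, name: str) -> Pipeline:
        # 'Pipeline' is not fully defined while the class body executes;
        # the __future__ import keeps this annotation lazy, so it still works.
        self.steps.append(name)
        return self

    def cache(self) -> Pipeline:
        # Returning self allows method chaining, mirroring the style the
        # `-> FlowDataEngine` annotations above suggest.
        return self


p = Pipeline().add_step("filter").add_step("sort").cache()
print(p.steps)  # ['filter', 'sort']
```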
flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py

@@ -5,32 +5,6 @@ import polars as pl
 DataTypeGroup = Literal["numeric", "string", "datetime", "boolean", "binary", "complex", "unknown"]
 
 
-def convert_pl_type_to_string(pl_type: pl.DataType
-
-
-        return f"pl.List({inner_str})"
-    elif isinstance(pl_type, pl.Array):
-        inner_str = convert_pl_type_to_string(pl_type.inner, inner=True)
-        return f"pl.Array({inner_str})"
-    elif isinstance(pl_type, pl.Decimal):
-        precision = pl_type.precision if hasattr(pl_type, "precision") else None
-        scale = pl_type.scale if hasattr(pl_type, "scale") else None
-        if precision is not None and scale is not None:
-            return f"pl.Decimal({precision}, {scale})"
-        elif precision is not None:
-            return f"pl.Decimal({precision})"
-        else:
-            return "pl.Decimal()"
-    elif isinstance(pl_type, pl.Struct):
-        # Handle Struct with field definitions
-        fields = []
-        if hasattr(pl_type, "fields"):
-            for field in pl_type.fields:
-                field_name = field.name
-                field_type = convert_pl_type_to_string(field.dtype, inner=True)
-                fields.append(f'pl.Field("{field_name}", {field_type})')
-        field_str = ", ".join(fields)
-        return f"pl.Struct([{field_str}])"
-    else:
-        # For base types, we want the full pl.TypeName format
-        return str(pl_type.base_type()) if not inner else f"pl.{pl_type}"
+def convert_pl_type_to_string(pl_type: pl.DataType) -> str:
+    """Convert a Polars DataType to its string representation."""
+    return str(pl_type)
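The hand-written formatter above collapses to `str(pl_type)`, presumably because recent Polars releases already render dtypes in a constructor-like form such as `List(Int64)`, which is the format the new parser in utils.py accepts. A quick illustration (exact reprs may vary slightly between Polars versions):

```python
import polars as pl

# str()/repr() of Polars dtypes is already constructor-like in recent releases,
# which is what the simplified convert_pl_type_to_string relies on.
print(str(pl.Int64))           # Int64
print(str(pl.List(pl.Int64)))  # List(Int64)
```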
flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py

@@ -21,20 +21,51 @@ dtype_to_pl = {
 def safe_eval_pl_type(type_string: str):
     """
     Safely evaluate a Polars type string with restricted namespace.
-
+    Supports both formats:
+    - With pl. prefix: pl.List(pl.Int64)
+    - Without pl. prefix: List(Int64)
     """
     # Define allowed names in the evaluation namespace
     safe_dict = {
-        #
+        # Keep pl module for backwards compatibility with pl.X format
         "pl": pl,
-
-
-        "
-        "
-        "
-        "
-        "
-
+
+        # Polars types directly available (without pl. prefix)
+        "List": pl.List,
+        "Array": pl.Array,
+        "Struct": pl.Struct,
+        "Field": pl.Field,
+        "Decimal": pl.Decimal,
+
+        # Integer types
+        "Int8": pl.Int8,
+        "Int16": pl.Int16,
+        "Int32": pl.Int32,
+        "Int64": pl.Int64,
+        "Int128": pl.Int128,
+        "UInt8": pl.UInt8,
+        "UInt16": pl.UInt16,
+        "UInt32": pl.UInt32,
+        "UInt64": pl.UInt64,
+
+        # Float types
+        "Float32": pl.Float32,
+        "Float64": pl.Float64,
+
+        # Other types
+        "Boolean": pl.Boolean,
+        "String": pl.String,
+        "Utf8": pl.Utf8,
+        "Binary": pl.Binary,
+        "Date": pl.Date,
+        "Time": pl.Time,
+        "Datetime": pl.Datetime,
+        "Duration": pl.Duration,
+        "Categorical": pl.Categorical,
+        "Enum": pl.Enum,
+        "Null": pl.Null,
+        "Object": pl.Object,
+
         # Disable dangerous built-ins
         "__builtins__": {},
     }
@@ -57,10 +88,10 @@ def get_polars_type(dtype: str):
     pl_datetype = dtype_to_pl.get(dtype.lower())
     if pl_datetype is not None:
         return pl_datetype
-
-    return
-
-    return pl.String
+    try:
+        return safe_eval_pl_type(dtype)
+    except Exception:
+        return pl.String  # Fallback to String if evaluation fails
 
 
 def cast_str_to_polars_type(dtype: str) -> pl.DataType:
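The restricted-namespace evaluation and string-fallback shown above can be mirrored outside the package. A minimal standalone sketch of the same technique (the whitelist here is trimmed for brevity, and `parse_dtype` is an illustrative name, not the package's API):

```python
import polars as pl

# Whitelist mirroring the safe_dict idea from the diff: only Polars type
# constructors are visible, and built-ins are disabled.
_SAFE_NAMES = {
    "pl": pl,
    "List": pl.List,
    "Array": pl.Array,
    "Struct": pl.Struct,
    "Field": pl.Field,
    "Int64": pl.Int64,
    "Float64": pl.Float64,
    "String": pl.String,
    "Datetime": pl.Datetime,
    "__builtins__": {},
}


def parse_dtype(type_string: str) -> pl.DataType:
    """Illustrative helper: evaluate a dtype string in a restricted namespace."""
    try:
        return eval(type_string, _SAFE_NAMES)  # noqa: S307 - namespace is restricted
    except Exception:
        return pl.String  # same fallback behaviour as get_polars_type in the diff


print(parse_dtype("pl.List(pl.Int64)"))  # List(Int64)
print(parse_dtype("List(Int64)"))        # List(Int64), works without the pl. prefix
print(parse_dtype("not a type"))         # String (fallback)
```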
flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py

@@ -1,13 +1,30 @@
-from
+from base64 import b64decode, b64encode
+from typing import Annotated, Any, Literal
 
 from pl_fuzzy_frame_match.models import FuzzyMapping
-from pydantic import BaseModel
+from pydantic import BaseModel, BeforeValidator, PlainSerializer
 
 OperationType = Literal["store", "calculate_schema", "calculate_number_of_records", "write_output", "store_sample"]
 
 
+# Custom type for bytes that serializes to/from base64 string in JSON
+def _decode_bytes(v: Any) -> bytes:
+    if isinstance(v, bytes):
+        return v
+    if isinstance(v, str):
+        return b64decode(v)
+    raise ValueError(f"Expected bytes or base64 string, got {type(v)}")
+
+
+Base64Bytes = Annotated[
+    bytes,
+    BeforeValidator(_decode_bytes),
+    PlainSerializer(lambda x: b64encode(x).decode('ascii'), return_type=str),
+]
+
+
 class PolarsOperation(BaseModel):
-    operation:
+    operation: Base64Bytes  # Automatically encodes/decodes base64 for JSON
 
 
 class PolarsScript(PolarsOperation):