Flowfile 0.5.6__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (256)
  1. flowfile/api.py +8 -6
  2. flowfile/web/static/assets/{AdminView-c2c7942b.js → AdminView-C4K1DdHI.js} +28 -33
  3. flowfile/web/static/assets/{CloudConnectionView-7a3042c6.js → CloudConnectionView-BZbPvPUL.js} +39 -50
  4. flowfile/web/static/assets/{CloudStorageReader-24c54524.css → CloudStorageReader-BDByiqPI.css} +25 -25
  5. flowfile/web/static/assets/{CloudStorageReader-709c4037.js → CloudStorageReader-DLVukNJ7.js} +30 -35
  6. flowfile/web/static/assets/{CloudStorageWriter-604c51a8.js → CloudStorageWriter-Bfi-C1QW.js} +32 -37
  7. flowfile/web/static/assets/{CloudStorageWriter-60547855.css → CloudStorageWriter-y8jL8yjG.css} +24 -24
  8. flowfile/web/static/assets/{ColumnActionInput-d63d6746.js → ColumnActionInput-BpiCApw9.js} +7 -12
  9. flowfile/web/static/assets/{ColumnSelector-0c8cd1cd.js → ColumnSelector-CEAwedI7.js} +1 -2
  10. flowfile/web/static/assets/ContextMenu-CdojQu0w.js +9 -0
  11. flowfile/web/static/assets/ContextMenu-D12mhsy1.js +9 -0
  12. flowfile/web/static/assets/ContextMenu-EWUR98va.js +9 -0
  13. flowfile/web/static/assets/{ContextMenu.vue_vue_type_script_setup_true_lang-774c517c.js → ContextMenu.vue_vue_type_script_setup_true_lang-I4rXXd6G.js} +4 -5
  14. flowfile/web/static/assets/{CrossJoin-38e5b99a.js → CrossJoin-BOFfxkJO.js} +19 -18
  15. flowfile/web/static/assets/{CrossJoin-71b4cc10.css → CrossJoin-Cmbyt9im.css} +18 -18
  16. flowfile/web/static/assets/{CustomNode-76e8f3f5.js → CustomNode-Bhpezobq.js} +12 -17
  17. flowfile/web/static/assets/{DatabaseConnectionSettings-38155669.js → DatabaseConnectionSettings-Dw3bSJKB.js} +10 -11
  18. flowfile/web/static/assets/{DatabaseReader-5bf8c75b.css → DatabaseReader-D6pUNUCs.css} +21 -21
  19. flowfile/web/static/assets/{DatabaseReader-2e549c8f.js → DatabaseReader-m87ghlw0.js} +36 -34
  20. flowfile/web/static/assets/{DatabaseView-dc877c29.js → DatabaseView-CisSAtpe.js} +30 -38
  21. flowfile/web/static/assets/{DatabaseWriter-ffb91864.js → DatabaseWriter-Bbj9JLdL.js} +33 -35
  22. flowfile/web/static/assets/{DatabaseWriter-bdcf2c8b.css → DatabaseWriter-RBqdFLj8.css} +17 -17
  23. flowfile/web/static/assets/{DesignerView-a4466dab.js → DesignerView-DemDevTQ.js} +1752 -2054
  24. flowfile/web/static/assets/{DesignerView-71d4e9a1.css → DesignerView-Dm6OzlIc.css} +209 -168
  25. flowfile/web/static/assets/{DocumentationView-979afc84.js → DocumentationView-BrC1ZR3H.js} +3 -4
  26. flowfile/web/static/assets/{ExploreData-e4b92aaf.js → ExploreData-BMKcDuRb.js} +8 -10
  27. flowfile/web/static/assets/{ExternalSource-d08e7227.js → ExternalSource-BXrNNS-f.js} +40 -42
  28. flowfile/web/static/assets/{ExternalSource-7ac7373f.css → ExternalSource-NB6WVl5R.css} +14 -14
  29. flowfile/web/static/assets/{Filter-7add806d.js → Filter-C2MjsN6P.js} +36 -33
  30. flowfile/web/static/assets/{Filter-7494ea97.css → Filter-DCMGGuGC.css} +9 -9
  31. flowfile/web/static/assets/{Formula-53d58c43.css → Formula-BYafbDj8.css} +4 -4
  32. flowfile/web/static/assets/{Formula-36ab24d2.js → Formula-ufuy4mVD.js} +27 -26
  33. flowfile/web/static/assets/{FuzzyMatch-ad6361d6.css → FuzzyMatch-BGJAwgd0.css} +42 -42
  34. flowfile/web/static/assets/{FuzzyMatch-cc01bb04.js → FuzzyMatch-BOHODq3h.js} +36 -38
  35. flowfile/web/static/assets/{GraphSolver-4fb98f3b.js → GraphSolver-B6ZzpNGO.js} +23 -21
  36. flowfile/web/static/assets/{GraphSolver-4b4d7db9.css → GraphSolver-DFN83sj3.css} +4 -4
  37. flowfile/web/static/assets/{GroupBy-b3c8f429.js → GroupBy-B9BRNcfe.js} +30 -29
  38. flowfile/web/static/assets/{Sort-4abb7fae.css → GroupBy-x4ooP5np.css} +1 -1
  39. flowfile/web/static/assets/Join-Bx_g5bZz.css +118 -0
  40. flowfile/web/static/assets/{Join-096b7b26.js → Join-DsBEy1IH.js} +48 -43
  41. flowfile/web/static/assets/{LoginView-c33a246a.js → LoginView-Ct0rhdcO.js} +1 -2
  42. flowfile/web/static/assets/{ManualInput-39111f19.css → ManualInput-DlZmtMdt.css} +48 -48
  43. flowfile/web/static/assets/{ManualInput-7307e9b1.js → ManualInput-bC4BUgnG.js} +40 -41
  44. flowfile/web/static/assets/{MultiSelect-14822c48.js → MultiSelect-DIQ8PuTC.js} +2 -2
  45. flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-90c4d340.js → MultiSelect.vue_vue_type_script_setup_true_lang-BefHfqTI.js} +1 -1
  46. flowfile/web/static/assets/{NodeDesigner-5036c392.js → NodeDesigner-D39yzr2k.js} +178 -208
  47. flowfile/web/static/assets/{NodeDesigner-94cd4dd3.css → NodeDesigner-R0l6sYyY.css} +76 -76
  48. flowfile/web/static/assets/{NumericInput-15cf3b72.js → NumericInput-DMSX3oOr.js} +2 -2
  49. flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-91e679d7.js → NumericInput.vue_vue_type_script_setup_true_lang-d0YlVHAl.js} +1 -1
  50. flowfile/web/static/assets/{Output-1f8ed42c.js → Output-D0VoXGcW.js} +26 -34
  51. flowfile/web/static/assets/{Output-692dd25d.css → Output-DsmglIDy.css} +5 -5
  52. flowfile/web/static/assets/{Pivot-0e153f4e.js → Pivot-BnMB4sEe.js} +26 -26
  53. flowfile/web/static/assets/{Pivot-0eda81b4.css → Pivot-qKTyWxop.css} +4 -4
  54. flowfile/web/static/assets/{PivotValidation-81ec2a33.js → PivotValidation-B2lWvugt.js} +7 -9
  55. flowfile/web/static/assets/{PivotValidation-5a4f7c79.js → PivotValidation-BPlhRjpL.js} +7 -9
  56. flowfile/web/static/assets/{PolarsCode-a39f15ac.js → PolarsCode-5h0tHnWR.js} +22 -20
  57. flowfile/web/static/assets/{PopOver-ddcfe4f6.js → PopOver-BHpt5rsj.js} +5 -9
  58. flowfile/web/static/assets/{PopOver-d96599db.css → PopOver-CyYM4-rV.css} +1 -1
  59. flowfile/web/static/assets/{Read-90f366bc.css → Read-DJxkrTb_.css} +10 -10
  60. flowfile/web/static/assets/Read-TsLEFh3B.js +227 -0
  61. flowfile/web/static/assets/{RecordCount-e9048ccd.js → RecordCount-DkVixq9v.js} +18 -17
  62. flowfile/web/static/assets/{RecordId-ad02521d.js → RecordId-C2UEGlCf.js} +42 -39
  63. flowfile/web/static/assets/{SQLQueryComponent-2eeecf0b.js → SQLQueryComponent-Dr5KMoD3.js} +2 -3
  64. flowfile/web/static/assets/{Sample-9a68c23d.js → Sample-Cb3eQNmd.js} +30 -30
  65. flowfile/web/static/assets/{SecretSelector-2429f35a.js → SecretSelector-De2L2bSx.js} +3 -4
  66. flowfile/web/static/assets/{SecretsView-c6afc915.js → SecretsView-CheC9BPV.js} +13 -16
  67. flowfile/web/static/assets/{Select-fcd002b6.js → Select-CI8TloRs.js} +41 -36
  68. flowfile/web/static/assets/{SettingsSection-5ce15962.js → SettingsSection-B39ulIiI.js} +1 -2
  69. flowfile/web/static/assets/{SettingsSection-c6b1362c.js → SettingsSection-BiCc7S9h.js} +1 -2
  70. flowfile/web/static/assets/{SettingsSection-cebb91d5.js → SettingsSection-CITK_R7o.js} +2 -3
  71. flowfile/web/static/assets/{SettingsSection-26fe48d4.css → SettingsSection-D2GgY-Aq.css} +4 -4
  72. flowfile/web/static/assets/{SetupView-2d12e01f.js → SetupView-C1aXRDvp.js} +1 -2
  73. flowfile/web/static/assets/{SingleSelect-b67de4eb.js → SingleSelect-Kr_hz90m.js} +2 -2
  74. flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-eedb70eb.js → SingleSelect.vue_vue_type_script_setup_true_lang-Rxht5Z5N.js} +1 -1
  75. flowfile/web/static/assets/{SliderInput-fd8134ac.js → SliderInput-CLqpCxCb.js} +1 -2
  76. flowfile/web/static/assets/{GroupBy-5792782d.css → Sort-BIt2kc_p.css} +1 -1
  77. flowfile/web/static/assets/{Sort-c005a573.js → Sort-Dnw_J6Qi.js} +25 -25
  78. flowfile/web/static/assets/{TextInput-1bb31dab.js → TextInput-wdlunIZC.js} +2 -2
  79. flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-a51fe730.js → TextInput.vue_vue_type_script_setup_true_lang-Bcj3ywzv.js} +1 -1
  80. flowfile/web/static/assets/{TextToRows-4f363753.js → TextToRows-BhtyGWPq.js} +42 -49
  81. flowfile/web/static/assets/{TextToRows-12afb4f4.css → TextToRows-DivDOLDx.css} +9 -9
  82. flowfile/web/static/assets/{ToggleSwitch-ca0f2e5e.js → ToggleSwitch-B-6WzfFf.js} +2 -2
  83. flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-49aa41d8.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-Cj8LqT-b.js} +1 -1
  84. flowfile/web/static/assets/{UnavailableFields-f6147968.js → UnavailableFields-Yf6XSqFB.js} +2 -3
  85. flowfile/web/static/assets/{Union-c65f17b7.js → Union-CwpjeKYC.js} +20 -23
  86. flowfile/web/static/assets/{Unpivot-b6ad6427.css → Union-DQJcpp3-.css} +6 -6
  87. flowfile/web/static/assets/{Unique-a1d96fb2.js → Unique-25v3urqH.js} +75 -74
  88. flowfile/web/static/assets/{Union-d6a8d7d5.css → Unpivot-Deqh1gtI.css} +6 -6
  89. flowfile/web/static/assets/{Unpivot-c2657ff3.js → Unpivot-sYcTTXrq.js} +34 -27
  90. flowfile/web/static/assets/{UnpivotValidation-28e29a3b.js → UnpivotValidation-C5DDEKY2.js} +5 -7
  91. flowfile/web/static/assets/VueGraphicWalker-B8l1_Z92.js +131 -0
  92. flowfile/web/static/assets/VueGraphicWalker-Da_1-3me.css +21 -0
  93. flowfile/web/static/assets/{api-df48ec50.js → api-C0LvF-0C.js} +1 -1
  94. flowfile/web/static/assets/{api-ee542cf7.js → api-DaC83EO_.js} +1 -1
  95. flowfile/web/static/assets/client-C8Ygr6Gb.js +42 -0
  96. flowfile/web/static/assets/{dropDown-7576a76a.js → dropDown-D5YXaPRR.js} +7 -12
  97. flowfile/web/static/assets/{fullEditor-7583bef5.js → fullEditor-BVYnWm05.js} +300 -18
  98. flowfile/web/static/assets/genericNodeSettings-2wAu-QKn.css +75 -0
  99. flowfile/web/static/assets/genericNodeSettings-BBtW_Cpz.js +590 -0
  100. flowfile/web/static/assets/{VueGraphicWalker-2fc3ddd4.js → graphic-walker.es-VrK6vdGE.js} +92305 -89751
  101. flowfile/web/static/assets/index-BCJxPfM5.js +6693 -0
  102. flowfile/web/static/assets/{index-057d770d.js → index-CHPMUR0d.js} +150 -170
  103. flowfile/web/static/assets/index-DPkoZWq8.js +32 -0
  104. flowfile/web/static/assets/index-DnW_KC_I.js +277 -0
  105. flowfile/web/static/assets/index-UFXyfirV.css +10797 -0
  106. flowfile/web/static/assets/index-bcuE0Z0p.js +87456 -0
  107. flowfile/web/static/assets/{node.types-2c15bb7e.js → node.types-Dl4gtSW9.js} +2 -2
  108. flowfile/web/static/assets/{outputCsv-c492b15e.js → outputCsv-BELuBiJZ.js} +1 -2
  109. flowfile/web/static/assets/outputCsv-CdGkv-fN.css +2581 -0
  110. flowfile/web/static/assets/{outputExcel-13bfa10f.js → outputExcel-D0TTNM79.js} +1 -2
  111. flowfile/web/static/assets/{outputParquet-9be1523a.js → outputParquet-Cz9EbRHj.js} +1 -2
  112. flowfile/web/static/assets/{readCsv-5a49a8c9.js → readCsv-7bd3kUMI.js} +1 -2
  113. flowfile/web/static/assets/{readExcel-27c30ad8.js → readExcel-Cq8CCwIv.js} +3 -4
  114. flowfile/web/static/assets/{readParquet-c5244ad5.css → readParquet-CRDmBrsp.css} +4 -4
  115. flowfile/web/static/assets/{readParquet-446bde68.js → readParquet-DjR4mRaj.js} +4 -5
  116. flowfile/web/static/assets/{secrets.api-34431884.js → secrets.api-C9o2KE5V.js} +1 -1
  117. flowfile/web/static/assets/{selectDynamic-5754a2b1.js → selectDynamic-Bl5FVsME.js} +5 -7
  118. flowfile/web/static/assets/useNodeSettings-dMS9zmh_.js +69 -0
  119. flowfile/web/static/assets/{vue-codemirror.esm-8f46fb36.js → vue-codemirror.esm-CwaYwln0.js} +3469 -3064
  120. flowfile/web/static/assets/{vue-content-loader.es-808fe33a.js → vue-content-loader.es-CMoRXo7N.js} +3 -3
  121. flowfile/web/static/index.html +2 -3
  122. {flowfile-0.5.6.dist-info → flowfile-0.6.1.dist-info}/METADATA +2 -1
  123. flowfile-0.6.1.dist-info/RECORD +417 -0
  124. {flowfile-0.5.6.dist-info → flowfile-0.6.1.dist-info}/WHEEL +1 -1
  125. flowfile_core/auth/password.py +1 -0
  126. flowfile_core/database/init_db.py +7 -5
  127. flowfile_core/fileExplorer/funcs.py +2 -2
  128. flowfile_core/flowfile/code_generator/code_generator.py +13 -11
  129. flowfile_core/flowfile/filter_expressions.py +327 -0
  130. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +61 -59
  131. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +3 -29
  132. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +45 -14
  133. flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +20 -3
  134. flowfile_core/flowfile/flow_data_engine/subprocess_operations/streaming.py +206 -0
  135. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +146 -24
  136. flowfile_core/flowfile/flow_graph.py +504 -190
  137. flowfile_core/flowfile/flow_node/__init__.py +32 -0
  138. flowfile_core/flowfile/flow_node/executor.py +404 -0
  139. flowfile_core/flowfile/flow_node/flow_node.py +207 -106
  140. flowfile_core/flowfile/flow_node/models.py +40 -0
  141. flowfile_core/flowfile/flow_node/output_field_config_applier.py +217 -0
  142. flowfile_core/flowfile/flow_node/schema_utils.py +78 -0
  143. flowfile_core/flowfile/flow_node/state.py +155 -0
  144. flowfile_core/flowfile/history_manager.py +401 -0
  145. flowfile_core/flowfile/manage/compatibility_enhancements.py +9 -0
  146. flowfile_core/flowfile/manage/io_flowfile.py +3 -1
  147. flowfile_core/flowfile/sources/external_sources/sql_source/models.py +20 -4
  148. flowfile_core/flowfile/util/execution_orderer.py +89 -36
  149. flowfile_core/routes/auth.py +8 -9
  150. flowfile_core/routes/routes.py +320 -101
  151. flowfile_core/routes/user_defined_components.py +18 -16
  152. flowfile_core/schemas/history_schema.py +220 -0
  153. flowfile_core/schemas/input_schema.py +130 -6
  154. flowfile_core/schemas/schemas.py +9 -0
  155. flowfile_core/schemas/transform_schema.py +27 -5
  156. flowfile_core/schemas/yaml_types.py +23 -5
  157. flowfile_frame/adding_expr.py +18 -126
  158. flowfile_frame/callable_utils.py +261 -0
  159. flowfile_frame/database/connection_manager.py +0 -1
  160. flowfile_frame/expr.py +8 -4
  161. flowfile_frame/flow_frame.py +41 -41
  162. flowfile_frame/lazy.py +3 -12
  163. flowfile_frame/lazy_methods.py +5 -64
  164. flowfile_frame/utils.py +13 -32
  165. flowfile_worker/funcs.py +6 -4
  166. flowfile_worker/main.py +2 -0
  167. flowfile_worker/models.py +31 -11
  168. flowfile_worker/routes.py +60 -35
  169. flowfile_worker/spawner.py +7 -1
  170. flowfile_worker/streaming.py +335 -0
  171. flowfile/web/static/assets/ContextMenu-366bf1b4.js +0 -9
  172. flowfile/web/static/assets/ContextMenu-85cf5b44.js +0 -9
  173. flowfile/web/static/assets/ContextMenu-9d28ae6d.js +0 -9
  174. flowfile/web/static/assets/Join-28b5e18f.css +0 -109
  175. flowfile/web/static/assets/Read-39b63932.js +0 -222
  176. flowfile/web/static/assets/VueGraphicWalker-430f0b86.css +0 -6
  177. flowfile/web/static/assets/database_reader-ce1e55f3.svg +0 -24
  178. flowfile/web/static/assets/database_writer-b4ad0753.svg +0 -23
  179. flowfile/web/static/assets/element-icons-9c88a535.woff +0 -0
  180. flowfile/web/static/assets/element-icons-de5eb258.ttf +0 -0
  181. flowfile/web/static/assets/genericNodeSettings-0155288b.js +0 -136
  182. flowfile/web/static/assets/genericNodeSettings-3b2507ea.css +0 -46
  183. flowfile/web/static/assets/index-aeec439d.js +0 -38
  184. flowfile/web/static/assets/index-ca6799de.js +0 -62760
  185. flowfile/web/static/assets/index-d60c9dd4.css +0 -10777
  186. flowfile/web/static/assets/nodeInput-d478b9ac.js +0 -2
  187. flowfile/web/static/assets/outputCsv-cc84e09f.css +0 -2499
  188. flowfile-0.5.6.dist-info/RECORD +0 -407
  189. /flowfile/web/static/assets/{AdminView-f53bad23.css → AdminView-B2Dthl3u.css} +0 -0
  190. /flowfile/web/static/assets/{CloudConnectionView-cf85f943.css → CloudConnectionView-BdFYGWV7.css} +0 -0
  191. /flowfile/web/static/assets/{ColumnActionInput-c44b7aee.css → ColumnActionInput-dCasSIC9.css} +0 -0
  192. /flowfile/web/static/assets/{ColumnSelector-371637fb.css → ColumnSelector-j6sEOjo1.css} +0 -0
  193. /flowfile/web/static/assets/{CustomNode-edb9b939.css → CustomNode-VPlajG0j.css} +0 -0
  194. /flowfile/web/static/assets/{DatabaseConnectionSettings-c20a1e16.css → DatabaseConnectionSettings-B78hXYgu.css} +0 -0
  195. /flowfile/web/static/assets/{DatabaseView-6655afd6.css → DatabaseView-B-_adk1s.css} +0 -0
  196. /flowfile/web/static/assets/{DocumentationView-9ea6e871.css → DocumentationView-CL7iipFL.css} +0 -0
  197. /flowfile/web/static/assets/{ExploreData-10c5acc8.css → ExploreData-DHjv0Plr.css} +0 -0
  198. /flowfile/web/static/assets/{LoginView-d325d632.css → LoginView-DN1BXY3e.css} +0 -0
  199. /flowfile/web/static/assets/{PivotValidation-0e905b1a.css → PivotValidation-DK-FARWe.css} +0 -0
  200. /flowfile/web/static/assets/{PivotValidation-41b57ad6.css → PivotValidation-FUa9F47u.css} +0 -0
  201. /flowfile/web/static/assets/{PolarsCode-2b1f1f23.css → PolarsCode-G-gRSrSc.css} +0 -0
  202. /flowfile/web/static/assets/{SQLQueryComponent-edb90b98.css → SQLQueryComponent-oAbWw0r-.css} +0 -0
  203. /flowfile/web/static/assets/{SecretSelector-6329f743.css → SecretSelector-CJSadIZx.css} +0 -0
  204. /flowfile/web/static/assets/{SecretsView-aa291340.css → SecretsView-DbzIRAba.css} +0 -0
  205. /flowfile/web/static/assets/{SettingsSection-8f980839.css → SettingsSection-BGcJnH6q.css} +0 -0
  206. /flowfile/web/static/assets/{SettingsSection-07fbbc39.css → SettingsSection-DDWn_EGW.css} +0 -0
  207. /flowfile/web/static/assets/{SetupView-ec26f76a.css → SetupView-CI1nd-5Z.css} +0 -0
  208. /flowfile/web/static/assets/{SliderInput-f2e4f23c.css → SliderInput-BRk-q_Dk.css} +0 -0
  209. /flowfile/web/static/assets/{UnavailableFields-394a1f78.css → UnavailableFields-DRKDImKe.css} +0 -0
  210. /flowfile/web/static/assets/{Unique-2b705521.css → Unique-Absb0aON.css} +0 -0
  211. /flowfile/web/static/assets/{UnpivotValidation-d5ca3b7b.css → UnpivotValidation-DSBkFgS-.css} +0 -0
  212. /flowfile/web/static/assets/{airbyte-292aa232.png → airbyte-W0xvIXwZ.png} +0 -0
  213. /flowfile/web/static/assets/{cloud_storage_reader-aa1415d6.png → cloud_storage_reader-3GpSCk90.png} +0 -0
  214. /flowfile/web/static/assets/{cross_join-d30c0290.png → cross_join-B0qpgYoV.png} +0 -0
  215. /flowfile/web/static/assets/{dropDown-1d6acbd9.css → dropDown-CE0VF5_P.css} +0 -0
  216. /flowfile/web/static/assets/{explore_data-8a0a2861.png → explore_data-tX6olPPL.png} +0 -0
  217. /flowfile/web/static/assets/{fa-brands-400-808443ae.ttf → fa-brands-400-D1LuMI3I.ttf} +0 -0
  218. /flowfile/web/static/assets/{fa-brands-400-d7236a19.woff2 → fa-brands-400-D_cYUPeE.woff2} +0 -0
  219. /flowfile/web/static/assets/{fa-regular-400-e3456d12.woff2 → fa-regular-400-BjRzuEpd.woff2} +0 -0
  220. /flowfile/web/static/assets/{fa-regular-400-54cf6086.ttf → fa-regular-400-DZaxPHgR.ttf} +0 -0
  221. /flowfile/web/static/assets/{fa-solid-900-aa759986.woff2 → fa-solid-900-CTAAxXor.woff2} +0 -0
  222. /flowfile/web/static/assets/{fa-solid-900-d2f05935.ttf → fa-solid-900-D0aA9rwL.ttf} +0 -0
  223. /flowfile/web/static/assets/{fa-v4compatibility-0ce9033c.woff2 → fa-v4compatibility-C9RhG_FT.woff2} +0 -0
  224. /flowfile/web/static/assets/{fa-v4compatibility-30f6abf6.ttf → fa-v4compatibility-CCth-dXg.ttf} +0 -0
  225. /flowfile/web/static/assets/{filter-d7708bda.png → filter-WRdZyUOw.png} +0 -0
  226. /flowfile/web/static/assets/{formula-eeeb1611.png → formula-CgM7uHVI.png} +0 -0
  227. /flowfile/web/static/assets/{fullEditor-fe9f7e18.css → fullEditor-CmDI7T9F.css} +0 -0
  228. /flowfile/web/static/assets/{fuzzy_match-40c161b2.png → fuzzy_match-Yon3k5Tc.png} +0 -0
  229. /flowfile/web/static/assets/{graph_solver-8b7888b8.png → graph_solver-BlMrBttD.png} +0 -0
  230. /flowfile/web/static/assets/{group_by-80561fc3.png → group_by-Gici0CSS.png} +0 -0
  231. /flowfile/web/static/assets/{input_data-ab2eb678.png → input_data-BRdGecLc.png} +0 -0
  232. /flowfile/web/static/assets/{join-349043ae.png → join-BITWRu73.png} +0 -0
  233. /flowfile/web/static/assets/{manual_input-ae98f31d.png → manual_input-CFvo_EUS.png} +0 -0
  234. /flowfile/web/static/assets/{old_join-5d0eb604.png → old_join-B9bkpPqv.png} +0 -0
  235. /flowfile/web/static/assets/{output-06ec0371.png → output-Dp7-ZpC4.png} +0 -0
  236. /flowfile/web/static/assets/{outputExcel-f5d272b2.css → outputExcel-CKgRe2iT.css} +0 -0
  237. /flowfile/web/static/assets/{outputParquet-54597c3c.css → outputParquet-d7j407cK.css} +0 -0
  238. /flowfile/web/static/assets/{pivot-9660df51.png → pivot-DSxKhNlD.png} +0 -0
  239. /flowfile/web/static/assets/{polars_code-05ce5dc6.png → polars_code-DxiztZ1c.png} +0 -0
  240. /flowfile/web/static/assets/{readCsv-3bfac4c3.css → readCsv-BG-1Jilp.css} +0 -0
  241. /flowfile/web/static/assets/{readExcel-3db6b763.css → readExcel-DBQXKPtC.css} +0 -0
  242. /flowfile/web/static/assets/{record_count-dab44eb5.png → record_count-DCeaLtpS.png} +0 -0
  243. /flowfile/web/static/assets/{record_id-0b15856b.png → record_id-FeUjyIFh.png} +0 -0
  244. /flowfile/web/static/assets/{sample-693a88b5.png → sample-DeqfRiB-.png} +0 -0
  245. /flowfile/web/static/assets/{select-b0d0437a.png → select-D4JjbdjS.png} +0 -0
  246. /flowfile/web/static/assets/{selectDynamic-f2fb394f.css → selectDynamic-CjeTPUUo.css} +0 -0
  247. /flowfile/web/static/assets/{sort-2aa579f0.png → sort-DGwUG9WS.png} +0 -0
  248. /flowfile/web/static/assets/{summarize-2a099231.png → summarize-DFaNHpfp.png} +0 -0
  249. /flowfile/web/static/assets/{text_to_rows-859b29ea.png → text_to_rows-BdiAewrN.png} +0 -0
  250. /flowfile/web/static/assets/{union-2d8609f4.png → union-DCK-LSMq.png} +0 -0
  251. /flowfile/web/static/assets/{unique-1958b98a.png → unique-CdP3zZIq.png} +0 -0
  252. /flowfile/web/static/assets/{unpivot-d3cb4b5b.png → unpivot-CHttrEt8.png} +0 -0
  253. /flowfile/web/static/assets/{user-defined-icon-0ae16c90.png → user-defined-icon-BcIp2Vzo.png} +0 -0
  254. /flowfile/web/static/assets/{view-7a0f0be1.png → view-DUSRwjvq.png} +0 -0
  255. {flowfile-0.5.6.dist-info → flowfile-0.6.1.dist-info}/entry_points.txt +0 -0
  256. {flowfile-0.5.6.dist-info → flowfile-0.6.1.dist-info}/licenses/LICENSE +0 -0
@@ -7,7 +7,7 @@ from collections.abc import Callable, Generator, Iterable
7
7
  from copy import deepcopy
8
8
  from dataclasses import dataclass
9
9
  from math import ceil
10
- from typing import Any, Literal, TypeVar, Union
10
+ from typing import Any, Literal, TypeVar
11
11
 
12
12
  import polars as pl
13
13
 
@@ -37,7 +37,11 @@ from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import (
37
37
  assert_if_flowfile_schema,
38
38
  convert_stats_to_column_info,
39
39
  )
40
- from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import cast_str_to_polars_type
40
+ from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import (
41
+ cast_str_to_polars_type,
42
+ get_polars_type,
43
+ safe_eval_pl_type,
44
+ )
41
45
  from flowfile_core.flowfile.flow_data_engine.fuzzy_matching.prepare_for_fuzzy_match import prepare_for_fuzzy_match
42
46
  from flowfile_core.flowfile.flow_data_engine.join import (
43
47
  get_col_name_to_delete,
@@ -171,7 +175,7 @@ class FlowDataEngine:
171
175
  name: str = None
172
176
  number_of_records: int = None
173
177
  errors: list = None
174
- _schema: list["FlowfileColumn"] | None = None
178
+ _schema: list[FlowfileColumn] | None = None
175
179
 
176
180
  # Configuration attributes
177
181
  _optimize_memory: bool = False
@@ -204,13 +208,11 @@ class FlowDataEngine:
204
208
 
205
209
  def __init__(
206
210
  self,
207
- raw_data: Union[
208
- list[dict], list[Any], dict[str, Any], "ParquetFile", pl.DataFrame, pl.LazyFrame, input_schema.RawData
209
- ] = None,
211
+ raw_data: list[dict] | list[Any] | dict[str, Any] | ParquetFile | pl.DataFrame | pl.LazyFrame | input_schema.RawData = None,
210
212
  path_ref: str = None,
211
213
  name: str = None,
212
214
  optimize_memory: bool = True,
213
- schema: list["FlowfileColumn"] | list[str] | pl.Schema = None,
215
+ schema: list[FlowfileColumn] | list[str] | pl.Schema = None,
214
216
  number_of_records: int = None,
215
217
  calculate_schema_stats: bool = False,
216
218
  streamable: bool = True,
@@ -523,7 +525,7 @@ class FlowDataEngine:
523
525
  @classmethod
524
526
  def from_cloud_storage_obj(
525
527
  cls, settings: cloud_storage_schemas.CloudStorageReadSettingsInternal
526
- ) -> "FlowDataEngine":
528
+ ) -> FlowDataEngine:
527
529
  """Creates a FlowDataEngine from an object in cloud storage.
528
530
 
529
531
  This method supports reading from various cloud storage providers like AWS S3,
@@ -607,7 +609,7 @@ class FlowDataEngine:
607
609
  storage_options: dict[str, Any],
608
610
  credential_provider: Callable | None,
609
611
  read_settings: cloud_storage_schemas.CloudStorageReadSettings,
610
- ) -> "FlowDataEngine":
612
+ ) -> FlowDataEngine:
611
613
  """Reads Iceberg table(s) from cloud storage."""
612
614
  raise NotImplementedError("Failed to read Iceberg table from cloud storage: Not yet implemented")
613
615
 
@@ -618,7 +620,7 @@ class FlowDataEngine:
618
620
  storage_options: dict[str, Any],
619
621
  credential_provider: Callable | None,
620
622
  is_directory: bool,
621
- ) -> "FlowDataEngine":
623
+ ) -> FlowDataEngine:
622
624
  """Reads Parquet file(s) from cloud storage."""
623
625
  try:
624
626
  # Use scan_parquet for lazy evaluation
@@ -656,7 +658,7 @@ class FlowDataEngine:
656
658
  storage_options: dict[str, Any],
657
659
  credential_provider: Callable | None,
658
660
  read_settings: cloud_storage_schemas.CloudStorageReadSettings,
659
- ) -> "FlowDataEngine":
661
+ ) -> FlowDataEngine:
660
662
  """Reads a Delta Lake table from cloud storage."""
661
663
  try:
662
664
  logger.info("Reading Delta file from cloud storage...")
@@ -687,7 +689,7 @@ class FlowDataEngine:
687
689
  storage_options: dict[str, Any],
688
690
  credential_provider: Callable | None,
689
691
  read_settings: cloud_storage_schemas.CloudStorageReadSettings,
690
- ) -> "FlowDataEngine":
692
+ ) -> FlowDataEngine:
691
693
  """Reads CSV file(s) from cloud storage."""
692
694
  try:
693
695
  scan_kwargs = {
@@ -730,7 +732,7 @@ class FlowDataEngine:
730
732
  storage_options: dict[str, Any],
731
733
  credential_provider: Callable | None,
732
734
  is_directory: bool,
733
- ) -> "FlowDataEngine":
735
+ ) -> FlowDataEngine:
734
736
  """Reads JSON file(s) from cloud storage."""
735
737
  try:
736
738
  if is_directory:
@@ -821,6 +823,7 @@ class FlowDataEngine:
821
823
  if self.lazy and isinstance(df, pl.DataFrame):
822
824
  raise Exception("Cannot set a non-lazy dataframe to a lazy flowfile")
823
825
  self._data_frame = df
826
+ self._schema = None
824
827
 
825
828
  @staticmethod
826
829
  def _create_schema_stats_from_pl_schema(pl_schema: pl.Schema) -> list[dict]:
@@ -968,7 +971,7 @@ class FlowDataEngine:
968
971
 
969
972
  def do_group_by(
970
973
  self, group_by_input: transform_schemas.GroupByInput, calculate_schema_stats: bool = True
971
- ) -> "FlowDataEngine":
974
+ ) -> FlowDataEngine:
972
975
  """Performs a group-by operation on the DataFrame.
973
976
 
974
977
  Args:
@@ -1008,7 +1011,7 @@ class FlowDataEngine:
1008
1011
  calculate_schema_stats=calculate_schema_stats,
1009
1012
  )
1010
1013
 
1011
- def do_sort(self, sorts: list[transform_schemas.SortByInput]) -> "FlowDataEngine":
1014
+ def do_sort(self, sorts: list[transform_schemas.SortByInput]) -> FlowDataEngine:
1012
1015
  """Sorts the DataFrame by one or more columns.
1013
1016
 
1014
1017
  Args:
@@ -1027,7 +1030,7 @@ class FlowDataEngine:
1027
1030
 
1028
1031
  def change_column_types(
1029
1032
  self, transforms: list[transform_schemas.SelectInput], calculate_schema: bool = False
1030
- ) -> "FlowDataEngine":
1033
+ ) -> FlowDataEngine:
1031
1034
  """Changes the data type of one or more columns.
1032
1035
 
1033
1036
  Args:
@@ -1040,7 +1043,7 @@ class FlowDataEngine:
1040
1043
  """
1041
1044
  dtypes = [dtype.base_type() for dtype in self.data_frame.collect_schema().dtypes()]
1042
1045
  idx_mapping = list(
1043
- (transform.old_name, self.cols_idx.get(transform.old_name), getattr(pl, transform.polars_type))
1046
+ (transform.old_name, self.cols_idx.get(transform.old_name), get_polars_type(transform.polars_type))
1044
1047
  for transform in transforms
1045
1048
  if transform.data_type is not None
1046
1049
  )
@@ -1122,7 +1125,7 @@ class FlowDataEngine:
1122
1125
  return self.data_frame.to_dict(as_series=False)
1123
1126
 
1124
1127
  @classmethod
1125
- def create_from_external_source(cls, external_source: ExternalDataSource) -> "FlowDataEngine":
1128
+ def create_from_external_source(cls, external_source: ExternalDataSource) -> FlowDataEngine:
1126
1129
  """Creates a FlowDataEngine from an external data source.
1127
1130
 
1128
1131
  Args:
@@ -1142,7 +1145,7 @@ class FlowDataEngine:
1142
1145
  return ff
1143
1146
 
1144
1147
  @classmethod
1145
- def create_from_sql(cls, sql: str, conn: Any) -> "FlowDataEngine":
1148
+ def create_from_sql(cls, sql: str, conn: Any) -> FlowDataEngine:
1146
1149
  """Creates a FlowDataEngine by executing a SQL query.
1147
1150
 
1148
1151
  Args:
@@ -1155,7 +1158,7 @@ class FlowDataEngine:
1155
1158
  return cls(pl.read_sql(sql, conn))
1156
1159
 
1157
1160
  @classmethod
1158
- def create_from_schema(cls, schema: list[FlowfileColumn]) -> "FlowDataEngine":
1161
+ def create_from_schema(cls, schema: list[FlowfileColumn]) -> FlowDataEngine:
1159
1162
  """Creates an empty FlowDataEngine from a schema definition.
1160
1163
 
1161
1164
  Args:
@@ -1172,7 +1175,7 @@ class FlowDataEngine:
1172
1175
  return cls(df, schema=schema, calculate_schema_stats=False, number_of_records=0)
1173
1176
 
1174
1177
  @classmethod
1175
- def create_from_path(cls, received_table: input_schema.ReceivedTable) -> "FlowDataEngine":
1178
+ def create_from_path(cls, received_table: input_schema.ReceivedTable) -> FlowDataEngine:
1176
1179
  """Creates a FlowDataEngine from a local file path.
1177
1180
 
1178
1181
  Supports various file types like CSV, Parquet, and Excel.
@@ -1200,7 +1203,7 @@ class FlowDataEngine:
1200
1203
  return flow_file
1201
1204
 
1202
1205
  @classmethod
1203
- def create_random(cls, number_of_records: int = 1000) -> "FlowDataEngine":
1206
+ def create_random(cls, number_of_records: int = 1000) -> FlowDataEngine:
1204
1207
  """Creates a FlowDataEngine with randomly generated data.
1205
1208
 
1206
1209
  Useful for testing and examples.
@@ -1214,7 +1217,7 @@ class FlowDataEngine:
1214
1217
  return cls(create_fake_data(number_of_records))
1215
1218
 
1216
1219
  @classmethod
1217
- def generate_enumerator(cls, length: int = 1000, output_name: str = "output_column") -> "FlowDataEngine":
1220
+ def generate_enumerator(cls, length: int = 1000, output_name: str = "output_column") -> FlowDataEngine:
1218
1221
  """Generates a FlowDataEngine with a single column containing a sequence of integers.
1219
1222
 
1220
1223
  Args:
@@ -1277,7 +1280,7 @@ class FlowDataEngine:
1277
1280
 
1278
1281
  return flow_file_columns
1279
1282
 
1280
- def split(self, split_input: transform_schemas.TextToRowsInput) -> "FlowDataEngine":
1283
+ def split(self, split_input: transform_schemas.TextToRowsInput) -> FlowDataEngine:
1281
1284
  """Splits a column's text values into multiple rows based on a delimiter.
1282
1285
 
1283
1286
  This operation is often referred to as "exploding" the DataFrame, as it
@@ -1304,7 +1307,7 @@ class FlowDataEngine:
1304
1307
 
1305
1308
  return FlowDataEngine(df)
1306
1309
 
1307
- def unpivot(self, unpivot_input: transform_schemas.UnpivotInput) -> "FlowDataEngine":
1310
+ def unpivot(self, unpivot_input: transform_schemas.UnpivotInput) -> FlowDataEngine:
1308
1311
  """Converts the DataFrame from a wide to a long format.
1309
1312
 
1310
1313
  This is the inverse of a pivot operation, taking columns and transforming
@@ -1328,7 +1331,7 @@ class FlowDataEngine:
1328
1331
 
1329
1332
  return FlowDataEngine(result)
1330
1333
 
1331
- def do_pivot(self, pivot_input: transform_schemas.PivotInput, node_logger: NodeLogger = None) -> "FlowDataEngine":
1334
+ def do_pivot(self, pivot_input: transform_schemas.PivotInput, node_logger: NodeLogger = None) -> FlowDataEngine:
1332
1335
  """Converts the DataFrame from a long to a wide format, aggregating values.
1333
1336
 
1334
1337
  Args:
@@ -1399,7 +1402,7 @@ class FlowDataEngine:
1399
1402
 
1400
1403
  return FlowDataEngine(df, calculate_schema_stats=False)
1401
1404
 
1402
- def do_filter(self, predicate: str) -> "FlowDataEngine":
1405
+ def do_filter(self, predicate: str) -> FlowDataEngine:
1403
1406
  """Filters rows based on a predicate expression.
1404
1407
 
1405
1408
  Args:
@@ -1418,7 +1421,7 @@ class FlowDataEngine:
1418
1421
  df = self.data_frame.filter(f)
1419
1422
  return FlowDataEngine(df, schema=self.schema, streamable=self._streamable)
1420
1423
 
1421
- def add_record_id(self, record_id_settings: transform_schemas.RecordIdInput) -> "FlowDataEngine":
1424
+ def add_record_id(self, record_id_settings: transform_schemas.RecordIdInput) -> FlowDataEngine:
1422
1425
  """Adds a record ID (row number) column to the DataFrame.
1423
1426
 
1424
1427
  Can generate a simple sequential ID or a grouped ID that resets for
@@ -1435,7 +1438,7 @@ class FlowDataEngine:
1435
1438
  return self._add_grouped_record_id(record_id_settings)
1436
1439
  return self._add_simple_record_id(record_id_settings)
1437
1440
 
1438
- def _add_grouped_record_id(self, record_id_settings: transform_schemas.RecordIdInput) -> "FlowDataEngine":
1441
+ def _add_grouped_record_id(self, record_id_settings: transform_schemas.RecordIdInput) -> FlowDataEngine:
1439
1442
  """Adds a record ID column with grouping."""
1440
1443
  select_cols = [pl.col(record_id_settings.output_column_name)] + [pl.col(c) for c in self.columns]
1441
1444
 
@@ -1456,7 +1459,7 @@ class FlowDataEngine:
1456
1459
 
1457
1460
  return FlowDataEngine(df, schema=output_schema)
1458
1461
 
1459
- def _add_simple_record_id(self, record_id_settings: transform_schemas.RecordIdInput) -> "FlowDataEngine":
1462
+ def _add_simple_record_id(self, record_id_settings: transform_schemas.RecordIdInput) -> FlowDataEngine:
1460
1463
  """Adds a simple sequential record ID column."""
1461
1464
  df = self.data_frame.with_row_index(record_id_settings.output_column_name, record_id_settings.offset)
1462
1465
 
@@ -1494,7 +1497,7 @@ class FlowDataEngine:
1494
1497
  """Returns a string representation of the FlowDataEngine."""
1495
1498
  return f"flow data engine\n{self.data_frame.__repr__()}"
1496
1499
 
1497
- def __call__(self) -> "FlowDataEngine":
1500
+ def __call__(self) -> FlowDataEngine:
1498
1501
  """Makes the class instance callable, returning itself."""
1499
1502
  return self
1500
1503
 
@@ -1502,7 +1505,7 @@ class FlowDataEngine:
1502
1505
  """Returns the number of records in the table."""
1503
1506
  return self.number_of_records if self.number_of_records >= 0 else self.get_number_of_records()
1504
1507
 
1505
- def cache(self) -> "FlowDataEngine":
1508
+ def cache(self) -> FlowDataEngine:
1506
1509
  """Caches the current DataFrame to disk and updates the internal reference.
1507
1510
 
1508
1511
  This triggers a background process to write the current LazyFrame's result
@@ -1557,7 +1560,7 @@ class FlowDataEngine:
1557
1560
  df = self.collect()
1558
1561
  return df.to_dicts()
1559
1562
 
1560
- def __get_sample__(self, n_rows: int = 100, streamable: bool = True) -> "FlowDataEngine":
1563
+ def __get_sample__(self, n_rows: int = 100, streamable: bool = True) -> FlowDataEngine:
1561
1564
  """Internal method to get a sample of the data."""
1562
1565
  if not self.lazy:
1563
1566
  df = self.data_frame.lazy()
@@ -1581,7 +1584,7 @@ class FlowDataEngine:
1581
1584
  shuffle: bool = False,
1582
1585
  seed: int = None,
1583
1586
  execution_location: ExecutionLocationsLiteral | None = None,
1584
- ) -> "FlowDataEngine":
1587
+ ) -> FlowDataEngine:
1585
1588
  """Gets a sample of rows from the DataFrame.
1586
1589
 
1587
1590
  Args:
@@ -1620,7 +1623,7 @@ class FlowDataEngine:
1620
1623
 
1621
1624
  return FlowDataEngine(sample_df, schema=self.schema)
1622
1625
 
1623
- def get_subset(self, n_rows: int = 100) -> "FlowDataEngine":
1626
+ def get_subset(self, n_rows: int = 100) -> FlowDataEngine:
1624
1627
  """Gets the first `n_rows` from the DataFrame.
1625
1628
 
1626
1629
  Args:
@@ -1636,7 +1639,7 @@ class FlowDataEngine:
1636
1639
 
1637
1640
  def iter_batches(
1638
1641
  self, batch_size: int = 1000, columns: list | tuple | str = None
1639
- ) -> Generator["FlowDataEngine", None, None]:
1642
+ ) -> Generator[FlowDataEngine, None, None]:
1640
1643
  """Iterates over the DataFrame in batches.
1641
1644
 
1642
1645
  Args:
@@ -1657,7 +1660,7 @@ class FlowDataEngine:
1657
1660
  def start_fuzzy_join(
1658
1661
  self,
1659
1662
  fuzzy_match_input: transform_schemas.FuzzyMatchInput,
1660
- other: "FlowDataEngine",
1663
+ other: FlowDataEngine,
1661
1664
  file_ref: str,
1662
1665
  flow_id: int = -1,
1663
1666
  node_id: int | str = -1,
@@ -1696,7 +1699,7 @@ class FlowDataEngine:
1696
1699
  def fuzzy_join_external(
1697
1700
  self,
1698
1701
  fuzzy_match_input: transform_schemas.FuzzyMatchInput,
1699
- other: "FlowDataEngine",
1702
+ other: FlowDataEngine,
1700
1703
  file_ref: str = None,
1701
1704
  flow_id: int = -1,
1702
1705
  node_id: int = -1,
@@ -1722,9 +1725,9 @@ class FlowDataEngine:
1722
1725
  def fuzzy_join(
1723
1726
  self,
1724
1727
  fuzzy_match_input: transform_schemas.FuzzyMatchInput,
1725
- other: "FlowDataEngine",
1728
+ other: FlowDataEngine,
1726
1729
  node_logger: NodeLogger = None,
1727
- ) -> "FlowDataEngine":
1730
+ ) -> FlowDataEngine:
1728
1731
  fuzzy_match_input_manager = transform_schemas.FuzzyMatchInputManager(fuzzy_match_input)
1729
1732
  left_df, right_df = prepare_for_fuzzy_match(
1730
1733
  left=self, right=other, fuzzy_match_input_manager=fuzzy_match_input_manager
@@ -1741,8 +1744,8 @@ class FlowDataEngine:
1741
1744
  cross_join_input: transform_schemas.CrossJoinInput,
1742
1745
  auto_generate_selection: bool,
1743
1746
  verify_integrity: bool,
1744
- other: "FlowDataEngine",
1745
- ) -> "FlowDataEngine":
1747
+ other: FlowDataEngine,
1748
+ ) -> FlowDataEngine:
1746
1749
  """Performs a cross join with another DataFrame.
1747
1750
 
1748
1751
  A cross join produces the Cartesian product of the two DataFrames.
@@ -1796,8 +1799,8 @@ class FlowDataEngine:
1796
1799
  join_input: transform_schemas.JoinInput,
1797
1800
  auto_generate_selection: bool,
1798
1801
  verify_integrity: bool,
1799
- other: "FlowDataEngine",
1800
- ) -> "FlowDataEngine":
1802
+ other: FlowDataEngine,
1803
+ ) -> FlowDataEngine:
1801
1804
  """Performs a standard SQL-style join with another DataFrame."""
1802
1805
  # Create manager from input
1803
1806
  join_manager = transform_schemas.JoinInputManager(join_input)
@@ -1864,7 +1867,7 @@ class FlowDataEngine:
1864
1867
 
1865
1868
  return FlowDataEngine(joined_df, calculate_schema_stats=False, number_of_records=0, streamable=False)
1866
1869
 
1867
- def solve_graph(self, graph_solver_input: transform_schemas.GraphSolverInput) -> "FlowDataEngine":
1870
+ def solve_graph(self, graph_solver_input: transform_schemas.GraphSolverInput) -> FlowDataEngine:
1868
1871
  """Solves a graph problem represented by 'from' and 'to' columns.
1869
1872
 
1870
1873
  This is used for operations like finding connected components in a graph.
@@ -1883,7 +1886,7 @@ class FlowDataEngine:
1883
1886
  )
1884
1887
  return FlowDataEngine(lf)
1885
1888
 
1886
- def add_new_values(self, values: Iterable, col_name: str = None) -> "FlowDataEngine":
1889
+ def add_new_values(self, values: Iterable, col_name: str = None) -> FlowDataEngine:
1887
1890
  """Adds a new column with the provided values.
1888
1891
 
1889
1892
  Args:
@@ -1897,7 +1900,7 @@ class FlowDataEngine:
1897
1900
  col_name = "new_values"
1898
1901
  return FlowDataEngine(self.data_frame.with_columns(pl.Series(values).alias(col_name)))
1899
1902
 
1900
- def get_record_count(self) -> "FlowDataEngine":
1903
+ def get_record_count(self) -> FlowDataEngine:
1901
1904
  """Returns a new FlowDataEngine with a single column 'number_of_records'
1902
1905
  containing the total number of records.
1903
1906
 
@@ -1906,7 +1909,7 @@ class FlowDataEngine:
1906
1909
  """
1907
1910
  return FlowDataEngine(self.data_frame.select(pl.len().alias("number_of_records")))
1908
1911
 
1909
- def assert_equal(self, other: "FlowDataEngine", ordered: bool = True, strict_schema: bool = False):
1912
+ def assert_equal(self, other: FlowDataEngine, ordered: bool = True, strict_schema: bool = False):
1910
1913
  """Asserts that this DataFrame is equal to another.
1911
1914
 
1912
1915
  Useful for testing.
@@ -2075,7 +2078,7 @@ class FlowDataEngine:
2075
2078
  [transform_schemas.SelectInput(old_name=c.name, data_type=c.data_type) for c in self.schema]
2076
2079
  )
2077
2080
 
2078
- def select_columns(self, list_select: list[str] | tuple[str] | str) -> "FlowDataEngine":
2081
+ def select_columns(self, list_select: list[str] | tuple[str] | str) -> FlowDataEngine:
2079
2082
  """Selects a subset of columns from the DataFrame.
2080
2083
 
2081
2084
  Args:
@@ -2098,7 +2101,7 @@ class FlowDataEngine:
2098
2101
  streamable=self._streamable,
2099
2102
  )
2100
2103
 
2101
- def drop_columns(self, columns: list[str]) -> "FlowDataEngine":
2104
+ def drop_columns(self, columns: list[str]) -> FlowDataEngine:
2102
2105
  """Drops specified columns from the DataFrame.
2103
2106
 
2104
2107
  Args:
@@ -2115,7 +2118,7 @@ class FlowDataEngine:
2115
2118
  self.data_frame.select(cols_for_select), number_of_records=self.number_of_records, schema=new_schema
2116
2119
  )
2117
2120
 
2118
- def reorganize_order(self, column_order: list[str]) -> "FlowDataEngine":
2121
+ def reorganize_order(self, column_order: list[str]) -> FlowDataEngine:
2119
2122
  """Reorganizes columns into a specified order.
2120
2123
 
2121
2124
  Args:
@@ -2130,7 +2133,7 @@ class FlowDataEngine:
2130
2133
 
2131
2134
  def apply_flowfile_formula(
2132
2135
  self, func: str, col_name: str, output_data_type: pl.DataType = None
2133
- ) -> "FlowDataEngine":
2136
+ ) -> FlowDataEngine:
2134
2137
  """Applies a formula to create a new column or transform an existing one.
2135
2138
 
2136
2139
  Args:
@@ -2149,7 +2152,7 @@ class FlowDataEngine:
2149
2152
 
2150
2153
  return FlowDataEngine(df2, number_of_records=self.number_of_records)
2151
2154
 
2152
- def apply_sql_formula(self, func: str, col_name: str, output_data_type: pl.DataType = None) -> "FlowDataEngine":
2155
+ def apply_sql_formula(self, func: str, col_name: str, output_data_type: pl.DataType = None) -> FlowDataEngine:
2153
2156
  """Applies an SQL-style formula using `pl.sql_expr`.
2154
2157
 
2155
2158
  Args:
@@ -2170,7 +2173,7 @@ class FlowDataEngine:
2170
2173
 
2171
2174
  def output(
2172
2175
  self, output_fs: input_schema.OutputSettings, flow_id: int, node_id: int | str, execute_remote: bool = True
2173
- ) -> "FlowDataEngine":
2176
+ ) -> FlowDataEngine:
2174
2177
  """Writes the DataFrame to an output file.
2175
2178
 
2176
2179
  Can execute the write operation locally or in a remote worker process.
@@ -2214,7 +2217,7 @@ class FlowDataEngine:
2214
2217
  logger.info("Finished writing output")
2215
2218
  return self
2216
2219
 
2217
- def make_unique(self, unique_input: transform_schemas.UniqueInput = None) -> "FlowDataEngine":
2220
+ def make_unique(self, unique_input: transform_schemas.UniqueInput = None) -> FlowDataEngine:
2218
2221
  """Gets the unique rows from the DataFrame.
2219
2222
 
2220
2223
  Args:
@@ -2228,7 +2231,7 @@ class FlowDataEngine:
2228
2231
  return FlowDataEngine(self.data_frame.unique())
2229
2232
  return FlowDataEngine(self.data_frame.unique(unique_input.columns, keep=unique_input.strategy))
2230
2233
 
2231
- def concat(self, other: Iterable["FlowDataEngine"] | "FlowDataEngine") -> "FlowDataEngine":
2234
+ def concat(self, other: Iterable[FlowDataEngine] | FlowDataEngine) -> FlowDataEngine:
2232
2235
  """Concatenates this DataFrame with one or more other DataFrames.
2233
2236
 
2234
2237
  Args:
@@ -2243,7 +2246,7 @@ class FlowDataEngine:
2243
2246
  dfs: list[pl.LazyFrame] | list[pl.DataFrame] = [self.data_frame] + [flt.data_frame for flt in other]
2244
2247
  return FlowDataEngine(pl.concat(dfs, how="diagonal_relaxed"))
2245
2248
 
2246
- def do_select(self, select_inputs: transform_schemas.SelectInputs, keep_missing: bool = True) -> "FlowDataEngine":
2249
+ def do_select(self, select_inputs: transform_schemas.SelectInputs, keep_missing: bool = True) -> FlowDataEngine:
2247
2250
  """Performs a complex column selection, renaming, and reordering operation.
2248
2251
 
2249
2252
  Args:
@@ -2256,7 +2259,6 @@ class FlowDataEngine:
2256
2259
  """
2257
2260
  new_schema = deepcopy(self.schema)
2258
2261
  renames = [r for r in select_inputs.renames if r.is_available]
2259
-
2260
2262
  if not keep_missing:
2261
2263
  drop_cols = set(self.data_frame.collect_schema().names()) - set(r.old_name for r in renames).union(
2262
2264
  set(r.old_name for r in renames if not r.keep)
@@ -2322,7 +2324,7 @@ class FlowDataEngine:
2322
2324
  return cls(external_fetcher.get_result())
2323
2325
 
2324
2326
 
2325
- def execute_polars_code(*flowfile_tables: "FlowDataEngine", code: str) -> "FlowDataEngine":
2327
+ def execute_polars_code(*flowfile_tables: FlowDataEngine, code: str) -> FlowDataEngine:
2326
2328
  """Executes arbitrary Polars code on one or more FlowDataEngine objects.
2327
2329
 
2328
2330
  This function takes a string of Python code that uses Polars and executes it.
@@ -5,32 +5,6 @@ import polars as pl
5
5
  DataTypeGroup = Literal["numeric", "string", "datetime", "boolean", "binary", "complex", "unknown"]
6
6
 
7
7
 
8
- def convert_pl_type_to_string(pl_type: pl.DataType, inner: bool = False) -> str:
9
- if isinstance(pl_type, pl.List):
10
- inner_str = convert_pl_type_to_string(pl_type.inner, inner=True)
11
- return f"pl.List({inner_str})"
12
- elif isinstance(pl_type, pl.Array):
13
- inner_str = convert_pl_type_to_string(pl_type.inner, inner=True)
14
- return f"pl.Array({inner_str})"
15
- elif isinstance(pl_type, pl.Decimal):
16
- precision = pl_type.precision if hasattr(pl_type, "precision") else None
17
- scale = pl_type.scale if hasattr(pl_type, "scale") else None
18
- if precision is not None and scale is not None:
19
- return f"pl.Decimal({precision}, {scale})"
20
- elif precision is not None:
21
- return f"pl.Decimal({precision})"
22
- else:
23
- return "pl.Decimal()"
24
- elif isinstance(pl_type, pl.Struct):
25
- # Handle Struct with field definitions
26
- fields = []
27
- if hasattr(pl_type, "fields"):
28
- for field in pl_type.fields:
29
- field_name = field.name
30
- field_type = convert_pl_type_to_string(field.dtype, inner=True)
31
- fields.append(f'pl.Field("{field_name}", {field_type})')
32
- field_str = ", ".join(fields)
33
- return f"pl.Struct([{field_str}])"
34
- else:
35
- # For base types, we want the full pl.TypeName format
36
- return str(pl_type.base_type()) if not inner else f"pl.{pl_type}"
8
+ def convert_pl_type_to_string(pl_type: pl.DataType) -> str:
9
+ """Convert a Polars DataType to its string representation."""
10
+ return str(pl_type)
@@ -21,20 +21,51 @@ dtype_to_pl = {
21
21
  def safe_eval_pl_type(type_string: str):
22
22
  """
23
23
  Safely evaluate a Polars type string with restricted namespace.
24
- Only allows Polars types and basic Python literals.
24
+ Supports both formats:
25
+ - With pl. prefix: pl.List(pl.Int64)
26
+ - Without pl. prefix: List(Int64)
25
27
  """
26
28
  # Define allowed names in the evaluation namespace
27
29
  safe_dict = {
28
- # Polars module and types
30
+ # Keep pl module for backwards compatibility with pl.X format
29
31
  "pl": pl,
30
- # Basic Python built-ins for literals
31
- "int": int,
32
- "str": str,
33
- "float": float,
34
- "bool": bool,
35
- "list": list,
36
- "dict": dict,
37
- "tuple": tuple,
32
+
33
+ # Polars types directly available (without pl. prefix)
34
+ "List": pl.List,
35
+ "Array": pl.Array,
36
+ "Struct": pl.Struct,
37
+ "Field": pl.Field,
38
+ "Decimal": pl.Decimal,
39
+
40
+ # Integer types
41
+ "Int8": pl.Int8,
42
+ "Int16": pl.Int16,
43
+ "Int32": pl.Int32,
44
+ "Int64": pl.Int64,
45
+ "Int128": pl.Int128,
46
+ "UInt8": pl.UInt8,
47
+ "UInt16": pl.UInt16,
48
+ "UInt32": pl.UInt32,
49
+ "UInt64": pl.UInt64,
50
+
51
+ # Float types
52
+ "Float32": pl.Float32,
53
+ "Float64": pl.Float64,
54
+
55
+ # Other types
56
+ "Boolean": pl.Boolean,
57
+ "String": pl.String,
58
+ "Utf8": pl.Utf8,
59
+ "Binary": pl.Binary,
60
+ "Date": pl.Date,
61
+ "Time": pl.Time,
62
+ "Datetime": pl.Datetime,
63
+ "Duration": pl.Duration,
64
+ "Categorical": pl.Categorical,
65
+ "Enum": pl.Enum,
66
+ "Null": pl.Null,
67
+ "Object": pl.Object,
68
+
38
69
  # Disable dangerous built-ins
39
70
  "__builtins__": {},
40
71
  }
@@ -57,10 +88,10 @@ def get_polars_type(dtype: str):
57
88
  pl_datetype = dtype_to_pl.get(dtype.lower())
58
89
  if pl_datetype is not None:
59
90
  return pl_datetype
60
- elif hasattr(pl, dtype):
61
- return getattr(pl, dtype)
62
- else:
63
- return pl.String
91
+ try:
92
+ return safe_eval_pl_type(dtype)
93
+ except Exception:
94
+ return pl.String # Fallback to String if evaluation fails
64
95
 
65
96
 
66
97
  def cast_str_to_polars_type(dtype: str) -> pl.DataType:
@@ -1,13 +1,30 @@
1
- from typing import Any, Literal
1
+ from base64 import b64decode, b64encode
2
+ from typing import Annotated, Any, Literal
2
3
 
3
4
  from pl_fuzzy_frame_match.models import FuzzyMapping
4
- from pydantic import BaseModel
5
+ from pydantic import BaseModel, BeforeValidator, PlainSerializer
5
6
 
6
7
  OperationType = Literal["store", "calculate_schema", "calculate_number_of_records", "write_output", "store_sample"]
7
8
 
8
9
 
10
+ # Custom type for bytes that serializes to/from base64 string in JSON
11
+ def _decode_bytes(v: Any) -> bytes:
12
+ if isinstance(v, bytes):
13
+ return v
14
+ if isinstance(v, str):
15
+ return b64decode(v)
16
+ raise ValueError(f"Expected bytes or base64 string, got {type(v)}")
17
+
18
+
19
+ Base64Bytes = Annotated[
20
+ bytes,
21
+ BeforeValidator(_decode_bytes),
22
+ PlainSerializer(lambda x: b64encode(x).decode('ascii'), return_type=str),
23
+ ]
24
+
25
+
9
26
  class PolarsOperation(BaseModel):
10
- operation: bytes
27
+ operation: Base64Bytes # Automatically encodes/decodes base64 for JSON
11
28
 
12
29
 
13
30
  class PolarsScript(PolarsOperation):