Flowfile 0.5.1__py3-none-any.whl → 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (346)
  1. build_backends/main.py +25 -22
  2. build_backends/main_prd.py +10 -19
  3. flowfile/__init__.py +194 -74
  4. flowfile/__main__.py +10 -7
  5. flowfile/api.py +51 -57
  6. flowfile/web/__init__.py +14 -9
  7. flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
  8. flowfile/web/static/assets/AdminView-f9847d67.js +713 -0
  9. flowfile/web/static/assets/CloudConnectionView-cf85f943.css +72 -0
  10. flowfile/web/static/assets/{CloudConnectionManager-0dfba9f2.js → CloudConnectionView-faace55b.js} +11 -11
  11. flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
  12. flowfile/web/static/assets/{CloudStorageReader-d5b1b6c9.js → CloudStorageReader-d86ecaa7.js} +10 -8
  13. flowfile/web/static/assets/{CloudStorageWriter-00d87aad.js → CloudStorageWriter-0f4d9a44.js} +10 -8
  14. flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
  15. flowfile/web/static/assets/ColumnActionInput-c44b7aee.css +159 -0
  16. flowfile/web/static/assets/ColumnActionInput-f4189ae0.js +330 -0
  17. flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
  18. flowfile/web/static/assets/{ColumnSelector-4685e75d.js → ColumnSelector-e66b33da.js} +3 -5
  19. flowfile/web/static/assets/ContextMenu-49463352.js +9 -0
  20. flowfile/web/static/assets/ContextMenu-dd5f3f25.js +9 -0
  21. flowfile/web/static/assets/ContextMenu-f709b884.js +9 -0
  22. flowfile/web/static/assets/ContextMenu.vue_vue_type_script_setup_true_lang-a1bd6314.js +59 -0
  23. flowfile/web/static/assets/{CrossJoin-702a3edd.js → CrossJoin-24694b8f.js} +12 -10
  24. flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
  25. flowfile/web/static/assets/{CustomNode-b1519993.js → CustomNode-569d45ff.js} +43 -24
  26. flowfile/web/static/assets/CustomNode-edb9b939.css +42 -0
  27. flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-c20a1e16.css} +23 -21
  28. flowfile/web/static/assets/{DatabaseConnectionSettings-6f3e4ea5.js → DatabaseConnectionSettings-cfc08938.js} +5 -4
  29. flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-5bf8c75b.css} +41 -46
  30. flowfile/web/static/assets/{DatabaseReader-d38c7295.js → DatabaseReader-701feabb.js} +25 -15
  31. flowfile/web/static/assets/{DatabaseManager-cf5ef661.js → DatabaseView-0482e5b5.js} +11 -11
  32. flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
  33. flowfile/web/static/assets/{DatabaseWriter-b04ef46a.js → DatabaseWriter-16721989.js} +17 -10
  34. flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-bdcf2c8b.css} +29 -27
  35. flowfile/web/static/assets/{designer-8da3ba3a.css → DesignerView-49abb835.css} +783 -663
  36. flowfile/web/static/assets/{designer-9633482a.js → DesignerView-f64749fb.js} +1292 -3253
  37. flowfile/web/static/assets/{documentation-ca400224.js → DocumentationView-61bd2990.js} +5 -5
  38. flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-9ea6e871.css} +9 -9
  39. flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
  40. flowfile/web/static/assets/{ExploreData-5fa10ed8.js → ExploreData-e2735b13.js} +18 -9
  41. flowfile/web/static/assets/{ExternalSource-d39af878.js → ExternalSource-2535c3b2.js} +9 -7
  42. flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-7ac7373f.css} +20 -20
  43. flowfile/web/static/assets/Filter-2cdbc93c.js +287 -0
  44. flowfile/web/static/assets/Filter-7494ea97.css +48 -0
  45. flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
  46. flowfile/web/static/assets/{Formula-6b04fb1d.js → Formula-fcda3c2c.js} +13 -11
  47. flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
  48. flowfile/web/static/assets/{FuzzyMatch-999521f4.js → FuzzyMatch-f8d3b7d3.js} +12 -10
  49. flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-4b4d7db9.css} +5 -5
  50. flowfile/web/static/assets/{GraphSolver-17dd2198.js → GraphSolver-72eaa695.js} +14 -12
  51. flowfile/web/static/assets/GroupBy-5792782d.css +9 -0
  52. flowfile/web/static/assets/{GroupBy-6b039e18.js → GroupBy-8aa0598b.js} +9 -7
  53. flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
  54. flowfile/web/static/assets/{Join-24d0f113.js → Join-e40f0ffa.js} +13 -11
  55. flowfile/web/static/assets/LoginView-5111c9ae.js +134 -0
  56. flowfile/web/static/assets/LoginView-d325d632.css +172 -0
  57. flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
  58. flowfile/web/static/assets/{ManualInput-34639209.js → ManualInput-9b6f3224.js} +170 -116
  59. flowfile/web/static/assets/{MultiSelect-0e8724a3.js → MultiSelect-ef28e19e.js} +2 -2
  60. flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js → MultiSelect.vue_vue_type_script_setup_true_lang-83b3bbfd.js} +1 -1
  61. flowfile/web/static/assets/NodeDesigner-94cd4dd3.css +1429 -0
  62. flowfile/web/static/assets/NodeDesigner-d2b7ee2b.js +2712 -0
  63. flowfile/web/static/assets/{NumericInput-3d63a470.js → NumericInput-1d789794.js} +2 -2
  64. flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js → NumericInput.vue_vue_type_script_setup_true_lang-7775f83e.js} +5 -2
  65. flowfile/web/static/assets/Output-692dd25d.css +37 -0
  66. flowfile/web/static/assets/{Output-edea9802.js → Output-cefef801.js} +14 -10
  67. flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
  68. flowfile/web/static/assets/{Pivot-61d19301.js → Pivot-bab1b75b.js} +12 -10
  69. flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
  70. flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
  71. flowfile/web/static/assets/{PivotValidation-f97fec5b.js → PivotValidation-e7941f91.js} +3 -3
  72. flowfile/web/static/assets/{PivotValidation-de9f43fe.js → PivotValidation-fba09336.js} +3 -3
  73. flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
  74. flowfile/web/static/assets/{PolarsCode-bc3c9984.js → PolarsCode-740e40fa.js} +18 -9
  75. flowfile/web/static/assets/PopOver-862d7e28.js +939 -0
  76. flowfile/web/static/assets/PopOver-d96599db.css +33 -0
  77. flowfile/web/static/assets/{Read-64a3f259.js → Read-225cc63f.js} +16 -12
  78. flowfile/web/static/assets/{Read-e808b239.css → Read-90f366bc.css} +15 -15
  79. flowfile/web/static/assets/{RecordCount-3d5039be.js → RecordCount-ffc71eca.js} +6 -4
  80. flowfile/web/static/assets/{RecordId-597510e0.js → RecordId-a70bb8df.js} +9 -7
  81. flowfile/web/static/assets/{SQLQueryComponent-df51adbe.js → SQLQueryComponent-15a421f5.js} +3 -3
  82. flowfile/web/static/assets/SQLQueryComponent-edb90b98.css +29 -0
  83. flowfile/web/static/assets/{Sample-4be0a507.js → Sample-6c26afc7.js} +6 -4
  84. flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
  85. flowfile/web/static/assets/SecretSelector-ceed9496.js +113 -0
  86. flowfile/web/static/assets/{SecretManager-4839be57.js → SecretsView-214d255a.js} +35 -36
  87. flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
  88. flowfile/web/static/assets/{Select-9b72f201.js → Select-8fc29999.js} +9 -7
  89. flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
  90. flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
  91. flowfile/web/static/assets/{SettingsSection-7ded385d.js → SettingsSection-3f70e4c3.js} +3 -3
  92. flowfile/web/static/assets/{SettingsSection-f0f75a42.js → SettingsSection-83090218.js} +3 -3
  93. flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
  94. flowfile/web/static/assets/{SettingsSection-e1e9c953.js → SettingsSection-9f0d1725.js} +3 -3
  95. flowfile/web/static/assets/SetupView-3fa0aa03.js +160 -0
  96. flowfile/web/static/assets/SetupView-e2da3442.css +230 -0
  97. flowfile/web/static/assets/{SingleSelect-6c777aac.js → SingleSelect-a4a568cb.js} +2 -2
  98. flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js → SingleSelect.vue_vue_type_script_setup_true_lang-c8ebdd33.js} +1 -1
  99. flowfile/web/static/assets/{SliderInput-7cb93e62.js → SliderInput-be533e71.js} +7 -4
  100. flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
  101. flowfile/web/static/assets/{Sort-6cbde21a.js → Sort-154dad81.js} +9 -7
  102. flowfile/web/static/assets/Sort-4abb7fae.css +9 -0
  103. flowfile/web/static/assets/{TextInput-d9a40c11.js → TextInput-454e2bda.js} +2 -2
  104. flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-5896c375.js → TextInput.vue_vue_type_script_setup_true_lang-e86510d0.js} +5 -2
  105. flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
  106. flowfile/web/static/assets/{TextToRows-c4fcbf4d.js → TextToRows-ea73433d.js} +11 -10
  107. flowfile/web/static/assets/{ToggleSwitch-4ef91d19.js → ToggleSwitch-9d7b30f1.js} +2 -2
  108. flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-00f2580e.js} +1 -1
  109. flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-394a1f78.css} +14 -14
  110. flowfile/web/static/assets/{UnavailableFields-a03f512c.js → UnavailableFields-b72a2c72.js} +4 -4
  111. flowfile/web/static/assets/{Union-bfe9b996.js → Union-1e44f263.js} +8 -6
  112. flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
  113. flowfile/web/static/assets/Unique-2b705521.css +3 -0
  114. flowfile/web/static/assets/{Unique-5d023a27.js → Unique-a3bc6d0a.js} +13 -10
  115. flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-b6ad6427.css} +7 -7
  116. flowfile/web/static/assets/{Unpivot-91cc5354.js → Unpivot-e27935fc.js} +11 -9
  117. flowfile/web/static/assets/{UnpivotValidation-7ee2de44.js → UnpivotValidation-72497680.js} +3 -3
  118. flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
  119. flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
  120. flowfile/web/static/assets/{VueGraphicWalker-e51b9924.js → VueGraphicWalker-d9ab70a3.js} +4 -4
  121. flowfile/web/static/assets/{api-cf1221f0.js → api-a2102880.js} +1 -1
  122. flowfile/web/static/assets/{api-c1bad5ca.js → api-f75042b0.js} +1 -1
  123. flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-1d6acbd9.css} +41 -41
  124. flowfile/web/static/assets/{dropDown-614b998d.js → dropDown-2798a109.js} +3 -3
  125. flowfile/web/static/assets/{fullEditor-f7971590.js → fullEditor-cf7d7d93.js} +11 -10
  126. flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-fe9f7e18.css} +77 -65
  127. flowfile/web/static/assets/{genericNodeSettings-4fe5f36b.js → genericNodeSettings-14eac1c3.js} +5 -5
  128. flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
  129. flowfile/web/static/assets/{index-5429bbf8.js → index-387a6f18.js} +41806 -40958
  130. flowfile/web/static/assets/index-6b367bb5.js +38 -0
  131. flowfile/web/static/assets/{index-50508d4d.css → index-e96ab018.css} +2184 -569
  132. flowfile/web/static/assets/index-f0a6e5a5.js +2696 -0
  133. flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
  134. flowfile/web/static/assets/nodeInput-ed2ae8d7.js +2 -0
  135. flowfile/web/static/assets/{outputCsv-076b85ab.js → outputCsv-3c1757e8.js} +3 -3
  136. flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
  137. flowfile/web/static/assets/{outputExcel-0fd17dbe.js → outputExcel-686e1f48.js} +3 -3
  138. flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
  139. flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
  140. flowfile/web/static/assets/{outputParquet-b61e0847.js → outputParquet-df28faa7.js} +4 -4
  141. flowfile/web/static/assets/{readCsv-c767cb37.css → readCsv-3bfac4c3.css} +15 -15
  142. flowfile/web/static/assets/{readCsv-a8bb8b61.js → readCsv-e37eee21.js} +3 -3
  143. flowfile/web/static/assets/{readExcel-806d2826.css → readExcel-3db6b763.css} +13 -13
  144. flowfile/web/static/assets/{readExcel-67b4aee0.js → readExcel-a13f14bb.js} +5 -5
  145. flowfile/web/static/assets/{readParquet-92ce1dbc.js → readParquet-344cf746.js} +3 -3
  146. flowfile/web/static/assets/{readParquet-48c81530.css → readParquet-c5244ad5.css} +4 -4
  147. flowfile/web/static/assets/secrets.api-ae198c5c.js +65 -0
  148. flowfile/web/static/assets/{selectDynamic-92e25ee3.js → selectDynamic-6b4b0767.js} +5 -5
  149. flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
  150. flowfile/web/static/assets/{vue-codemirror.esm-41b0e0d7.js → vue-codemirror.esm-31ba0e0b.js} +31 -640
  151. flowfile/web/static/assets/{vue-content-loader.es-2c8e608f.js → vue-content-loader.es-4469c8ff.js} +1 -1
  152. flowfile/web/static/index.html +2 -2
  153. {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/METADATA +3 -4
  154. flowfile-0.5.4.dist-info/RECORD +407 -0
  155. flowfile_core/__init__.py +13 -6
  156. flowfile_core/auth/jwt.py +51 -16
  157. flowfile_core/auth/models.py +32 -7
  158. flowfile_core/auth/password.py +89 -0
  159. flowfile_core/auth/secrets.py +64 -19
  160. flowfile_core/configs/__init__.py +9 -7
  161. flowfile_core/configs/flow_logger.py +15 -14
  162. flowfile_core/configs/node_store/__init__.py +72 -4
  163. flowfile_core/configs/node_store/nodes.py +155 -172
  164. flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
  165. flowfile_core/configs/settings.py +28 -15
  166. flowfile_core/database/connection.py +7 -6
  167. flowfile_core/database/init_db.py +96 -2
  168. flowfile_core/database/models.py +3 -1
  169. flowfile_core/fileExplorer/__init__.py +17 -0
  170. flowfile_core/fileExplorer/funcs.py +145 -57
  171. flowfile_core/fileExplorer/utils.py +10 -11
  172. flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
  173. flowfile_core/flowfile/analytics/analytics_processor.py +26 -24
  174. flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
  175. flowfile_core/flowfile/analytics/utils.py +1 -1
  176. flowfile_core/flowfile/code_generator/__init__.py +11 -0
  177. flowfile_core/flowfile/code_generator/code_generator.py +706 -247
  178. flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
  179. flowfile_core/flowfile/connection_manager/models.py +1 -1
  180. flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
  181. flowfile_core/flowfile/database_connection_manager/models.py +1 -1
  182. flowfile_core/flowfile/extensions.py +17 -12
  183. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
  184. flowfile_core/flowfile/flow_data_engine/create/funcs.py +115 -83
  185. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +493 -423
  186. flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
  187. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
  188. flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
  189. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
  190. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
  191. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +31 -20
  192. flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
  193. flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
  194. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +14 -15
  195. flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
  196. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
  197. flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
  198. flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
  199. flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
  200. flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
  201. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +190 -127
  202. flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
  203. flowfile_core/flowfile/flow_data_engine/utils.py +99 -67
  204. flowfile_core/flowfile/flow_graph.py +920 -571
  205. flowfile_core/flowfile/flow_graph_utils.py +31 -49
  206. flowfile_core/flowfile/flow_node/flow_node.py +379 -258
  207. flowfile_core/flowfile/flow_node/models.py +53 -41
  208. flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
  209. flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
  210. flowfile_core/flowfile/handler.py +80 -30
  211. flowfile_core/flowfile/manage/compatibility_enhancements.py +209 -126
  212. flowfile_core/flowfile/manage/io_flowfile.py +54 -57
  213. flowfile_core/flowfile/node_designer/__init__.py +19 -13
  214. flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
  215. flowfile_core/flowfile/node_designer/custom_node.py +162 -36
  216. flowfile_core/flowfile/node_designer/ui_components.py +278 -34
  217. flowfile_core/flowfile/schema_callbacks.py +71 -51
  218. flowfile_core/flowfile/setting_generator/__init__.py +0 -1
  219. flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
  220. flowfile_core/flowfile/setting_generator/settings.py +64 -53
  221. flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
  222. flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
  223. flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
  224. flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
  225. flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
  226. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
  227. flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
  228. flowfile_core/flowfile/util/calculate_layout.py +9 -13
  229. flowfile_core/flowfile/util/execution_orderer.py +25 -17
  230. flowfile_core/flowfile/util/node_skipper.py +4 -4
  231. flowfile_core/flowfile/utils.py +19 -21
  232. flowfile_core/main.py +26 -19
  233. flowfile_core/routes/auth.py +284 -11
  234. flowfile_core/routes/cloud_connections.py +25 -25
  235. flowfile_core/routes/logs.py +21 -29
  236. flowfile_core/routes/public.py +46 -4
  237. flowfile_core/routes/routes.py +70 -34
  238. flowfile_core/routes/secrets.py +25 -27
  239. flowfile_core/routes/user_defined_components.py +483 -4
  240. flowfile_core/run_lock.py +0 -1
  241. flowfile_core/schemas/__init__.py +4 -6
  242. flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
  243. flowfile_core/schemas/cloud_storage_schemas.py +96 -66
  244. flowfile_core/schemas/input_schema.py +231 -144
  245. flowfile_core/schemas/output_model.py +49 -34
  246. flowfile_core/schemas/schemas.py +116 -89
  247. flowfile_core/schemas/transform_schema.py +518 -263
  248. flowfile_core/schemas/yaml_types.py +21 -7
  249. flowfile_core/secret_manager/secret_manager.py +123 -18
  250. flowfile_core/types.py +29 -9
  251. flowfile_core/utils/arrow_reader.py +7 -6
  252. flowfile_core/utils/excel_file_manager.py +3 -3
  253. flowfile_core/utils/fileManager.py +7 -7
  254. flowfile_core/utils/fl_executor.py +8 -10
  255. flowfile_core/utils/utils.py +4 -4
  256. flowfile_core/utils/validate_setup.py +5 -4
  257. flowfile_frame/__init__.py +117 -51
  258. flowfile_frame/adapters.py +2 -9
  259. flowfile_frame/adding_expr.py +73 -32
  260. flowfile_frame/cloud_storage/frame_helpers.py +27 -23
  261. flowfile_frame/cloud_storage/secret_manager.py +12 -26
  262. flowfile_frame/config.py +2 -5
  263. flowfile_frame/database/__init__.py +36 -0
  264. flowfile_frame/database/connection_manager.py +205 -0
  265. flowfile_frame/database/frame_helpers.py +249 -0
  266. flowfile_frame/expr.py +311 -218
  267. flowfile_frame/expr.pyi +160 -159
  268. flowfile_frame/expr_name.py +23 -23
  269. flowfile_frame/flow_frame.py +571 -476
  270. flowfile_frame/flow_frame.pyi +123 -104
  271. flowfile_frame/flow_frame_methods.py +227 -246
  272. flowfile_frame/group_frame.py +50 -20
  273. flowfile_frame/join.py +2 -2
  274. flowfile_frame/lazy.py +129 -87
  275. flowfile_frame/lazy_methods.py +83 -30
  276. flowfile_frame/list_name_space.py +55 -50
  277. flowfile_frame/selectors.py +148 -68
  278. flowfile_frame/series.py +9 -7
  279. flowfile_frame/utils.py +19 -21
  280. flowfile_worker/__init__.py +12 -7
  281. flowfile_worker/configs.py +41 -33
  282. flowfile_worker/create/__init__.py +14 -9
  283. flowfile_worker/create/funcs.py +114 -77
  284. flowfile_worker/create/models.py +46 -43
  285. flowfile_worker/create/pl_types.py +14 -15
  286. flowfile_worker/create/read_excel_tables.py +34 -41
  287. flowfile_worker/create/utils.py +22 -19
  288. flowfile_worker/external_sources/s3_source/main.py +18 -51
  289. flowfile_worker/external_sources/s3_source/models.py +34 -27
  290. flowfile_worker/external_sources/sql_source/main.py +8 -5
  291. flowfile_worker/external_sources/sql_source/models.py +13 -9
  292. flowfile_worker/flow_logger.py +10 -8
  293. flowfile_worker/funcs.py +214 -155
  294. flowfile_worker/main.py +11 -17
  295. flowfile_worker/models.py +35 -28
  296. flowfile_worker/process_manager.py +2 -3
  297. flowfile_worker/routes.py +121 -90
  298. flowfile_worker/secrets.py +114 -21
  299. flowfile_worker/spawner.py +89 -54
  300. flowfile_worker/utils.py +3 -2
  301. shared/__init__.py +2 -7
  302. shared/storage_config.py +25 -13
  303. test_utils/postgres/commands.py +3 -2
  304. test_utils/postgres/fixtures.py +9 -9
  305. test_utils/s3/commands.py +1 -1
  306. test_utils/s3/data_generator.py +3 -4
  307. test_utils/s3/demo_data_generator.py +4 -7
  308. test_utils/s3/fixtures.py +7 -5
  309. tools/migrate/__init__.py +1 -1
  310. tools/migrate/__main__.py +16 -29
  311. tools/migrate/legacy_schemas.py +251 -190
  312. tools/migrate/migrate.py +193 -181
  313. tools/migrate/tests/conftest.py +1 -3
  314. tools/migrate/tests/test_migrate.py +36 -41
  315. tools/migrate/tests/test_migration_e2e.py +28 -29
  316. tools/migrate/tests/test_node_migrations.py +50 -20
  317. flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
  318. flowfile/web/static/assets/ContextMenu-23e909da.js +0 -41
  319. flowfile/web/static/assets/ContextMenu-4c74eef1.css +0 -26
  320. flowfile/web/static/assets/ContextMenu-63cfa99b.css +0 -26
  321. flowfile/web/static/assets/ContextMenu-70ae0c79.js +0 -41
  322. flowfile/web/static/assets/ContextMenu-c13f91d0.css +0 -26
  323. flowfile/web/static/assets/ContextMenu-f149cf7c.js +0 -41
  324. flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
  325. flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
  326. flowfile/web/static/assets/Filter-9b6d08db.js +0 -164
  327. flowfile/web/static/assets/Filter-f62091b3.css +0 -20
  328. flowfile/web/static/assets/GroupBy-b9505323.css +0 -51
  329. flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
  330. flowfile/web/static/assets/Output-283fe388.css +0 -37
  331. flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
  332. flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
  333. flowfile/web/static/assets/SQLQueryComponent-36cef432.css +0 -27
  334. flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
  335. flowfile/web/static/assets/Sort-3643d625.css +0 -51
  336. flowfile/web/static/assets/Unique-f9fb0809.css +0 -51
  337. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
  338. flowfile/web/static/assets/nodeInput-5d0d6b79.js +0 -41
  339. flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
  340. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
  341. flowfile/web/static/assets/secretApi-68435402.js +0 -46
  342. flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
  343. flowfile-0.5.1.dist-info/RECORD +0 -388
  344. {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/WHEEL +0 -0
  345. {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/entry_points.txt +0 -0
  346. {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/licenses/LICENSE +0 -0
@@ -1,34 +1,36 @@
1
+ from __future__ import annotations
2
+
1
3
  import inspect
2
4
  import os
3
- from typing import Any, Iterable, List, Literal, Optional, Tuple, Union, Dict, Callable, get_args, get_origin
4
-
5
5
  import re
6
+ from collections.abc import Iterable, Iterator, Mapping
7
+ from typing import Any, Literal, Optional, Union, get_args, get_origin
6
8
 
7
9
  import polars as pl
8
- from flowfile_frame.lazy_methods import add_lazyframe_methods
9
-
10
- from polars._typing import (CsvEncoding, FrameInitTypes, SchemaDefinition, SchemaDict, Orientation)
11
- from collections.abc import Iterator
12
-
13
10
  from pl_fuzzy_frame_match import FuzzyMapping
11
+ from polars._typing import CsvEncoding, FrameInitTypes, Orientation, SchemaDefinition, SchemaDict
14
12
 
13
+ from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine
15
14
  from flowfile_core.flowfile.flow_graph import FlowGraph, add_connection
16
15
  from flowfile_core.flowfile.flow_graph_utils import combine_flow_graphs_with_mapping
17
- from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine
18
16
  from flowfile_core.flowfile.flow_node.flow_node import FlowNode
19
17
  from flowfile_core.schemas import input_schema, transform_schema
20
-
21
- from flowfile_frame.expr import Expr, Column, lit, col
22
- from flowfile_frame.selectors import Selector
23
- from flowfile_frame.group_frame import GroupByFrame
24
- from flowfile_frame.utils import (_parse_inputs_as_iterable, create_flow_graph, stringify_values,
25
- ensure_inputs_as_iterable, generate_node_id, data as node_id_data)
26
- from flowfile_frame.join import _normalize_columns_to_list, _create_join_mappings
27
- from flowfile_frame.utils import _check_if_convertible_to_code
28
- from flowfile_frame.config import logger
29
18
  from flowfile_frame.cloud_storage.frame_helpers import add_write_ff_to_cloud_storage
30
- from collections.abc import Mapping
31
-
19
+ from flowfile_frame.config import logger
20
+ from flowfile_frame.expr import Column, Expr, col, lit
21
+ from flowfile_frame.group_frame import GroupByFrame
22
+ from flowfile_frame.join import _create_join_mappings, _normalize_columns_to_list
23
+ from flowfile_frame.lazy_methods import add_lazyframe_methods
24
+ from flowfile_frame.selectors import Selector
25
+ from flowfile_frame.utils import (
26
+ _check_if_convertible_to_code,
27
+ _parse_inputs_as_iterable,
28
+ create_flow_graph,
29
+ ensure_inputs_as_iterable,
30
+ generate_node_id,
31
+ stringify_values,
32
+ )
33
+ from flowfile_frame.utils import data as node_id_data
32
34
 
33
35
 
34
36
  def can_be_expr(param: inspect.Parameter) -> bool:
@@ -83,7 +85,7 @@ def _extract_expr_parts(expr_obj) -> tuple[str, str]:
83
85
  raw_definitions = []
84
86
 
85
87
  # Add function sources if any
86
- if hasattr(expr_obj, '_function_sources') and expr_obj._function_sources:
88
+ if hasattr(expr_obj, "_function_sources") and expr_obj._function_sources:
87
89
  # Remove duplicates while preserving order
88
90
  unique_sources = []
89
91
  seen = set()
@@ -101,8 +103,9 @@ def _extract_expr_parts(expr_obj) -> tuple[str, str]:
101
103
  return pure_expr_str, raw_defs_str
102
104
 
103
105
 
104
- def _check_ok_for_serialization(method_name: str = None, polars_expr: pl.Expr | None = None,
105
- group_expr: pl.Expr | None = None) -> None:
106
+ def _check_ok_for_serialization(
107
+ method_name: str = None, polars_expr: pl.Expr | None = None, group_expr: pl.Expr | None = None
108
+ ) -> None:
106
109
  if method_name is None:
107
110
  raise NotImplementedError("Cannot create a polars lambda expression without the method")
108
111
  if polars_expr is None:
@@ -110,7 +113,7 @@ def _check_ok_for_serialization(method_name: str = None, polars_expr: pl.Expr |
110
113
  method_ref = getattr(pl.LazyFrame, method_name)
111
114
  if method_ref is None:
112
115
  raise ModuleNotFoundError(f"Could not find the method {method_name} in polars lazyframe")
113
- if method_name == 'group_by':
116
+ if method_name == "group_by":
114
117
  if group_expr is None:
115
118
  raise NotImplementedError("Cannot create a polars lambda expression without the groupby expression")
116
119
  if not all(isinstance(ge, pl.Expr) for ge in group_expr):
@@ -120,6 +123,7 @@ def _check_ok_for_serialization(method_name: str = None, polars_expr: pl.Expr |
120
123
  @add_lazyframe_methods
121
124
  class FlowFrame:
122
125
  """Main class that wraps FlowDataEngine and maintains the ETL graph."""
126
+
123
127
  flow_graph: FlowGraph
124
128
  data: pl.LazyFrame
125
129
 
@@ -197,8 +201,10 @@ class FlowFrame:
197
201
  raise ValueError(f"Could not dconvert data to a polars DataFrame: {e}")
198
202
  # Create a FlowDataEngine to get data in the right format for manual input
199
203
  flow_table = FlowDataEngine(raw_data=pl_data)
200
- raw_data_format = input_schema.RawData(data=list(flow_table.to_dict().values()),
201
- columns=[c.get_minimal_field_info() for c in flow_table.schema])
204
+ raw_data_format = input_schema.RawData(
205
+ data=list(flow_table.to_dict().values()),
206
+ columns=[c.get_minimal_field_info() for c in flow_table.schema],
207
+ )
202
208
  # Create a manual input node
203
209
  input_node = input_schema.NodeManualInput(
204
210
  flow_id=flow_id,
@@ -220,19 +226,19 @@ class FlowFrame:
220
226
  )
221
227
 
222
228
  def __new__(
223
- cls,
224
- data: pl.LazyFrame | FrameInitTypes = None,
225
- schema: SchemaDefinition | None = None,
226
- *,
227
- schema_overrides: SchemaDict | None = None,
228
- strict: bool = True,
229
- orient: Orientation | None = None,
230
- infer_schema_length: int | None = 100,
231
- nan_to_null: bool = False,
232
- flow_graph: Optional[FlowGraph] = None,
233
- node_id: Optional[int] = None,
234
- parent_node_id: Optional[int] = None,
235
- **kwargs, # Accept and ignore any other kwargs for API compatibility
229
+ cls,
230
+ data: pl.LazyFrame | FrameInitTypes = None,
231
+ schema: SchemaDefinition | None = None,
232
+ *,
233
+ schema_overrides: SchemaDict | None = None,
234
+ strict: bool = True,
235
+ orient: Orientation | None = None,
236
+ infer_schema_length: int | None = 100,
237
+ nan_to_null: bool = False,
238
+ flow_graph: FlowGraph | None = None,
239
+ node_id: int | None = None,
240
+ parent_node_id: int | None = None,
241
+ **kwargs, # Accept and ignore any other kwargs for API compatibility
236
242
  ) -> "FlowFrame":
237
243
  """
238
244
  Unified constructor for FlowFrame.
@@ -252,11 +258,18 @@ class FlowFrame:
252
258
  instance.parent_node_id = parent_node_id
253
259
  return instance
254
260
  elif flow_graph is not None and not isinstance(data, pl.LazyFrame):
255
- instance = cls.create_from_any_type(data=data, schema=schema, schema_overrides=schema_overrides,
256
- strict=strict, orient=orient, infer_schema_length=infer_schema_length,
257
- nan_to_null=nan_to_null, flow_graph=flow_graph, node_id=node_id,
258
- parent_node_id=parent_node_id
259
- )
261
+ instance = cls.create_from_any_type(
262
+ data=data,
263
+ schema=schema,
264
+ schema_overrides=schema_overrides,
265
+ strict=strict,
266
+ orient=orient,
267
+ infer_schema_length=infer_schema_length,
268
+ nan_to_null=nan_to_null,
269
+ flow_graph=flow_graph,
270
+ node_id=node_id,
271
+ parent_node_id=parent_node_id,
272
+ )
260
273
  return instance
261
274
 
262
275
  source_graph = create_flow_graph()
@@ -265,37 +278,41 @@ class FlowFrame:
265
278
  if data is None:
266
279
  data = pl.LazyFrame()
267
280
  if not isinstance(data, pl.LazyFrame):
268
-
269
281
  description = "Data imported from Python object"
270
282
  try:
271
283
  pl_df = pl.DataFrame(
272
- data, schema=schema, schema_overrides=schema_overrides,
273
- strict=strict, orient=orient, infer_schema_length=infer_schema_length,
274
- nan_to_null=nan_to_null
284
+ data,
285
+ schema=schema,
286
+ schema_overrides=schema_overrides,
287
+ strict=strict,
288
+ orient=orient,
289
+ infer_schema_length=infer_schema_length,
290
+ nan_to_null=nan_to_null,
275
291
  )
276
292
  pl_data = pl_df.lazy()
277
293
  except Exception as e:
278
294
  raise ValueError(f"Could not convert data to a Polars DataFrame: {e}")
279
295
 
280
296
  flow_table = FlowDataEngine(raw_data=pl_data)
281
- raw_data_format = input_schema.RawData(data=list(flow_table.to_dict().values()),
282
- columns=[c.get_minimal_field_info() for c in flow_table.schema])
297
+ raw_data_format = input_schema.RawData(
298
+ data=list(flow_table.to_dict().values()),
299
+ columns=[c.get_minimal_field_info() for c in flow_table.schema],
300
+ )
283
301
  input_node = input_schema.NodeManualInput(
284
- flow_id=source_graph.flow_id, node_id=source_node_id,
285
- raw_data_format=raw_data_format, pos_x=100, pos_y=100,
286
- is_setup=True, description=description
302
+ flow_id=source_graph.flow_id,
303
+ node_id=source_node_id,
304
+ raw_data_format=raw_data_format,
305
+ pos_x=100,
306
+ pos_y=100,
307
+ is_setup=True,
308
+ description=description,
287
309
  )
288
310
  source_graph.add_manual_input(input_node)
289
311
  else:
290
312
  source_graph.add_dependency_on_polars_lazy_frame(data, source_node_id)
291
313
 
292
314
  final_data = source_graph.get_node(source_node_id).get_resulting_data().data_frame
293
- return cls(
294
- data=final_data,
295
- flow_graph=source_graph,
296
- node_id=source_node_id,
297
- parent_node_id=parent_node_id
298
- )
315
+ return cls(data=final_data, flow_graph=source_graph, node_id=source_node_id, parent_node_id=parent_node_id)
299
316
 
300
317
  def __init__(self, *args, **kwargs):
301
318
  """
@@ -328,20 +345,20 @@ class FlowFrame:
328
345
  parent_node_id=self.node_id,
329
346
  )
330
347
  except AttributeError:
331
- raise ValueError('Could not execute the function')
348
+ raise ValueError("Could not execute the function")
332
349
 
333
350
  @staticmethod
334
351
  def _generate_sort_polars_code(
335
- pure_sort_expr_strs: List[str],
336
- descending_values: List[bool],
337
- nulls_last_values: List[bool],
338
- multithreaded: bool,
339
- maintain_order: bool,
352
+ pure_sort_expr_strs: list[str],
353
+ descending_values: list[bool],
354
+ nulls_last_values: list[bool],
355
+ multithreaded: bool,
356
+ maintain_order: bool,
340
357
  ) -> str:
341
358
  """
342
359
  Generates the `input_df.sort(...)` Polars code string using pure expression strings.
343
360
  """
344
- kwargs_for_code: Dict[str, Any] = {}
361
+ kwargs_for_code: dict[str, Any] = {}
345
362
  if any(descending_values):
346
363
  kwargs_for_code["descending"] = descending_values[0] if len(descending_values) == 1 else descending_values
347
364
  if any(nulls_last_values):
@@ -353,19 +370,20 @@ class FlowFrame:
353
370
 
354
371
  kwargs_str_for_code = ", ".join(f"{k}={repr(v)}" for k, v in kwargs_for_code.items())
355
372
 
356
- by_arg_for_code = pure_sort_expr_strs[0] if len(
357
- pure_sort_expr_strs) == 1 else f"[{', '.join(pure_sort_expr_strs)}]"
373
+ by_arg_for_code = (
374
+ pure_sort_expr_strs[0] if len(pure_sort_expr_strs) == 1 else f"[{', '.join(pure_sort_expr_strs)}]"
375
+ )
358
376
  return f"input_df.sort({by_arg_for_code}{', ' + kwargs_str_for_code if kwargs_str_for_code else ''})"
359
377
 
360
378
  def sort(
361
- self,
362
- by: Union[List[Union[Expr, str]], Expr, str],
363
- *more_by: Union[Expr, str],
364
- descending: Union[bool, List[bool]] = False,
365
- nulls_last: Union[bool, List[bool]] = False,
366
- multithreaded: bool = True,
367
- maintain_order: bool = False,
368
- description: Optional[str] = None,
379
+ self,
380
+ by: list[Expr | str] | Expr | str,
381
+ *more_by: Expr | str,
382
+ descending: bool | list[bool] = False,
383
+ nulls_last: bool | list[bool] = False,
384
+ multithreaded: bool = True,
385
+ maintain_order: bool = False,
386
+ description: str | None = None,
369
387
  ) -> "FlowFrame":
370
388
  """
371
389
  Sort the dataframe by the given columns.
@@ -377,10 +395,10 @@ class FlowFrame:
377
395
  if more_by:
378
396
  sort_expressions_input.extend(list(_parse_inputs_as_iterable(more_by)))
379
397
 
380
- all_processed_expr_objects: List[Expr] = []
381
- pure_polars_expr_strings_for_sort: List[str] = []
382
- collected_raw_definitions: List[str] = []
383
- column_names_for_native_node: List[str] = []
398
+ all_processed_expr_objects: list[Expr] = []
399
+ pure_polars_expr_strings_for_sort: list[str] = []
400
+ collected_raw_definitions: list[str] = []
401
+ column_names_for_native_node: list[str] = []
384
402
 
385
403
  use_polars_code_path = False
386
404
 
@@ -429,10 +447,12 @@ class FlowFrame:
429
447
  if not is_simple_col_for_native: # If it wasn't a simple string or unaltered Column
430
448
  use_polars_code_path = True
431
449
 
432
- desc_values = list(descending) if isinstance(descending, list) else [descending] * len(
433
- all_processed_expr_objects)
434
- null_last_values = list(nulls_last) if isinstance(nulls_last, list) else [nulls_last] * len(
435
- all_processed_expr_objects)
450
+ desc_values = (
451
+ list(descending) if isinstance(descending, list) else [descending] * len(all_processed_expr_objects)
452
+ )
453
+ null_last_values = (
454
+ list(nulls_last) if isinstance(nulls_last, list) else [nulls_last] * len(all_processed_expr_objects)
455
+ )
436
456
 
437
457
  if len(desc_values) != len(all_processed_expr_objects):
438
458
  raise ValueError("Length of 'descending' does not match the number of sort expressions.")
@@ -448,23 +468,31 @@ class FlowFrame:
448
468
  if collected_raw_definitions:
449
469
  unique_raw_definitions = list(dict.fromkeys(collected_raw_definitions)) # Order-preserving unique
450
470
  definitions_section = "\n\n".join(unique_raw_definitions)
451
- final_code_for_node = definitions_section + \
452
- "\#─────SPLIT─────\n\n" + \
453
- f"output_df = {polars_operation_code}"
471
+ final_code_for_node = (
472
+ definitions_section + "\\#─────SPLIT─────\n\n" + f"output_df = {polars_operation_code}"
473
+ )
454
474
  else:
455
475
  final_code_for_node = polars_operation_code
456
476
 
457
- pl_expressions_for_fallback = [e.expr for e in all_processed_expr_objects if
458
- hasattr(e, 'expr') and e.expr is not None]
477
+ pl_expressions_for_fallback = [
478
+ e.expr for e in all_processed_expr_objects if hasattr(e, "expr") and e.expr is not None
479
+ ]
459
480
  kwargs_for_fallback = {
460
481
  "descending": desc_values[0] if len(desc_values) == 1 else desc_values,
461
482
  "nulls_last": null_last_values[0] if len(null_last_values) == 1 else null_last_values,
462
- "multithreaded": multithreaded, "maintain_order": maintain_order}
483
+ "multithreaded": multithreaded,
484
+ "maintain_order": maintain_order,
485
+ }
463
486
 
464
- self._add_polars_code(new_node_id, final_code_for_node, description, method_name="sort",
465
- convertable_to_code=_check_if_convertible_to_code(all_processed_expr_objects),
466
- polars_expr=pl_expressions_for_fallback,
467
- kwargs_expr=kwargs_for_fallback)
487
+ self._add_polars_code(
488
+ new_node_id,
489
+ final_code_for_node,
490
+ description,
491
+ method_name="sort",
492
+ convertable_to_code=_check_if_convertible_to_code(all_processed_expr_objects),
493
+ polars_expr=pl_expressions_for_fallback,
494
+ kwargs_expr=kwargs_for_fallback,
495
+ )
468
496
  else:
469
497
  sort_inputs_for_node = []
470
498
  for i, col_name_for_native in enumerate(column_names_for_native_node):
@@ -473,30 +501,44 @@ class FlowFrame:
473
501
  # type: ignore
474
502
  )
475
503
  sort_settings = input_schema.NodeSort(
476
- flow_id=self.flow_graph.flow_id, node_id=new_node_id, sort_input=sort_inputs_for_node, # type: ignore
477
- pos_x=200, pos_y=150, is_setup=True, depending_on_id=self.node_id,
478
- description=description or f"Sort by {', '.join(column_names_for_native_node)}")
504
+ flow_id=self.flow_graph.flow_id,
505
+ node_id=new_node_id,
506
+ sort_input=sort_inputs_for_node, # type: ignore
507
+ pos_x=200,
508
+ pos_y=150,
509
+ is_setup=True,
510
+ depending_on_id=self.node_id,
511
+ description=description or f"Sort by {', '.join(column_names_for_native_node)}",
512
+ )
479
513
  self.flow_graph.add_sort(sort_settings)
480
514
 
481
515
  return self._create_child_frame(new_node_id)
482
516
 
483
- def _add_polars_code(self, new_node_id: int, code: str, description: str = None,
484
- depending_on_ids: List[str] | None = None, convertable_to_code: bool = True,
485
- method_name: str = None, polars_expr: Expr | List[Expr] | None = None,
486
- group_expr: Expr | List[Expr] | None = None,
487
- kwargs_expr: Dict | None = None,
488
- group_kwargs: Dict | None = None, ):
517
+ def _add_polars_code(
518
+ self,
519
+ new_node_id: int,
520
+ code: str,
521
+ description: str = None,
522
+ depending_on_ids: list[str] | None = None,
523
+ convertable_to_code: bool = True,
524
+ method_name: str = None,
525
+ polars_expr: Expr | list[Expr] | None = None,
526
+ group_expr: Expr | list[Expr] | None = None,
527
+ kwargs_expr: dict | None = None,
528
+ group_kwargs: dict | None = None,
529
+ ):
489
530
  polars_code_for_node: str
490
531
  if not convertable_to_code or _contains_lambda_pattern(code):
491
-
492
- effective_method_name = get_method_name_from_code(
493
- code) if method_name is None and "input_df." in code else method_name
532
+ effective_method_name = (
533
+ get_method_name_from_code(code) if method_name is None and "input_df." in code else method_name
534
+ )
494
535
 
495
536
  pl_expr_list = ensure_inputs_as_iterable(polars_expr) if polars_expr is not None else []
496
537
  group_expr_list = ensure_inputs_as_iterable(group_expr) if group_expr is not None else []
497
538
 
498
- _check_ok_for_serialization(polars_expr=pl_expr_list, method_name=effective_method_name,
499
- group_expr=group_expr_list)
539
+ _check_ok_for_serialization(
540
+ polars_expr=pl_expr_list, method_name=effective_method_name, group_expr=group_expr_list
541
+ )
500
542
 
501
543
  current_kwargs_expr = kwargs_expr if kwargs_expr is not None else {}
502
544
  result_lazyframe_or_expr: Any
@@ -508,22 +550,27 @@ class FlowFrame:
508
550
  target_obj = getattr(self.data, effective_method_name)(*group_expr_list, **group_kwargs)
509
551
  if not pl_expr_list:
510
552
  raise ValueError(
511
- "Aggregation expressions (polars_expr) are required for group_by().agg() in serialization fallback.")
553
+ "Aggregation expressions (polars_expr) are required for group_by().agg() in serialization fallback."
554
+ )
512
555
  result_lazyframe_or_expr = target_obj.agg(*pl_expr_list, **current_kwargs_expr)
513
556
  elif effective_method_name:
514
- result_lazyframe_or_expr = getattr(self.data, effective_method_name)(*pl_expr_list,
515
- **current_kwargs_expr)
557
+ result_lazyframe_or_expr = getattr(self.data, effective_method_name)(
558
+ *pl_expr_list, **current_kwargs_expr
559
+ )
516
560
  else:
517
561
  raise ValueError(
518
- "Cannot execute Polars operation: method_name is missing and could not be inferred for serialization fallback.")
562
+ "Cannot execute Polars operation: method_name is missing and could not be inferred for serialization fallback."
563
+ )
519
564
  try:
520
565
  if isinstance(result_lazyframe_or_expr, pl.LazyFrame):
521
- serialized_value_for_code = result_lazyframe_or_expr.serialize(format='json')
522
- polars_code_for_node = "\n".join([
523
- f"serialized_value = r'''{serialized_value_for_code}'''",
524
- "buffer = BytesIO(serialized_value.encode('utf-8'))",
525
- "output_df = pl.LazyFrame.deserialize(buffer, format='json')",
526
- ])
566
+ serialized_value_for_code = result_lazyframe_or_expr.serialize(format="json")
567
+ polars_code_for_node = "\n".join(
568
+ [
569
+ f"serialized_value = r'''{serialized_value_for_code}'''",
570
+ "buffer = BytesIO(serialized_value.encode('utf-8'))",
571
+ "output_df = pl.LazyFrame.deserialize(buffer, format='json')",
572
+ ]
573
+ )
527
574
  logger.warning(
528
575
  f"Transformation '{effective_method_name}' uses non-serializable elements. "
529
576
  "Falling back to serializing the resulting Polars LazyFrame object."
@@ -556,18 +603,18 @@ class FlowFrame:
556
603
  self.flow_graph.add_polars_code(polars_code_settings)
557
604
 
558
605
  def join(
559
- self,
560
- other,
561
- on: List[str | Column] | str | Column = None,
562
- how: str = "inner",
563
- left_on: List[str | Column] | str | Column = None,
564
- right_on: List[str | Column] | str | Column = None,
565
- suffix: str = "_right",
566
- validate: str = None,
567
- nulls_equal: bool = False,
568
- coalesce: bool = None,
569
- maintain_order: Literal[None, "left", "right", "left_right", "right_left"] = None,
570
- description: str = None,
606
+ self,
607
+ other,
608
+ on: list[str | Column] | str | Column = None,
609
+ how: str = "inner",
610
+ left_on: list[str | Column] | str | Column = None,
611
+ right_on: list[str | Column] | str | Column = None,
612
+ suffix: str = "_right",
613
+ validate: str = None,
614
+ nulls_equal: bool = False,
615
+ coalesce: bool = None,
616
+ maintain_order: Literal[None, "left", "right", "left_right", "right_left"] = None,
617
+ description: str = None,
571
618
  ) -> "FlowFrame":
572
619
  """
573
620
  Add a join operation to the Logical Plan.
@@ -613,9 +660,7 @@ class FlowFrame:
613
660
  New FlowFrame with join operation applied.
614
661
  """
615
662
  # Step 1: Determine if we need to use Polars code
616
- use_polars_code = self._should_use_polars_code_for_join(
617
- maintain_order, coalesce, nulls_equal, validate, suffix
618
- )
663
+ use_polars_code = self._should_use_polars_code_for_join(maintain_order, coalesce, nulls_equal, validate, suffix)
619
664
  # Step 2: Ensure both FlowFrames are in the same graph
620
665
  self._ensure_same_graph(other)
621
666
 
@@ -623,11 +668,9 @@ class FlowFrame:
623
668
  new_node_id = generate_node_id()
624
669
 
625
670
  # Step 4: Parse and validate join columns
626
- left_columns, right_columns = self._parse_join_columns(
627
- on, left_on, right_on, how
628
- )
671
+ left_columns, right_columns = self._parse_join_columns(on, left_on, right_on, how)
629
672
  # Step 5: Validate column lists have same length (except for cross join)
630
- if how != 'cross' and left_columns is not None and right_columns is not None:
673
+ if how != "cross" and left_columns is not None and right_columns is not None:
631
674
  if len(left_columns) != len(right_columns):
632
675
  raise ValueError(
633
676
  f"Length mismatch: left columns ({len(left_columns)}) != right columns ({len(right_columns)})"
@@ -635,42 +678,46 @@ class FlowFrame:
635
678
 
636
679
  # Step 6: Create join mappings if not using Polars code
637
680
  join_mappings = None
638
- if not use_polars_code and how != 'cross':
639
- join_mappings, use_polars_code = _create_join_mappings(
640
- left_columns or [], right_columns or []
641
- )
681
+ if not use_polars_code and how != "cross":
682
+ join_mappings, use_polars_code = _create_join_mappings(left_columns or [], right_columns or [])
642
683
 
643
684
  # Step 7: Execute join based on approach
644
- if use_polars_code or suffix != '_right':
685
+ if use_polars_code or suffix != "_right":
645
686
  return self._execute_polars_code_join(
646
- other, new_node_id, on, left_on, right_on, left_columns, right_columns,
647
- how, suffix, validate, nulls_equal, coalesce, maintain_order, description
648
- )
649
- elif join_mappings or how == 'cross':
650
- return self._execute_native_join(
651
- other, new_node_id, join_mappings, how, description
687
+ other,
688
+ new_node_id,
689
+ on,
690
+ left_on,
691
+ right_on,
692
+ left_columns,
693
+ right_columns,
694
+ how,
695
+ suffix,
696
+ validate,
697
+ nulls_equal,
698
+ coalesce,
699
+ maintain_order,
700
+ description,
652
701
  )
702
+ elif join_mappings or how == "cross":
703
+ return self._execute_native_join(other, new_node_id, join_mappings, how, description)
653
704
  else:
654
705
  raise ValueError("Could not execute join")
655
706
 
656
- def _should_use_polars_code_for_join(
657
- self, maintain_order, coalesce, nulls_equal, validate, suffix
658
- ) -> bool:
707
+ def _should_use_polars_code_for_join(self, maintain_order, coalesce, nulls_equal, validate, suffix) -> bool:
659
708
  """Determine if we should use Polars code instead of native join."""
660
709
  return not (
661
- maintain_order is None and
662
- coalesce is None and
663
- nulls_equal is False and
664
- validate is None and
665
- suffix == '_right'
710
+ maintain_order is None
711
+ and coalesce is None
712
+ and nulls_equal is False
713
+ and validate is None
714
+ and suffix == "_right"
666
715
  )
667
716
 
668
717
  def _ensure_same_graph(self, other: "FlowFrame") -> None:
669
718
  """Ensure both FlowFrames are in the same graph, combining if necessary."""
670
719
  if self.flow_graph.flow_id != other.flow_graph.flow_id:
671
- combined_graph, node_mappings = combine_flow_graphs_with_mapping(
672
- self.flow_graph, other.flow_graph
673
- )
720
+ combined_graph, node_mappings = combine_flow_graphs_with_mapping(self.flow_graph, other.flow_graph)
674
721
 
675
722
  new_self_node_id = node_mappings.get((self.flow_graph.flow_id, self.node_id), None)
676
723
  new_other_node_id = node_mappings.get((other.flow_graph.flow_id, other.node_id), None)
@@ -685,19 +732,19 @@ class FlowFrame:
685
732
  node_id_data["c"] = node_id_data["c"] + len(combined_graph.nodes)
686
733
 
687
734
  def _parse_join_columns(
688
- self,
689
- on: List[str | Column] | str | Column,
690
- left_on: List[str | Column] | str | Column,
691
- right_on: List[str | Column] | str | Column,
692
- how: str
693
- ) -> tuple[List[str] | None, List[str] | None]:
735
+ self,
736
+ on: list[str | Column] | str | Column,
737
+ left_on: list[str | Column] | str | Column,
738
+ right_on: list[str | Column] | str | Column,
739
+ how: str,
740
+ ) -> tuple[list[str] | None, list[str] | None]:
694
741
  """Parse and validate join column specifications."""
695
742
  if on is not None:
696
743
  left_columns = right_columns = _normalize_columns_to_list(on)
697
744
  elif left_on is not None and right_on is not None:
698
745
  left_columns = _normalize_columns_to_list(left_on)
699
746
  right_columns = _normalize_columns_to_list(right_on)
700
- elif how == 'cross' and left_on is None and right_on is None and on is None:
747
+ elif how == "cross" and left_on is None and right_on is None and on is None:
701
748
  left_columns = None
702
749
  right_columns = None
703
750
  else:
@@ -706,37 +753,43 @@ class FlowFrame:
706
753
  return left_columns, right_columns
707
754
 
708
755
  def _execute_polars_code_join(
709
- self,
710
- other: "FlowFrame",
711
- new_node_id: int,
712
- on: List[str | Column] | str | Column,
713
- left_on: List[str | Column] | str | Column,
714
- right_on: List[str | Column] | str | Column,
715
- left_columns: List[str] | None,
716
- right_columns: List[str] | None,
717
- how: str,
718
- suffix: str,
719
- validate: str,
720
- nulls_equal: bool,
721
- coalesce: bool,
722
- maintain_order: Literal[None, "left", "right", "left_right", "right_left"],
723
- description: str,
756
+ self,
757
+ other: "FlowFrame",
758
+ new_node_id: int,
759
+ on: list[str | Column] | str | Column,
760
+ left_on: list[str | Column] | str | Column,
761
+ right_on: list[str | Column] | str | Column,
762
+ left_columns: list[str] | None,
763
+ right_columns: list[str] | None,
764
+ how: str,
765
+ suffix: str,
766
+ validate: str,
767
+ nulls_equal: bool,
768
+ coalesce: bool,
769
+ maintain_order: Literal[None, "left", "right", "left_right", "right_left"],
770
+ description: str,
724
771
  ) -> "FlowFrame":
725
772
  """Execute join using Polars code approach."""
726
773
  # Build the code arguments
727
774
  code_kwargs = self._build_polars_join_kwargs(
728
- on, left_on, right_on, left_columns, right_columns,
729
- how, suffix, validate, nulls_equal, coalesce, maintain_order
775
+ on,
776
+ left_on,
777
+ right_on,
778
+ left_columns,
779
+ right_columns,
780
+ how,
781
+ suffix,
782
+ validate,
783
+ nulls_equal,
784
+ coalesce,
785
+ maintain_order,
730
786
  )
731
787
 
732
788
  kwargs_str = ", ".join(f"{k}={v}" for k, v in code_kwargs.items() if v is not None)
733
789
  code = f"input_df_1.join({kwargs_str})"
734
790
 
735
791
  # Add the Polars code node
736
- self._add_polars_code(
737
- new_node_id, code, description,
738
- depending_on_ids=[self.node_id, other.node_id]
739
- )
792
+ self._add_polars_code(new_node_id, code, description, depending_on_ids=[self.node_id, other.node_id])
740
793
 
741
794
  # Add connections
742
795
  self._add_connection(self.node_id, new_node_id, "main")
@@ -751,28 +804,29 @@ class FlowFrame:
751
804
  )
752
805
 
753
806
  def _build_polars_join_kwargs(
754
- self,
755
- on: List[str | Column] | str | Column,
756
- left_on: List[str | Column] | str | Column,
757
- right_on: List[str | Column] | str | Column,
758
- left_columns: List[str] | None,
759
- right_columns: List[str] | None,
760
- how: str,
761
- suffix: str,
762
- validate: str,
763
- nulls_equal: bool,
764
- coalesce: bool,
765
- maintain_order: Literal[None, "left", "right", "left_right", "right_left"],
807
+ self,
808
+ on: list[str | Column] | str | Column,
809
+ left_on: list[str | Column] | str | Column,
810
+ right_on: list[str | Column] | str | Column,
811
+ left_columns: list[str] | None,
812
+ right_columns: list[str] | None,
813
+ how: str,
814
+ suffix: str,
815
+ validate: str,
816
+ nulls_equal: bool,
817
+ coalesce: bool,
818
+ maintain_order: Literal[None, "left", "right", "left_right", "right_left"],
766
819
  ) -> dict:
767
820
  """Build kwargs dictionary for Polars join code."""
768
821
 
769
822
  def format_column_list(cols):
770
823
  if cols is None:
771
824
  return None
772
- return "[" + ', '.join(
773
- f"'{v}'" if isinstance(v, str) else str(v)
774
- for v in _normalize_columns_to_list(cols)
775
- ) + "]"
825
+ return (
826
+ "["
827
+ + ", ".join(f"'{v}'" if isinstance(v, str) else str(v) for v in _normalize_columns_to_list(cols))
828
+ + "]"
829
+ )
776
830
 
777
831
  return {
778
832
  "other": "input_df_2",
@@ -784,16 +838,16 @@ class FlowFrame:
784
838
  "validate": _to_string_val(validate),
785
839
  "nulls_equal": nulls_equal,
786
840
  "coalesce": coalesce,
787
- "maintain_order": _to_string_val(maintain_order)
841
+ "maintain_order": _to_string_val(maintain_order),
788
842
  }
789
843
 
790
844
  def _execute_native_join(
791
- self,
792
- other: "FlowFrame",
793
- new_node_id: int,
794
- join_mappings: List | None,
795
- how: str,
796
- description: str,
845
+ self,
846
+ other: "FlowFrame",
847
+ new_node_id: int,
848
+ join_mappings: list | None,
849
+ how: str,
850
+ description: str,
797
851
  ) -> "FlowFrame":
798
852
  """Execute join using native FlowFile join nodes."""
799
853
  # Create select inputs for both frames
@@ -801,7 +855,7 @@ class FlowFrame:
801
855
  left_select = transform_schema.SelectInputs.create_from_pl_df(self.data)
802
856
  right_select = transform_schema.SelectInputs.create_from_pl_df(other.data)
803
857
  # Create appropriate join input based on join type
804
- if how == 'cross':
858
+ if how == "cross":
805
859
  join_input = transform_schema.CrossJoinInput(
806
860
  left_select=transform_schema.JoinInputs(renames=left_select.renames),
807
861
  right_select=right_select.renames,
@@ -823,7 +877,7 @@ class FlowFrame:
823
877
  right_column.keep = False
824
878
 
825
879
  # Create and add appropriate node
826
- if how == 'cross':
880
+ if how == "cross":
827
881
  self._add_cross_join_node(new_node_id, join_input_manager.to_cross_join_input(), description, other)
828
882
  else:
829
883
  self._add_regular_join_node(new_node_id, join_input_manager.to_join_input(), description, other)
@@ -840,11 +894,11 @@ class FlowFrame:
840
894
  )
841
895
 
842
896
  def _add_cross_join_node(
843
- self,
844
- new_node_id: int,
845
- join_input: "transform_schema.CrossJoinInput",
846
- description: str,
847
- other: "FlowFrame",
897
+ self,
898
+ new_node_id: int,
899
+ join_input: "transform_schema.CrossJoinInput",
900
+ description: str,
901
+ other: "FlowFrame",
848
902
  ) -> None:
849
903
  """Add a cross join node to the graph."""
850
904
  cross_join_settings = input_schema.NodeCrossJoin(
@@ -853,18 +907,18 @@ class FlowFrame:
853
907
  cross_join_input=join_input,
854
908
  is_setup=True,
855
909
  depending_on_ids=[self.node_id, other.node_id],
856
- description=description or f"Join with cross strategy",
910
+ description=description or "Join with cross strategy",
857
911
  auto_generate_selection=True,
858
912
  verify_integrity=True,
859
913
  )
860
914
  self.flow_graph.add_cross_join(cross_join_settings)
861
915
 
862
916
  def _add_regular_join_node(
863
- self,
864
- new_node_id: int,
865
- join_input: "transform_schema.JoinInput",
866
- description: str,
867
- other: "FlowFrame",
917
+ self,
918
+ new_node_id: int,
919
+ join_input: "transform_schema.JoinInput",
920
+ description: str,
921
+ other: "FlowFrame",
868
922
  ) -> None:
869
923
  """Add a regular join node to the graph."""
870
924
  join_settings = input_schema.NodeJoin(
@@ -889,34 +943,41 @@ class FlowFrame:
889
943
  pos_y=100,
890
944
  is_setup=True,
891
945
  depending_on_id=self.node_id,
892
- description=description
946
+ description=description,
893
947
  )
894
948
  self.flow_graph.add_record_count(node_number_of_records)
895
949
  return self._create_child_frame(new_node_id)
896
950
 
897
- def rename(self, mapping: Mapping[str, str], *, strict: bool = True,
898
- description: str = None) -> "FlowFrame":
951
+ def rename(self, mapping: Mapping[str, str], *, strict: bool = True, description: str = None) -> "FlowFrame":
899
952
  """Rename columns based on a mapping or function."""
900
- return self.select([col(old_name).alias(new_name) for old_name, new_name in mapping.items()],
901
- description=description, _keep_missing=True)
953
+ return self.select(
954
+ [col(old_name).alias(new_name) for old_name, new_name in mapping.items()],
955
+ description=description,
956
+ _keep_missing=True,
957
+ )
902
958
 
903
- def select(self, *columns: Union[str, Expr, Selector], description: Optional[str] = None, _keep_missing: bool = False) -> "FlowFrame":
959
+ def select(
960
+ self, *columns: str | Expr | Selector, description: str | None = None, _keep_missing: bool = False
961
+ ) -> "FlowFrame":
904
962
  """
905
963
  Select columns from the frame.
906
964
  """
907
965
  columns_iterable = list(_parse_inputs_as_iterable(columns))
908
966
  new_node_id = generate_node_id()
909
- if (len(columns_iterable) == 1 and isinstance(columns_iterable[0], Expr)
910
- and str(columns_iterable[0]) == "pl.Expr(len()).alias('number_of_records')"):
967
+ if (
968
+ len(columns_iterable) == 1
969
+ and isinstance(columns_iterable[0], Expr)
970
+ and str(columns_iterable[0]) == "pl.Expr(len()).alias('number_of_records')"
971
+ ):
911
972
  return self._add_number_of_records(new_node_id, description)
912
973
 
913
- all_input_expr_objects: List[Expr] = []
914
- pure_polars_expr_strings_for_select: List[str] = []
915
- collected_raw_definitions: List[str] = []
916
- selected_col_names_for_native: List[transform_schema.SelectInput] = [] # For native node
974
+ all_input_expr_objects: list[Expr] = []
975
+ pure_polars_expr_strings_for_select: list[str] = []
976
+ collected_raw_definitions: list[str] = []
977
+ selected_col_names_for_native: list[transform_schema.SelectInput] = [] # For native node
917
978
 
918
979
  can_use_native_node = True
919
- if len(columns_iterable) == 1 and isinstance(columns_iterable[0], str) and columns_iterable[0] == '*':
980
+ if len(columns_iterable) == 1 and isinstance(columns_iterable[0], str) and columns_iterable[0] == "*":
920
981
  effective_columns_iterable = [col(c_name) for c_name in self.columns]
921
982
  else:
922
983
  effective_columns_iterable = columns_iterable
@@ -950,13 +1011,17 @@ class FlowFrame:
950
1011
  if can_use_native_node:
951
1012
  existing_cols = self.columns
952
1013
  selected_col_names = {select_col.old_name for select_col in selected_col_names_for_native}
953
- not_selected_columns = [transform_schema.SelectInput(c, keep=_keep_missing) for c in existing_cols if
954
- c not in selected_col_names]
1014
+ not_selected_columns = [
1015
+ transform_schema.SelectInput(c, keep=_keep_missing)
1016
+ for c in existing_cols
1017
+ if c not in selected_col_names
1018
+ ]
955
1019
  selected_col_names_for_native.extend(not_selected_columns)
956
1020
  if _keep_missing:
957
1021
  lookup_selection = {_col.old_name: _col for _col in selected_col_names_for_native}
958
- selected_col_names_for_native = [lookup_selection.get(_col) for
959
- _col in existing_cols if _col in lookup_selection]
1022
+ selected_col_names_for_native = [
1023
+ lookup_selection.get(_col) for _col in existing_cols if _col in lookup_selection
1024
+ ]
960
1025
  select_settings = input_schema.NodeSelect(
961
1026
  flow_id=self.flow_graph.flow_id,
962
1027
  node_id=new_node_id,
@@ -966,7 +1031,7 @@ class FlowFrame:
966
1031
  pos_y=100,
967
1032
  is_setup=True,
968
1033
  depending_on_id=self.node_id,
969
- description=description
1034
+ description=description,
970
1035
  )
971
1036
  self.flow_graph.add_select(select_settings)
972
1037
  else:
@@ -975,23 +1040,35 @@ class FlowFrame:
975
1040
  if collected_raw_definitions:
976
1041
  unique_raw_definitions = list(dict.fromkeys(collected_raw_definitions))
977
1042
  definitions_section = "\n\n".join(unique_raw_definitions)
978
- final_code_for_node = definitions_section + \
979
- "\#─────SPLIT─────\n\n" + \
980
- f"output_df = {polars_operation_code}"
1043
+ final_code_for_node = (
1044
+ definitions_section + "\\#─────SPLIT─────\n\n" + f"output_df = {polars_operation_code}"
1045
+ )
981
1046
  else:
982
1047
  final_code_for_node = polars_operation_code
983
1048
 
984
- pl_expressions_for_fallback = [e.expr for e in all_input_expr_objects if
985
- isinstance(e, Expr) and hasattr(e, 'expr') and e.expr is not None]
986
- self._add_polars_code(new_node_id, final_code_for_node, description,
987
- method_name="select",
988
- convertable_to_code=_check_if_convertible_to_code(all_input_expr_objects),
989
- polars_expr=pl_expressions_for_fallback)
1049
+ pl_expressions_for_fallback = [
1050
+ e.expr
1051
+ for e in all_input_expr_objects
1052
+ if isinstance(e, Expr) and hasattr(e, "expr") and e.expr is not None
1053
+ ]
1054
+ self._add_polars_code(
1055
+ new_node_id,
1056
+ final_code_for_node,
1057
+ description,
1058
+ method_name="select",
1059
+ convertable_to_code=_check_if_convertible_to_code(all_input_expr_objects),
1060
+ polars_expr=pl_expressions_for_fallback,
1061
+ )
990
1062
 
991
1063
  return self._create_child_frame(new_node_id)
992
1064
 
993
- def filter(self, *predicates: Union[Expr, Any], flowfile_formula: Optional[str] = None,
994
- description: Optional[str] = None, **constraints: Any) -> "FlowFrame":
1065
+ def filter(
1066
+ self,
1067
+ *predicates: Expr | Any,
1068
+ flowfile_formula: str | None = None,
1069
+ description: str | None = None,
1070
+ **constraints: Any,
1071
+ ) -> "FlowFrame":
995
1072
  """
996
1073
  Filter rows based on a predicate.
997
1074
  """
@@ -1000,9 +1077,9 @@ class FlowFrame:
1000
1077
  available_columns = self.columns
1001
1078
  new_node_id = generate_node_id()
1002
1079
  if len(predicates) > 0 or len(constraints) > 0:
1003
- all_input_expr_objects: List[Expr] = []
1004
- pure_polars_expr_strings: List[str] = []
1005
- collected_raw_definitions: List[str] = []
1080
+ all_input_expr_objects: list[Expr] = []
1081
+ pure_polars_expr_strings: list[str] = []
1082
+ collected_raw_definitions: list[str] = []
1006
1083
 
1007
1084
  processed_predicates = []
1008
1085
  for pred_item in predicates:
@@ -1031,10 +1108,11 @@ class FlowFrame:
1031
1108
  collected_raw_definitions.append(raw_defs_str)
1032
1109
 
1033
1110
  for k, v_val in constraints.items():
1034
- constraint_expr_obj = (col(k) == lit(v_val))
1111
+ constraint_expr_obj = col(k) == lit(v_val)
1035
1112
  all_input_expr_objects.append(constraint_expr_obj)
1036
1113
  pure_expr_str, raw_defs_str = _extract_expr_parts(
1037
- constraint_expr_obj) # Constraint exprs are unlikely to have defs
1114
+ constraint_expr_obj
1115
+ ) # Constraint exprs are unlikely to have defs
1038
1116
  pure_polars_expr_strings.append(f"({pure_expr_str})")
1039
1117
  if raw_defs_str and raw_defs_str not in collected_raw_definitions: # Should be rare here
1040
1118
  collected_raw_definitions.append(raw_defs_str)
@@ -1046,31 +1124,36 @@ class FlowFrame:
1046
1124
  if collected_raw_definitions:
1047
1125
  unique_raw_definitions = list(dict.fromkeys(collected_raw_definitions)) # Order-preserving unique
1048
1126
  definitions_section = "\n\n".join(unique_raw_definitions)
1049
- final_code_for_node = definitions_section + \
1050
- "\#─────SPLIT─────\n\n" + \
1051
- f"output_df = {polars_operation_code}"
1127
+ final_code_for_node = (
1128
+ definitions_section + "\\#─────SPLIT─────\n\n" + f"output_df = {polars_operation_code}"
1129
+ )
1052
1130
  else:
1053
1131
  final_code_for_node = polars_operation_code
1054
1132
 
1055
1133
  convertable_to_code = _check_if_convertible_to_code(all_input_expr_objects)
1056
- pl_expressions_for_fallback = [e.expr for e in all_input_expr_objects if
1057
- isinstance(e, Expr) and hasattr(e, 'expr') and e.expr is not None]
1058
- self._add_polars_code(new_node_id, final_code_for_node, description, method_name="filter",
1059
- convertable_to_code=convertable_to_code,
1060
- polars_expr=pl_expressions_for_fallback)
1134
+ pl_expressions_for_fallback = [
1135
+ e.expr
1136
+ for e in all_input_expr_objects
1137
+ if isinstance(e, Expr) and hasattr(e, "expr") and e.expr is not None
1138
+ ]
1139
+ self._add_polars_code(
1140
+ new_node_id,
1141
+ final_code_for_node,
1142
+ description,
1143
+ method_name="filter",
1144
+ convertable_to_code=convertable_to_code,
1145
+ polars_expr=pl_expressions_for_fallback,
1146
+ )
1061
1147
  elif flowfile_formula:
1062
1148
  filter_settings = input_schema.NodeFilter(
1063
1149
  flow_id=self.flow_graph.flow_id,
1064
1150
  node_id=new_node_id,
1065
- filter_input=transform_schema.FilterInput(
1066
- advanced_filter=flowfile_formula,
1067
- filter_type="advanced"
1068
- ),
1151
+ filter_input=transform_schema.FilterInput(advanced_filter=flowfile_formula, filter_type="advanced"),
1069
1152
  pos_x=200,
1070
1153
  pos_y=150,
1071
1154
  is_setup=True,
1072
1155
  depending_on_id=self.node_id,
1073
- description=description
1156
+ description=description,
1074
1157
  )
1075
1158
  self.flow_graph.add_filter(filter_settings)
1076
1159
  else:
@@ -1079,12 +1162,7 @@ class FlowFrame:
1079
1162
 
1080
1163
  return self._create_child_frame(new_node_id)
1081
1164
 
1082
- def sink_csv(self,
1083
- file: str,
1084
- *args,
1085
- separator: str = ",",
1086
- encoding: str = "utf-8",
1087
- description: str = None):
1165
+ def sink_csv(self, file: str, *args, separator: str = ",", encoding: str = "utf-8", description: str = None):
1088
1166
  """
1089
1167
  Write the data to a CSV file.
1090
1168
 
@@ -1100,12 +1178,12 @@ class FlowFrame:
1100
1178
  return self.write_csv(file, *args, separator=separator, encoding=encoding, description=description)
1101
1179
 
1102
1180
  def write_parquet(
1103
- self,
1104
- path: str | os.PathLike,
1105
- *,
1106
- description: str = None,
1107
- convert_to_absolute_path: bool = True,
1108
- **kwargs: Any,
1181
+ self,
1182
+ path: str | os.PathLike,
1183
+ *,
1184
+ description: str = None,
1185
+ convert_to_absolute_path: bool = True,
1186
+ **kwargs: Any,
1109
1187
  ) -> "FlowFrame":
1110
1188
  """
1111
1189
  Write the data to a Parquet file. Creates a standard Output node if only
@@ -1143,10 +1221,10 @@ class FlowFrame:
1143
1221
  use_polars_code = bool(kwargs.items()) or not is_path_input
1144
1222
 
1145
1223
  output_settings = input_schema.OutputSettings(
1146
- file_type='parquet',
1224
+ file_type="parquet",
1147
1225
  name=file_name,
1148
1226
  directory=file_str if is_path_input else str(file_str),
1149
- table_settings=input_schema.OutputParquetTable()
1227
+ table_settings=input_schema.OutputParquetTable(),
1150
1228
  )
1151
1229
 
1152
1230
  if is_path_input:
@@ -1163,7 +1241,7 @@ class FlowFrame:
1163
1241
  node_id=new_node_id,
1164
1242
  output_settings=output_settings,
1165
1243
  depending_on_id=self.node_id,
1166
- description=description
1244
+ description=description,
1167
1245
  )
1168
1246
  self.flow_graph.add_output(node_output)
1169
1247
  else:
@@ -1189,16 +1267,15 @@ class FlowFrame:
1189
1267
  return self._create_child_frame(new_node_id)
1190
1268
 
1191
1269
  def write_csv(
1192
- self,
1193
- file: str | os.PathLike,
1194
- *,
1195
- separator: str = ",",
1196
- encoding: str = "utf-8",
1197
- description: str = None,
1198
- convert_to_absolute_path: bool = True,
1199
- **kwargs: Any,
1270
+ self,
1271
+ file: str | os.PathLike,
1272
+ *,
1273
+ separator: str = ",",
1274
+ encoding: str = "utf-8",
1275
+ description: str = None,
1276
+ convert_to_absolute_path: bool = True,
1277
+ **kwargs: Any,
1200
1278
  ) -> "FlowFrame":
1201
-
1202
1279
  new_node_id = generate_node_id()
1203
1280
  is_path_input = isinstance(file, (str, os.PathLike))
1204
1281
  if isinstance(file, os.PathLike):
@@ -1214,13 +1291,10 @@ class FlowFrame:
1214
1291
 
1215
1292
  use_polars_code = bool(kwargs) or not is_path_input
1216
1293
  output_settings = input_schema.OutputSettings(
1217
- file_type='csv',
1294
+ file_type="csv",
1218
1295
  name=file_name,
1219
1296
  directory=file_str if is_path_input else str(file_str),
1220
- table_settings=input_schema.OutputCsvTable(
1221
- delimiter=separator,
1222
- encoding=encoding
1223
- )
1297
+ table_settings=input_schema.OutputCsvTable(delimiter=separator, encoding=encoding),
1224
1298
  )
1225
1299
  if is_path_input:
1226
1300
  try:
@@ -1236,7 +1310,7 @@ class FlowFrame:
1236
1310
  node_id=new_node_id,
1237
1311
  output_settings=output_settings,
1238
1312
  depending_on_id=self.node_id,
1239
- description=description
1313
+ description=description,
1240
1314
  )
1241
1315
  self.flow_graph.add_output(node_output)
1242
1316
  else:
@@ -1250,9 +1324,9 @@ class FlowFrame:
1250
1324
  path_arg_repr = repr(output_settings.directory)
1251
1325
 
1252
1326
  all_kwargs_for_code = {
1253
- 'separator': separator,
1254
- 'encoding': encoding,
1255
- **kwargs # Add the extra kwargs
1327
+ "separator": separator,
1328
+ "encoding": encoding,
1329
+ **kwargs, # Add the extra kwargs
1256
1330
  }
1257
1331
  kwargs_repr = ", ".join(f"{k}={repr(v)}" for k, v in all_kwargs_for_code.items())
1258
1332
 
@@ -1266,42 +1340,47 @@ class FlowFrame:
1266
1340
 
1267
1341
  return self._create_child_frame(new_node_id)
1268
1342
 
1269
- def write_parquet_to_cloud_storage(self,
1270
- path: str,
1271
- connection_name: Optional[str] = None,
1272
- compression: Literal["snappy", "gzip", "brotli", "lz4", "zstd"] = "snappy",
1273
- description: Optional[str] = None,
1274
- ) -> "FlowFrame":
1343
+ def write_parquet_to_cloud_storage(
1344
+ self,
1345
+ path: str,
1346
+ connection_name: str | None = None,
1347
+ compression: Literal["snappy", "gzip", "brotli", "lz4", "zstd"] = "snappy",
1348
+ description: str | None = None,
1349
+ ) -> "FlowFrame":
1275
1350
  """
1276
- Write the data frame to cloud storage in Parquet format.
1277
-
1278
- Args:
1279
- path (str): The destination path in cloud storage where the Parquet file will be written.
1280
- connection_name (Optional[str], optional): The name of the storage connection
1281
- that a user can create. If None, uses the default connection. Defaults to None.
1282
- compression (Literal["snappy", "gzip", "brotli", "lz4", "zstd"], optional):
1283
- The compression algorithm to use for the Parquet file. Defaults to "snappy".
1284
- description (Optional[str], optional): Description of this operation for the ETL graph.
1285
-
1286
- Returns:
1287
- FlowFrame: A new child data frame representing the written data.
1351
+ Write the data frame to cloud storage in Parquet format.
1352
+
1353
+ Args:
1354
+ path (str): The destination path in cloud storage where the Parquet file will be written.
1355
+ connection_name (Optional[str], optional): The name of the storage connection
1356
+ that a user can create. If None, uses the default connection. Defaults to None.
1357
+ compression (Literal["snappy", "gzip", "brotli", "lz4", "zstd"], optional):
1358
+ The compression algorithm to use for the Parquet file. Defaults to "snappy".
1359
+ description (Optional[str], optional): Description of this operation for the ETL graph.
1360
+
1361
+ Returns:
1362
+ FlowFrame: A new child data frame representing the written data.
1288
1363
  """
1289
1364
 
1290
- new_node_id = add_write_ff_to_cloud_storage(path, flow_graph=self.flow_graph,
1291
- connection_name=connection_name,
1292
- depends_on_node_id=self.node_id,
1293
- parquet_compression=compression,
1294
- file_format="parquet",
1295
- description=description)
1365
+ new_node_id = add_write_ff_to_cloud_storage(
1366
+ path,
1367
+ flow_graph=self.flow_graph,
1368
+ connection_name=connection_name,
1369
+ depends_on_node_id=self.node_id,
1370
+ parquet_compression=compression,
1371
+ file_format="parquet",
1372
+ description=description,
1373
+ )
1296
1374
  return self._create_child_frame(new_node_id)
1297
1375
 
1298
- def write_csv_to_cloud_storage(self,
1299
- path: str,
1300
- connection_name: Optional[str] = None,
1301
- delimiter: str = ";",
1302
- encoding: CsvEncoding = "utf8",
1303
- description: Optional[str] = None,
1304
- ) -> "FlowFrame":
1376
+ def write_csv_to_cloud_storage(
1377
+ self,
1378
+ path: str,
1379
+ connection_name: str | None = None,
1380
+ delimiter: str = ";",
1381
+ encoding: CsvEncoding = "utf8",
1382
+ description: str | None = None,
1383
+ ) -> "FlowFrame":
1305
1384
  """
1306
1385
  Write the data frame to cloud storage in CSV format.
1307
1386
 
@@ -1318,21 +1397,25 @@ class FlowFrame:
1318
1397
  Returns:
1319
1398
  FlowFrame: A new child data frame representing the written data.
1320
1399
  """
1321
- new_node_id = add_write_ff_to_cloud_storage(path, flow_graph=self.flow_graph,
1322
- connection_name=connection_name,
1323
- depends_on_node_id=self.node_id,
1324
- csv_delimiter=delimiter,
1325
- csv_encoding=encoding,
1326
- file_format="csv",
1327
- description=description)
1400
+ new_node_id = add_write_ff_to_cloud_storage(
1401
+ path,
1402
+ flow_graph=self.flow_graph,
1403
+ connection_name=connection_name,
1404
+ depends_on_node_id=self.node_id,
1405
+ csv_delimiter=delimiter,
1406
+ csv_encoding=encoding,
1407
+ file_format="csv",
1408
+ description=description,
1409
+ )
1328
1410
  return self._create_child_frame(new_node_id)
1329
1411
 
1330
- def write_delta(self,
1331
- path: str,
1332
- connection_name: Optional[str] = None,
1333
- write_mode: Literal["overwrite", "append"] = "overwrite",
1334
- description: Optional[str] = None,
1335
- ) -> "FlowFrame":
1412
+ def write_delta(
1413
+ self,
1414
+ path: str,
1415
+ connection_name: str | None = None,
1416
+ write_mode: Literal["overwrite", "append"] = "overwrite",
1417
+ description: str | None = None,
1418
+ ) -> "FlowFrame":
1336
1419
  """
1337
1420
  Write the data frame to cloud storage in Delta Lake format.
1338
1421
 
@@ -1346,19 +1429,23 @@ class FlowFrame:
1346
1429
  Returns:
1347
1430
  FlowFrame: A new child data frame representing the written data.
1348
1431
  """
1349
- new_node_id = add_write_ff_to_cloud_storage(path, flow_graph=self.flow_graph,
1350
- connection_name=connection_name,
1351
- depends_on_node_id=self.node_id,
1352
- write_mode=write_mode,
1353
- file_format="delta",
1354
- description=description)
1432
+ new_node_id = add_write_ff_to_cloud_storage(
1433
+ path,
1434
+ flow_graph=self.flow_graph,
1435
+ connection_name=connection_name,
1436
+ depends_on_node_id=self.node_id,
1437
+ write_mode=write_mode,
1438
+ file_format="delta",
1439
+ description=description,
1440
+ )
1355
1441
  return self._create_child_frame(new_node_id)
1356
1442
 
1357
- def write_json_to_cloud_storage(self,
1358
- path: str,
1359
- connection_name: Optional[str] = None,
1360
- description: Optional[str] = None,
1361
- ) -> "FlowFrame":
1443
+ def write_json_to_cloud_storage(
1444
+ self,
1445
+ path: str,
1446
+ connection_name: str | None = None,
1447
+ description: str | None = None,
1448
+ ) -> "FlowFrame":
1362
1449
  """
1363
1450
  Write the data frame to cloud storage in JSON format.
1364
1451
 
@@ -1370,11 +1457,14 @@ class FlowFrame:
1370
1457
  Returns:
1371
1458
  FlowFrame: A new child data frame representing the written data.
1372
1459
  """
1373
- new_node_id = add_write_ff_to_cloud_storage(path, flow_graph=self.flow_graph,
1374
- connection_name=connection_name,
1375
- depends_on_node_id=self.node_id,
1376
- file_format="json",
1377
- description=description)
1460
+ new_node_id = add_write_ff_to_cloud_storage(
1461
+ path,
1462
+ flow_graph=self.flow_graph,
1463
+ connection_name=connection_name,
1464
+ depends_on_node_id=self.node_id,
1465
+ file_format="json",
1466
+ description=description,
1467
+ )
1378
1468
  return self._create_child_frame(new_node_id)
1379
1469
 
1380
1470
  def group_by(self, *by, description: str = None, maintain_order=False, **named_by) -> GroupByFrame:
@@ -1411,7 +1501,10 @@ class FlowFrame:
1411
1501
  # Create a GroupByFrame
1412
1502
  return GroupByFrame(
1413
1503
  node_id=new_node_id,
1414
- parent_frame=self, by_cols=by_cols, maintain_order=maintain_order, description=description
1504
+ parent_frame=self,
1505
+ by_cols=by_cols,
1506
+ maintain_order=maintain_order,
1507
+ description=description,
1415
1508
  )
1416
1509
 
1417
1510
  def to_graph(self):
@@ -1419,7 +1512,7 @@ class FlowFrame:
1419
1512
  return self.flow_graph
1420
1513
 
1421
1514
  def save_graph(self, file_path: str, auto_arrange: bool = True):
1422
- """Save the graph """
1515
+ """Save the graph"""
1423
1516
  if auto_arrange:
1424
1517
  self.flow_graph.apply_layout()
1425
1518
  self.flow_graph.save_flow(file_path)
@@ -1432,23 +1525,27 @@ class FlowFrame:
1432
1525
 
1433
1526
  def _with_flowfile_formula(self, flowfile_formula: str, output_column_name, description: str = None) -> "FlowFrame":
1434
1527
  new_node_id = generate_node_id()
1435
- function_settings = (
1436
- input_schema.NodeFormula(flow_id=self.flow_graph.flow_id, node_id=new_node_id, depending_on_id=self.node_id,
1437
- function=transform_schema.FunctionInput(
1438
- function=flowfile_formula,
1439
- field=transform_schema.FieldInput(name=output_column_name, data_type='Auto')),
1440
- description=description))
1528
+ function_settings = input_schema.NodeFormula(
1529
+ flow_id=self.flow_graph.flow_id,
1530
+ node_id=new_node_id,
1531
+ depending_on_id=self.node_id,
1532
+ function=transform_schema.FunctionInput(
1533
+ function=flowfile_formula, field=transform_schema.FieldInput(name=output_column_name, data_type="Auto")
1534
+ ),
1535
+ description=description,
1536
+ )
1441
1537
  self.flow_graph.add_formula(function_settings)
1442
1538
  return self._create_child_frame(new_node_id)
1443
1539
 
1444
1540
  def head(self, n: int, description: str = None):
1445
1541
  new_node_id = generate_node_id()
1446
- settings = input_schema.NodeSample(flow_id=self.flow_graph.flow_id,
1447
- node_id=new_node_id,
1448
- depending_on_id=self.node_id,
1449
- sample_size=n,
1450
- description=description
1451
- )
1542
+ settings = input_schema.NodeSample(
1543
+ flow_id=self.flow_graph.flow_id,
1544
+ node_id=new_node_id,
1545
+ depending_on_id=self.node_id,
1546
+ sample_size=n,
1547
+ description=description,
1548
+ )
1452
1549
  self.flow_graph.add_sample(settings)
1453
1550
  return self._create_child_frame(new_node_id)
1454
1551
 
@@ -1464,16 +1561,18 @@ class FlowFrame:
1464
1561
  def get_node_settings(self) -> FlowNode:
1465
1562
  return self.flow_graph.get_node(self.node_id)
1466
1563
 
1467
- def pivot(self,
1468
- on: str | list[str],
1469
- *,
1470
- index: str | list[str] | None = None,
1471
- values: str | list[str] | None = None,
1472
- aggregate_function: str | None = "first",
1473
- maintain_order: bool = True,
1474
- sort_columns: bool = False,
1475
- separator: str = '_',
1476
- description: str = None) -> "FlowFrame":
1564
+ def pivot(
1565
+ self,
1566
+ on: str | list[str],
1567
+ *,
1568
+ index: str | list[str] | None = None,
1569
+ values: str | list[str] | None = None,
1570
+ aggregate_function: str | None = "first",
1571
+ maintain_order: bool = True,
1572
+ sort_columns: bool = False,
1573
+ separator: str = "_",
1574
+ description: str = None,
1575
+ ) -> "FlowFrame":
1477
1576
  """
1478
1577
  Pivot a DataFrame from long to wide format.
1479
1578
 
@@ -1522,17 +1621,14 @@ class FlowFrame:
1522
1621
  value_col = values if isinstance(values, str) else values[0]
1523
1622
 
1524
1623
  # Set valid aggregations
1525
- valid_aggs = ['first', 'last', 'min', 'max', 'sum', 'mean', 'median', 'count']
1624
+ valid_aggs = ["first", "last", "min", "max", "sum", "mean", "median", "count"]
1526
1625
  if aggregate_function not in valid_aggs:
1527
- raise ValueError(f"Invalid aggregate_function: {aggregate_function}. "
1528
- f"Must be one of: {', '.join(valid_aggs)}")
1626
+ raise ValueError(
1627
+ f"Invalid aggregate_function: {aggregate_function}. " f"Must be one of: {', '.join(valid_aggs)}"
1628
+ )
1529
1629
 
1530
1630
  # Check if we can use the native implementation
1531
- can_use_native = (
1532
- isinstance(on_value, str) and
1533
- isinstance(value_col, str) and
1534
- aggregate_function in valid_aggs
1535
- )
1631
+ can_use_native = isinstance(on_value, str) and isinstance(value_col, str) and aggregate_function in valid_aggs
1536
1632
 
1537
1633
  if can_use_native:
1538
1634
  # Create pivot input for native implementation
@@ -1540,7 +1636,7 @@ class FlowFrame:
1540
1636
  index_columns=index_columns,
1541
1637
  pivot_column=on_value,
1542
1638
  value_col=value_col,
1543
- aggregations=[aggregate_function]
1639
+ aggregations=[aggregate_function],
1544
1640
  )
1545
1641
 
1546
1642
  # Create node settings
@@ -1552,7 +1648,7 @@ class FlowFrame:
1552
1648
  pos_y=150,
1553
1649
  is_setup=True,
1554
1650
  depending_on_id=self.node_id,
1555
- description=description or f"Pivot {value_col} by {on_value}"
1651
+ description=description or f"Pivot {value_col} by {on_value}",
1556
1652
  )
1557
1653
 
1558
1654
  # Add to graph using native implementation
@@ -1580,8 +1676,9 @@ class FlowFrame:
1580
1676
  # Generate description if not provided
1581
1677
  if description is None:
1582
1678
  on_str = on if isinstance(on, str) else ", ".join(on if isinstance(on, list) else [on])
1583
- values_str = values if isinstance(values, str) else ", ".join(
1584
- values if isinstance(values, list) else [values])
1679
+ values_str = (
1680
+ values if isinstance(values, str) else ", ".join(values if isinstance(values, list) else [values])
1681
+ )
1585
1682
  description = f"Pivot {values_str} by {on_str}"
1586
1683
 
1587
1684
  # Add polars code node
@@ -1589,13 +1686,15 @@ class FlowFrame:
1589
1686
 
1590
1687
  return self._create_child_frame(new_node_id)
1591
1688
 
1592
- def unpivot(self,
1593
- on: list[str | Selector] | str | None | Selector = None,
1594
- *,
1595
- index: list[str] | str | None = None,
1596
- variable_name: str = "variable",
1597
- value_name: str = "value",
1598
- description: str = None) -> "FlowFrame":
1689
+ def unpivot(
1690
+ self,
1691
+ on: list[str | Selector] | str | None | Selector = None,
1692
+ *,
1693
+ index: list[str] | str | None = None,
1694
+ variable_name: str = "variable",
1695
+ value_name: str = "value",
1696
+ description: str = None,
1697
+ ) -> "FlowFrame":
1599
1698
  """
1600
1699
  Unpivot a DataFrame from wide to long format.
1601
1700
 
@@ -1642,13 +1741,13 @@ class FlowFrame:
1642
1741
  value_columns = [on]
1643
1742
 
1644
1743
  if can_use_native:
1645
- can_use_native = (variable_name == "variable" and value_name == "value")
1744
+ can_use_native = variable_name == "variable" and value_name == "value"
1646
1745
  if can_use_native:
1647
1746
  unpivot_input = transform_schema.UnpivotInput(
1648
1747
  index_columns=index_columns,
1649
1748
  value_columns=value_columns,
1650
1749
  data_type_selector=None,
1651
- data_type_selector_mode='column'
1750
+ data_type_selector_mode="column",
1652
1751
  )
1653
1752
 
1654
1753
  # Create node settings
@@ -1660,7 +1759,7 @@ class FlowFrame:
1660
1759
  pos_y=150,
1661
1760
  is_setup=True,
1662
1761
  depending_on_id=self.node_id,
1663
- description=description or "Unpivot data from wide to long format"
1762
+ description=description or "Unpivot data from wide to long format",
1664
1763
  )
1665
1764
 
1666
1765
  # Add to graph using native implementation
@@ -1696,7 +1795,7 @@ class FlowFrame:
1696
1795
 
1697
1796
  def concat(
1698
1797
  self,
1699
- other: "FlowFrame" | List["FlowFrame"],
1798
+ other: "FlowFrame" | list["FlowFrame"],
1700
1799
  how: str = "vertical",
1701
1800
  rechunk: bool = False,
1702
1801
  parallel: bool = True,
@@ -1797,14 +1896,11 @@ class FlowFrame:
1797
1896
 
1798
1897
  # Add polars code node with dependencies on all input frames
1799
1898
  depending_on_ids = [self.node_id] + [frame.node_id for frame in others]
1800
- self._add_polars_code(
1801
- new_node_id, code, description, depending_on_ids=depending_on_ids
1802
- )
1899
+ self._add_polars_code(new_node_id, code, description, depending_on_ids=depending_on_ids)
1803
1900
  # Add connections to ensure all frames are available
1804
1901
  self._add_connection(self.node_id, new_node_id, "main")
1805
1902
 
1806
1903
  for other_frame in others:
1807
-
1808
1904
  other_frame.flow_graph = combined_graph
1809
1905
  other_frame._add_connection(other_frame.node_id, new_node_id, "main")
1810
1906
  # Create and return the new frame
@@ -1816,8 +1912,8 @@ class FlowFrame:
1816
1912
  )
1817
1913
 
1818
1914
  def _detect_cum_count_record_id(
1819
- self, expr: Any, new_node_id: int, description: Optional[str] = None
1820
- ) -> Tuple[bool, Optional["FlowFrame"]]:
1915
+ self, expr: Any, new_node_id: int, description: str | None = None
1916
+ ) -> tuple[bool, Optional["FlowFrame"]]:
1821
1917
  """
1822
1918
  Detect if the expression is a cum_count operation and use record_id if possible.
1823
1919
 
@@ -1838,8 +1934,12 @@ class FlowFrame:
1838
1934
  - Optional[FlowFrame]: The new FlowFrame if detection was successful, otherwise None
1839
1935
  """
1840
1936
  # Check if this is a cum_count operation
1841
- if (not isinstance(expr, Expr) or not expr._repr_str
1842
- or "cum_count" not in expr._repr_str or not hasattr(expr, "name")):
1937
+ if (
1938
+ not isinstance(expr, Expr)
1939
+ or not expr._repr_str
1940
+ or "cum_count" not in expr._repr_str
1941
+ or not hasattr(expr, "name")
1942
+ ):
1843
1943
  return False, None
1844
1944
 
1845
1945
  # Extract the output name
@@ -1926,24 +2026,24 @@ class FlowFrame:
1926
2026
  return False, None
1927
2027
 
1928
2028
  def with_columns(
1929
- self,
1930
- *exprs: Union[Expr, Iterable[Expr], Any], # Allow Any for implicit lit conversion
1931
- flowfile_formulas: Optional[List[str]] = None,
1932
- output_column_names: Optional[List[str]] = None,
1933
- description: Optional[str] = None,
1934
- **named_exprs: Union[Expr, Any], # Allow Any for implicit lit conversion
2029
+ self,
2030
+ *exprs: Expr | Iterable[Expr] | Any, # Allow Any for implicit lit conversion
2031
+ flowfile_formulas: list[str] | None = None,
2032
+ output_column_names: list[str] | None = None,
2033
+ description: str | None = None,
2034
+ **named_exprs: Expr | Any, # Allow Any for implicit lit conversion
1935
2035
  ) -> "FlowFrame":
1936
2036
  """
1937
2037
  Add or replace columns in the DataFrame.
1938
2038
  """
1939
2039
  new_node_id = generate_node_id()
1940
2040
 
1941
- all_input_expr_objects: List[Expr] = []
1942
- pure_polars_expr_strings_for_wc: List[str] = []
1943
- collected_raw_definitions: List[str] = []
2041
+ all_input_expr_objects: list[Expr] = []
2042
+ pure_polars_expr_strings_for_wc: list[str] = []
2043
+ collected_raw_definitions: list[str] = []
1944
2044
  has_exprs_or_named_exprs = bool(exprs or named_exprs)
1945
2045
  if has_exprs_or_named_exprs:
1946
- actual_exprs_to_process: List[Expr] = []
2046
+ actual_exprs_to_process: list[Expr] = []
1947
2047
  temp_exprs_iterable = list(_parse_inputs_as_iterable(exprs))
1948
2048
 
1949
2049
  for item in temp_exprs_iterable:
@@ -1974,38 +2074,43 @@ class FlowFrame:
1974
2074
  if collected_raw_definitions:
1975
2075
  unique_raw_definitions = list(dict.fromkeys(collected_raw_definitions))
1976
2076
  definitions_section = "\n\n".join(unique_raw_definitions)
1977
- final_code_for_node = definitions_section + \
1978
- "\n#─────SPLIT─────\n\n" + \
1979
- f"output_df = {polars_operation_code}"
2077
+ final_code_for_node = (
2078
+ definitions_section + "\n#─────SPLIT─────\n\n" + f"output_df = {polars_operation_code}"
2079
+ )
1980
2080
  else:
1981
2081
  final_code_for_node = polars_operation_code
1982
2082
 
1983
- pl_expressions_for_fallback = [e.expr for e in all_input_expr_objects if
1984
- isinstance(e, Expr) and hasattr(e, 'expr') and e.expr is not None]
1985
- self._add_polars_code(new_node_id, final_code_for_node, description, method_name='with_columns',
1986
- convertable_to_code=_check_if_convertible_to_code(all_input_expr_objects),
1987
- polars_expr=pl_expressions_for_fallback)
2083
+ pl_expressions_for_fallback = [
2084
+ e.expr
2085
+ for e in all_input_expr_objects
2086
+ if isinstance(e, Expr) and hasattr(e, "expr") and e.expr is not None
2087
+ ]
2088
+ self._add_polars_code(
2089
+ new_node_id,
2090
+ final_code_for_node,
2091
+ description,
2092
+ method_name="with_columns",
2093
+ convertable_to_code=_check_if_convertible_to_code(all_input_expr_objects),
2094
+ polars_expr=pl_expressions_for_fallback,
2095
+ )
1988
2096
  return self._create_child_frame(new_node_id)
1989
2097
 
1990
2098
  elif flowfile_formulas is not None and output_column_names is not None:
1991
-
1992
2099
  if len(output_column_names) != len(flowfile_formulas):
1993
- raise ValueError(
1994
- "Length of both the formulas and the output columns names must be identical"
1995
- )
2100
+ raise ValueError("Length of both the formulas and the output columns names must be identical")
1996
2101
 
1997
2102
  if len(flowfile_formulas) == 1:
1998
2103
  return self._with_flowfile_formula(flowfile_formulas[0], output_column_names[0], description)
1999
2104
  ff = self
2000
- for i, (flowfile_formula, output_column_name) in enumerate(zip(flowfile_formulas, output_column_names)):
2105
+ for i, (flowfile_formula, output_column_name) in enumerate(
2106
+ zip(flowfile_formulas, output_column_names, strict=False)
2107
+ ):
2001
2108
  ff = ff._with_flowfile_formula(flowfile_formula, output_column_name, f"{i}: {description}")
2002
2109
  return ff
2003
2110
  else:
2004
2111
  raise ValueError("Either exprs/named_exprs or flowfile_formulas with output_column_names must be provided")
2005
2112
 
2006
- def with_row_index(
2007
- self, name: str = "index", offset: int = 0, description: str = None
2008
- ) -> "FlowFrame":
2113
+ def with_row_index(self, name: str = "index", offset: int = 0, description: str = None) -> "FlowFrame":
2009
2114
  """
2010
2115
  Add a row index as the first column in the DataFrame.
2011
2116
 
@@ -2052,9 +2157,7 @@ class FlowFrame:
2052
2157
  else:
2053
2158
  # Use the polars code approach for other cases
2054
2159
  code = f"input_df.with_row_index(name='{name}', offset={offset})"
2055
- self._add_polars_code(
2056
- new_node_id, code, description or f"Add row index column '{name}'"
2057
- )
2160
+ self._add_polars_code(new_node_id, code, description or f"Add row index column '{name}'")
2058
2161
 
2059
2162
  return self._create_child_frame(new_node_id)
2060
2163
 
@@ -2088,9 +2191,7 @@ class FlowFrame:
2088
2191
  all_columns = []
2089
2192
 
2090
2193
  if isinstance(columns, (list, tuple)):
2091
- all_columns.extend(
2092
- [col.column_name if isinstance(col, Column) else col for col in columns]
2093
- )
2194
+ all_columns.extend([col.column_name if isinstance(col, Column) else col for col in columns])
2094
2195
  else:
2095
2196
  all_columns.append(columns.column_name if isinstance(columns, Column) else columns)
2096
2197
 
@@ -2099,10 +2200,9 @@ class FlowFrame:
2099
2200
  all_columns.append(col.column_name if isinstance(col, Column) else col)
2100
2201
 
2101
2202
  if len(all_columns) == 1:
2102
-
2103
2203
  columns_str = stringify_values(all_columns[0])
2104
2204
  else:
2105
- columns_str = "[" + ", ".join([ stringify_values(col) for col in all_columns]) + "]"
2205
+ columns_str = "[" + ", ".join([stringify_values(col) for col in all_columns]) + "]"
2106
2206
 
2107
2207
  code = f"""
2108
2208
  # Explode columns into multiple rows
@@ -2117,24 +2217,25 @@ class FlowFrame:
2117
2217
 
2118
2218
  return self._create_child_frame(new_node_id)
2119
2219
 
2120
- def fuzzy_match(self,
2121
- other: "FlowFrame",
2122
- fuzzy_mappings: List[FuzzyMapping],
2123
- description: str = None,
2124
- ) -> "FlowFrame":
2220
+ def fuzzy_match(
2221
+ self,
2222
+ other: "FlowFrame",
2223
+ fuzzy_mappings: list[FuzzyMapping],
2224
+ description: str = None,
2225
+ ) -> "FlowFrame":
2125
2226
  self._ensure_same_graph(other)
2126
2227
 
2127
2228
  # Step 3: Generate new node ID
2128
2229
  new_node_id = generate_node_id()
2129
- node_fuzzy_match = input_schema.NodeFuzzyMatch(flow_id=self.flow_graph.flow_id,
2130
- node_id=new_node_id,
2131
- join_input=
2132
- transform_schema.FuzzyMatchInput(join_mapping=fuzzy_mappings,
2133
- left_select=self.columns,
2134
- right_select=other.columns),
2135
- description=description or "Fuzzy match between two FlowFrames",
2136
- depending_on_ids=[self.node_id, other.node_id],
2137
- )
2230
+ node_fuzzy_match = input_schema.NodeFuzzyMatch(
2231
+ flow_id=self.flow_graph.flow_id,
2232
+ node_id=new_node_id,
2233
+ join_input=transform_schema.FuzzyMatchInput(
2234
+ join_mapping=fuzzy_mappings, left_select=self.columns, right_select=other.columns
2235
+ ),
2236
+ description=description or "Fuzzy match between two FlowFrames",
2237
+ depending_on_ids=[self.node_id, other.node_id],
2238
+ )
2138
2239
  self.flow_graph.add_fuzzy_match(node_fuzzy_match)
2139
2240
  self._add_connection(self.node_id, new_node_id, "main")
2140
2241
  other._add_connection(other.node_id, new_node_id, "right")
@@ -2213,7 +2314,7 @@ class FlowFrame:
2213
2314
 
2214
2315
  def unique(
2215
2316
  self,
2216
- subset: Union[str, "Expr", List[ Union[ str, "Expr"]]] = None,
2317
+ subset: Union[str, "Expr", list[Union[str, "Expr"]]] = None,
2217
2318
  *,
2218
2319
  keep: Literal["first", "last", "any", "none"] = "any",
2219
2320
  maintain_order: bool = False,
@@ -2270,17 +2371,11 @@ class FlowFrame:
2270
2371
  break
2271
2372
 
2272
2373
  # Determine if we can use the native implementation
2273
- can_use_native = (
2274
- can_use_native
2275
- and keep in ["any", "first", "last", "none"]
2276
- and not maintain_order
2277
- )
2374
+ can_use_native = can_use_native and keep in ["any", "first", "last", "none"] and not maintain_order
2278
2375
 
2279
2376
  if can_use_native:
2280
2377
  # Use the native NodeUnique implementation
2281
- unique_input = transform_schema.UniqueInput(
2282
- columns=processed_subset, strategy=keep
2283
- )
2378
+ unique_input = transform_schema.UniqueInput(columns=processed_subset, strategy=keep)
2284
2379
 
2285
2380
  # Create node settings
2286
2381
  unique_settings = input_schema.NodeUnique(
@@ -2333,12 +2428,12 @@ class FlowFrame:
2333
2428
  return self._create_child_frame(new_node_id)
2334
2429
 
2335
2430
  @property
2336
- def columns(self) -> List[str]:
2431
+ def columns(self) -> list[str]:
2337
2432
  """Get the column names."""
2338
2433
  return self.data.collect_schema().names()
2339
2434
 
2340
2435
  @property
2341
- def dtypes(self) -> List[pl.DataType]:
2436
+ def dtypes(self) -> list[pl.DataType]:
2342
2437
  """Get the column data types."""
2343
2438
  return self.data.dtypes
2344
2439