Flowfile 0.4.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (332)
  1. build_backends/main.py +25 -22
  2. build_backends/main_prd.py +10 -19
  3. flowfile/__init__.py +179 -73
  4. flowfile/__main__.py +10 -7
  5. flowfile/api.py +52 -59
  6. flowfile/web/__init__.py +14 -9
  7. flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
  8. flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
  9. flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
  10. flowfile/web/static/assets/{CloudConnectionManager-d3248f8d.js → CloudConnectionView-f13f202b.js} +11 -11
  11. flowfile/web/static/assets/{CloudStorageReader-d65bf041.js → CloudStorageReader-0023d4a5.js} +10 -8
  12. flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
  13. flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
  14. flowfile/web/static/assets/{CloudStorageWriter-e83be3ed.js → CloudStorageWriter-8e781e11.js} +10 -8
  15. flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
  16. flowfile/web/static/assets/{ColumnSelector-cce661cf.js → ColumnSelector-8ad68ea9.js} +3 -5
  17. flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
  18. flowfile/web/static/assets/{ContextMenu-11a4652a.js → ContextMenu-31ee57f0.js} +3 -3
  19. flowfile/web/static/assets/{ContextMenu-160afb08.js → ContextMenu-69a74055.js} +3 -3
  20. flowfile/web/static/assets/{ContextMenu-cf18d2cc.js → ContextMenu-8e2051c6.js} +3 -3
  21. flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
  22. flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
  23. flowfile/web/static/assets/{CrossJoin-d395d38c.js → CrossJoin-03df6938.js} +12 -10
  24. flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
  25. flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
  26. flowfile/web/static/assets/{CustomNode-b812dc0b.js → CustomNode-8479239b.js} +36 -24
  27. flowfile/web/static/assets/{DatabaseConnectionSettings-7000bf2c.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
  28. flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
  29. flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
  30. flowfile/web/static/assets/{DatabaseReader-4f035d0c.js → DatabaseReader-c58b9552.js} +25 -15
  31. flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
  32. flowfile/web/static/assets/{DatabaseManager-9662ec5b.js → DatabaseView-d26a9140.js} +11 -11
  33. flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
  34. flowfile/web/static/assets/{DatabaseWriter-f65dcd54.js → DatabaseWriter-4d05ddc7.js} +17 -10
  35. flowfile/web/static/assets/{designer-e3c150ec.css → DesignerView-a6d0ee84.css} +629 -538
  36. flowfile/web/static/assets/{designer-f3656d8c.js → DesignerView-e6f5c0e8.js} +1214 -3209
  37. flowfile/web/static/assets/{documentation-52b241e7.js → DocumentationView-2e78ef1b.js} +5 -5
  38. flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
  39. flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
  40. flowfile/web/static/assets/{ExploreData-94c43dfc.js → ExploreData-7b54caca.js} +18 -9
  41. flowfile/web/static/assets/{ExternalSource-ac04b3cc.js → ExternalSource-3fa399b2.js} +9 -7
  42. flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
  43. flowfile/web/static/assets/Filter-7494ea97.css +48 -0
  44. flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
  45. flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
  46. flowfile/web/static/assets/{Formula-71472193.js → Formula-aac42b1e.js} +13 -11
  47. flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
  48. flowfile/web/static/assets/{FuzzyMatch-b317f631.js → FuzzyMatch-cd9bbfca.js} +12 -10
  49. flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
  50. flowfile/web/static/assets/{GraphSolver-754a234f.js → GraphSolver-c7e6780e.js} +13 -11
  51. flowfile/web/static/assets/{GroupBy-6c6f9802.js → GroupBy-93c5d22b.js} +9 -7
  52. flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
  53. flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
  54. flowfile/web/static/assets/{Join-a1b800be.js → Join-a19b2de2.js} +13 -11
  55. flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
  56. flowfile/web/static/assets/LoginView-d325d632.css +172 -0
  57. flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
  58. flowfile/web/static/assets/{ManualInput-a9640276.js → ManualInput-8d3374b2.js} +170 -116
  59. flowfile/web/static/assets/{MultiSelect-97213888.js → MultiSelect-ad1b6243.js} +2 -2
  60. flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-6ffe088a.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
  61. flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
  62. flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
  63. flowfile/web/static/assets/{NumericInput-e638088a.js → NumericInput-7100234c.js} +2 -2
  64. flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-90eb2cba.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
  65. flowfile/web/static/assets/{Output-ddc9079f.css → Output-35e97000.css} +6 -6
  66. flowfile/web/static/assets/{Output-76750610.js → Output-f5efd2aa.js} +60 -38
  67. flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
  68. flowfile/web/static/assets/{Pivot-7814803f.js → Pivot-d981d23c.js} +11 -9
  69. flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
  70. flowfile/web/static/assets/{PivotValidation-f92137d2.js → PivotValidation-39386e95.js} +3 -3
  71. flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
  72. flowfile/web/static/assets/{PivotValidation-76dd431a.js → PivotValidation-63de1f73.js} +3 -3
  73. flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
  74. flowfile/web/static/assets/{PolarsCode-889c3008.js → PolarsCode-f9d69217.js} +18 -9
  75. flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
  76. flowfile/web/static/assets/PopOver-d96599db.css +33 -0
  77. flowfile/web/static/assets/{Read-6b17491f.css → Read-36e7bd51.css} +12 -12
  78. flowfile/web/static/assets/{Read-637b72a7.js → Read-aec2e377.js} +83 -105
  79. flowfile/web/static/assets/{RecordCount-2b050c41.js → RecordCount-78ed6845.js} +6 -4
  80. flowfile/web/static/assets/{RecordId-81df7784.js → RecordId-2156e890.js} +8 -6
  81. flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
  82. flowfile/web/static/assets/{SQLQueryComponent-88dcfe53.js → SQLQueryComponent-48c72f5b.js} +3 -3
  83. flowfile/web/static/assets/{Sample-258ad2a9.js → Sample-1352ca74.js} +6 -4
  84. flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
  85. flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
  86. flowfile/web/static/assets/{SecretManager-2a2cb7e2.js → SecretsView-17df66ee.js} +35 -36
  87. flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
  88. flowfile/web/static/assets/{Select-850215fd.js → Select-0aee4c54.js} +9 -7
  89. flowfile/web/static/assets/{SettingsSection-55bae608.js → SettingsSection-0784e157.js} +3 -3
  90. flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
  91. flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
  92. flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
  93. flowfile/web/static/assets/{SettingsSection-0e8d9123.js → SettingsSection-cd341bb6.js} +3 -3
  94. flowfile/web/static/assets/{SettingsSection-29b4fa6b.js → SettingsSection-f2002a6d.js} +3 -3
  95. flowfile/web/static/assets/{SingleSelect-bebd408b.js → SingleSelect-460cc0ea.js} +2 -2
  96. flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-6093741c.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
  97. flowfile/web/static/assets/{SliderInput-6a05ab61.js → SliderInput-5d926864.js} +7 -4
  98. flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
  99. flowfile/web/static/assets/{Sort-10ab48ed.js → Sort-3cdc971b.js} +9 -7
  100. flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
  101. flowfile/web/static/assets/{TextInput-df9d6259.js → TextInput-a2d0bfbd.js} +2 -2
  102. flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-000e1178.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
  103. flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
  104. flowfile/web/static/assets/{TextToRows-6c2d93d8.js → TextToRows-918945f7.js} +11 -10
  105. flowfile/web/static/assets/{ToggleSwitch-0ff7ac52.js → ToggleSwitch-f0ef5196.js} +2 -2
  106. flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-c6dc3029.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
  107. flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
  108. flowfile/web/static/assets/{UnavailableFields-1bab97cb.js → UnavailableFields-bdad6144.js} +4 -4
  109. flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
  110. flowfile/web/static/assets/{Union-b563478a.js → Union-e8ab8c86.js} +8 -6
  111. flowfile/web/static/assets/{Unique-f90db5db.js → Unique-8cd4f976.js} +13 -22
  112. flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
  113. flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
  114. flowfile/web/static/assets/{Unpivot-bcb0025f.js → Unpivot-8da14095.js} +10 -8
  115. flowfile/web/static/assets/{UnpivotValidation-c4e73b04.js → UnpivotValidation-6f7d89ff.js} +3 -3
  116. flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
  117. flowfile/web/static/assets/{VueGraphicWalker-bb8535e2.js → VueGraphicWalker-3fb312e1.js} +4 -4
  118. flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
  119. flowfile/web/static/assets/{api-4c8e3822.js → api-24483f0d.js} +1 -1
  120. flowfile/web/static/assets/{api-2d6adc4f.js → api-8b81fa73.js} +1 -1
  121. flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
  122. flowfile/web/static/assets/{dropDown-1bca8a74.js → dropDown-ac0fda9d.js} +3 -3
  123. flowfile/web/static/assets/{fullEditor-2985687e.js → fullEditor-5497a84a.js} +11 -10
  124. flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
  125. flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
  126. flowfile/web/static/assets/{genericNodeSettings-0476ba4e.js → genericNodeSettings-99014e1d.js} +5 -5
  127. flowfile/web/static/assets/index-07dda503.js +38 -0
  128. flowfile/web/static/assets/index-3ba44389.js +2696 -0
  129. flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
  130. flowfile/web/static/assets/{index-246f201c.js → index-fb6493ae.js} +41626 -40869
  131. flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
  132. flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
  133. flowfile/web/static/assets/{outputCsv-d686eeaf.js → outputCsv-8f8ba42d.js} +3 -3
  134. flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
  135. flowfile/web/static/assets/{outputExcel-8809ea2f.js → outputExcel-393f4fef.js} +3 -3
  136. flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
  137. flowfile/web/static/assets/{outputParquet-53ba645a.js → outputParquet-07c81f65.js} +4 -4
  138. flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
  139. flowfile/web/static/assets/{readCsv-053bf97b.js → readCsv-07f6d9ad.js} +21 -20
  140. flowfile/web/static/assets/{readCsv-bca3ed53.css → readCsv-3bfac4c3.css} +15 -15
  141. flowfile/web/static/assets/{readExcel-e1b381ea.css → readExcel-3db6b763.css} +13 -13
  142. flowfile/web/static/assets/{readExcel-ad531eab.js → readExcel-ed69bc8f.js} +10 -12
  143. flowfile/web/static/assets/{readParquet-cee068e2.css → readParquet-c5244ad5.css} +4 -4
  144. flowfile/web/static/assets/{readParquet-58e899a1.js → readParquet-e3ed4528.js} +4 -7
  145. flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
  146. flowfile/web/static/assets/{selectDynamic-b38de2ba.js → selectDynamic-80b92899.js} +5 -5
  147. flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
  148. flowfile/web/static/assets/{vue-codemirror.esm-db9b8936.js → vue-codemirror.esm-0965f39f.js} +31 -637
  149. flowfile/web/static/assets/{vue-content-loader.es-b5f3ac30.js → vue-content-loader.es-c506ad97.js} +1 -1
  150. flowfile/web/static/index.html +2 -2
  151. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +4 -4
  152. flowfile-0.5.3.dist-info/RECORD +402 -0
  153. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +1 -1
  154. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +1 -0
  155. flowfile_core/__init__.py +13 -3
  156. flowfile_core/auth/jwt.py +51 -16
  157. flowfile_core/auth/models.py +32 -7
  158. flowfile_core/auth/password.py +89 -0
  159. flowfile_core/auth/secrets.py +8 -6
  160. flowfile_core/configs/__init__.py +9 -7
  161. flowfile_core/configs/flow_logger.py +15 -14
  162. flowfile_core/configs/node_store/__init__.py +72 -4
  163. flowfile_core/configs/node_store/nodes.py +155 -172
  164. flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
  165. flowfile_core/configs/settings.py +28 -15
  166. flowfile_core/database/connection.py +7 -6
  167. flowfile_core/database/init_db.py +96 -2
  168. flowfile_core/database/models.py +3 -1
  169. flowfile_core/fileExplorer/__init__.py +17 -0
  170. flowfile_core/fileExplorer/funcs.py +123 -57
  171. flowfile_core/fileExplorer/utils.py +10 -11
  172. flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
  173. flowfile_core/flowfile/analytics/analytics_processor.py +27 -24
  174. flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
  175. flowfile_core/flowfile/analytics/utils.py +1 -1
  176. flowfile_core/flowfile/code_generator/code_generator.py +391 -279
  177. flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
  178. flowfile_core/flowfile/connection_manager/models.py +1 -1
  179. flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
  180. flowfile_core/flowfile/database_connection_manager/models.py +1 -1
  181. flowfile_core/flowfile/extensions.py +17 -12
  182. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
  183. flowfile_core/flowfile/flow_data_engine/create/funcs.py +152 -103
  184. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +526 -477
  185. flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
  186. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
  187. flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
  188. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
  189. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
  190. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +43 -32
  191. flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
  192. flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
  193. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +15 -11
  194. flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
  195. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
  196. flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
  197. flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
  198. flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
  199. flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
  200. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +360 -191
  201. flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
  202. flowfile_core/flowfile/flow_data_engine/utils.py +101 -67
  203. flowfile_core/flowfile/flow_graph.py +1011 -561
  204. flowfile_core/flowfile/flow_graph_utils.py +31 -49
  205. flowfile_core/flowfile/flow_node/flow_node.py +332 -232
  206. flowfile_core/flowfile/flow_node/models.py +54 -41
  207. flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
  208. flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
  209. flowfile_core/flowfile/handler.py +82 -32
  210. flowfile_core/flowfile/manage/compatibility_enhancements.py +493 -47
  211. flowfile_core/flowfile/manage/io_flowfile.py +391 -0
  212. flowfile_core/flowfile/node_designer/__init__.py +15 -13
  213. flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
  214. flowfile_core/flowfile/node_designer/custom_node.py +162 -36
  215. flowfile_core/flowfile/node_designer/ui_components.py +136 -35
  216. flowfile_core/flowfile/schema_callbacks.py +77 -54
  217. flowfile_core/flowfile/setting_generator/__init__.py +0 -1
  218. flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
  219. flowfile_core/flowfile/setting_generator/settings.py +72 -55
  220. flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
  221. flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
  222. flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
  223. flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
  224. flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
  225. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
  226. flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
  227. flowfile_core/flowfile/util/calculate_layout.py +9 -13
  228. flowfile_core/flowfile/util/execution_orderer.py +25 -17
  229. flowfile_core/flowfile/util/node_skipper.py +4 -4
  230. flowfile_core/flowfile/utils.py +19 -21
  231. flowfile_core/main.py +26 -19
  232. flowfile_core/routes/auth.py +284 -11
  233. flowfile_core/routes/cloud_connections.py +25 -25
  234. flowfile_core/routes/logs.py +21 -29
  235. flowfile_core/routes/public.py +3 -3
  236. flowfile_core/routes/routes.py +77 -43
  237. flowfile_core/routes/secrets.py +25 -27
  238. flowfile_core/routes/user_defined_components.py +483 -4
  239. flowfile_core/run_lock.py +0 -1
  240. flowfile_core/schemas/__init__.py +4 -6
  241. flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
  242. flowfile_core/schemas/cloud_storage_schemas.py +59 -55
  243. flowfile_core/schemas/input_schema.py +398 -154
  244. flowfile_core/schemas/output_model.py +50 -35
  245. flowfile_core/schemas/schemas.py +207 -67
  246. flowfile_core/schemas/transform_schema.py +1360 -435
  247. flowfile_core/schemas/yaml_types.py +117 -0
  248. flowfile_core/secret_manager/secret_manager.py +17 -13
  249. flowfile_core/{flowfile/node_designer/data_types.py → types.py} +33 -3
  250. flowfile_core/utils/arrow_reader.py +7 -6
  251. flowfile_core/utils/excel_file_manager.py +3 -3
  252. flowfile_core/utils/fileManager.py +7 -7
  253. flowfile_core/utils/fl_executor.py +8 -10
  254. flowfile_core/utils/utils.py +4 -4
  255. flowfile_core/utils/validate_setup.py +5 -4
  256. flowfile_frame/__init__.py +107 -50
  257. flowfile_frame/adapters.py +2 -9
  258. flowfile_frame/adding_expr.py +73 -32
  259. flowfile_frame/cloud_storage/frame_helpers.py +27 -23
  260. flowfile_frame/cloud_storage/secret_manager.py +12 -26
  261. flowfile_frame/config.py +2 -5
  262. flowfile_frame/expr.py +311 -218
  263. flowfile_frame/expr.pyi +160 -159
  264. flowfile_frame/expr_name.py +23 -23
  265. flowfile_frame/flow_frame.py +581 -489
  266. flowfile_frame/flow_frame.pyi +123 -104
  267. flowfile_frame/flow_frame_methods.py +236 -252
  268. flowfile_frame/group_frame.py +50 -20
  269. flowfile_frame/join.py +2 -2
  270. flowfile_frame/lazy.py +129 -87
  271. flowfile_frame/lazy_methods.py +83 -30
  272. flowfile_frame/list_name_space.py +55 -50
  273. flowfile_frame/selectors.py +148 -68
  274. flowfile_frame/series.py +9 -7
  275. flowfile_frame/utils.py +19 -21
  276. flowfile_worker/__init__.py +12 -4
  277. flowfile_worker/configs.py +11 -19
  278. flowfile_worker/create/__init__.py +14 -27
  279. flowfile_worker/create/funcs.py +143 -94
  280. flowfile_worker/create/models.py +139 -68
  281. flowfile_worker/create/pl_types.py +14 -15
  282. flowfile_worker/create/read_excel_tables.py +34 -41
  283. flowfile_worker/create/utils.py +22 -19
  284. flowfile_worker/external_sources/s3_source/main.py +18 -51
  285. flowfile_worker/external_sources/s3_source/models.py +34 -27
  286. flowfile_worker/external_sources/sql_source/main.py +8 -5
  287. flowfile_worker/external_sources/sql_source/models.py +13 -9
  288. flowfile_worker/flow_logger.py +10 -8
  289. flowfile_worker/funcs.py +214 -155
  290. flowfile_worker/main.py +11 -17
  291. flowfile_worker/models.py +35 -28
  292. flowfile_worker/process_manager.py +2 -3
  293. flowfile_worker/routes.py +121 -93
  294. flowfile_worker/secrets.py +9 -6
  295. flowfile_worker/spawner.py +80 -49
  296. flowfile_worker/utils.py +3 -2
  297. shared/__init__.py +2 -7
  298. shared/storage_config.py +25 -13
  299. test_utils/postgres/commands.py +3 -2
  300. test_utils/postgres/fixtures.py +9 -9
  301. test_utils/s3/commands.py +1 -1
  302. test_utils/s3/data_generator.py +3 -4
  303. test_utils/s3/demo_data_generator.py +4 -7
  304. test_utils/s3/fixtures.py +7 -5
  305. tools/migrate/README.md +56 -0
  306. tools/migrate/__init__.py +12 -0
  307. tools/migrate/__main__.py +118 -0
  308. tools/migrate/legacy_schemas.py +682 -0
  309. tools/migrate/migrate.py +610 -0
  310. tools/migrate/tests/__init__.py +0 -0
  311. tools/migrate/tests/conftest.py +21 -0
  312. tools/migrate/tests/test_migrate.py +622 -0
  313. tools/migrate/tests/test_migration_e2e.py +1009 -0
  314. tools/migrate/tests/test_node_migrations.py +843 -0
  315. flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
  316. flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
  317. flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
  318. flowfile/web/static/assets/Filter-812dcbca.js +0 -164
  319. flowfile/web/static/assets/Filter-f62091b3.css +0 -20
  320. flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
  321. flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
  322. flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
  323. flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
  324. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
  325. flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
  326. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
  327. flowfile/web/static/assets/secretApi-538058f3.js +0 -46
  328. flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
  329. flowfile-0.4.1.dist-info/RECORD +0 -376
  330. flowfile_core/flowfile/manage/open_flowfile.py +0 -143
  331. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
  332. /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
@@ -1,34 +1,36 @@
1
+ from __future__ import annotations
2
+
1
3
  import inspect
2
4
  import os
3
- from typing import Any, Iterable, List, Literal, Optional, Tuple, Union, Dict, Callable, get_args, get_origin
4
-
5
5
  import re
6
+ from collections.abc import Iterable, Iterator, Mapping
7
+ from typing import Any, Literal, Optional, Union, get_args, get_origin
6
8
 
7
9
  import polars as pl
8
- from flowfile_frame.lazy_methods import add_lazyframe_methods
9
-
10
- from polars._typing import (CsvEncoding, FrameInitTypes, SchemaDefinition, SchemaDict, Orientation)
11
- from collections.abc import Iterator
12
-
13
- from pl_fuzzy_frame_match import FuzzyMapping, fuzzy_match_dfs
10
+ from pl_fuzzy_frame_match import FuzzyMapping
11
+ from polars._typing import CsvEncoding, FrameInitTypes, Orientation, SchemaDefinition, SchemaDict
14
12
 
13
+ from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine
15
14
  from flowfile_core.flowfile.flow_graph import FlowGraph, add_connection
16
15
  from flowfile_core.flowfile.flow_graph_utils import combine_flow_graphs_with_mapping
17
- from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine
18
16
  from flowfile_core.flowfile.flow_node.flow_node import FlowNode
19
17
  from flowfile_core.schemas import input_schema, transform_schema
20
-
21
- from flowfile_frame.expr import Expr, Column, lit, col
22
- from flowfile_frame.selectors import Selector
23
- from flowfile_frame.group_frame import GroupByFrame
24
- from flowfile_frame.utils import (_parse_inputs_as_iterable, create_flow_graph, stringify_values,
25
- ensure_inputs_as_iterable, generate_node_id, data as node_id_data)
26
- from flowfile_frame.join import _normalize_columns_to_list, _create_join_mappings
27
- from flowfile_frame.utils import _check_if_convertible_to_code
28
- from flowfile_frame.config import logger
29
18
  from flowfile_frame.cloud_storage.frame_helpers import add_write_ff_to_cloud_storage
30
- from collections.abc import Mapping
31
-
19
+ from flowfile_frame.config import logger
20
+ from flowfile_frame.expr import Column, Expr, col, lit
21
+ from flowfile_frame.group_frame import GroupByFrame
22
+ from flowfile_frame.join import _create_join_mappings, _normalize_columns_to_list
23
+ from flowfile_frame.lazy_methods import add_lazyframe_methods
24
+ from flowfile_frame.selectors import Selector
25
+ from flowfile_frame.utils import (
26
+ _check_if_convertible_to_code,
27
+ _parse_inputs_as_iterable,
28
+ create_flow_graph,
29
+ ensure_inputs_as_iterable,
30
+ generate_node_id,
31
+ stringify_values,
32
+ )
33
+ from flowfile_frame.utils import data as node_id_data
32
34
 
33
35
 
34
36
  def can_be_expr(param: inspect.Parameter) -> bool:
@@ -83,7 +85,7 @@ def _extract_expr_parts(expr_obj) -> tuple[str, str]:
83
85
  raw_definitions = []
84
86
 
85
87
  # Add function sources if any
86
- if hasattr(expr_obj, '_function_sources') and expr_obj._function_sources:
88
+ if hasattr(expr_obj, "_function_sources") and expr_obj._function_sources:
87
89
  # Remove duplicates while preserving order
88
90
  unique_sources = []
89
91
  seen = set()
@@ -101,8 +103,9 @@ def _extract_expr_parts(expr_obj) -> tuple[str, str]:
101
103
  return pure_expr_str, raw_defs_str
102
104
 
103
105
 
104
- def _check_ok_for_serialization(method_name: str = None, polars_expr: pl.Expr | None = None,
105
- group_expr: pl.Expr | None = None) -> None:
106
+ def _check_ok_for_serialization(
107
+ method_name: str = None, polars_expr: pl.Expr | None = None, group_expr: pl.Expr | None = None
108
+ ) -> None:
106
109
  if method_name is None:
107
110
  raise NotImplementedError("Cannot create a polars lambda expression without the method")
108
111
  if polars_expr is None:
@@ -110,7 +113,7 @@ def _check_ok_for_serialization(method_name: str = None, polars_expr: pl.Expr |
110
113
  method_ref = getattr(pl.LazyFrame, method_name)
111
114
  if method_ref is None:
112
115
  raise ModuleNotFoundError(f"Could not find the method {method_name} in polars lazyframe")
113
- if method_name == 'group_by':
116
+ if method_name == "group_by":
114
117
  if group_expr is None:
115
118
  raise NotImplementedError("Cannot create a polars lambda expression without the groupby expression")
116
119
  if not all(isinstance(ge, pl.Expr) for ge in group_expr):
@@ -120,6 +123,7 @@ def _check_ok_for_serialization(method_name: str = None, polars_expr: pl.Expr |
120
123
  @add_lazyframe_methods
121
124
  class FlowFrame:
122
125
  """Main class that wraps FlowDataEngine and maintains the ETL graph."""
126
+
123
127
  flow_graph: FlowGraph
124
128
  data: pl.LazyFrame
125
129
 
@@ -197,8 +201,10 @@ class FlowFrame:
197
201
  raise ValueError(f"Could not dconvert data to a polars DataFrame: {e}")
198
202
  # Create a FlowDataEngine to get data in the right format for manual input
199
203
  flow_table = FlowDataEngine(raw_data=pl_data)
200
- raw_data_format = input_schema.RawData(data=list(flow_table.to_dict().values()),
201
- columns=[c.get_minimal_field_info() for c in flow_table.schema])
204
+ raw_data_format = input_schema.RawData(
205
+ data=list(flow_table.to_dict().values()),
206
+ columns=[c.get_minimal_field_info() for c in flow_table.schema],
207
+ )
202
208
  # Create a manual input node
203
209
  input_node = input_schema.NodeManualInput(
204
210
  flow_id=flow_id,
@@ -220,19 +226,19 @@ class FlowFrame:
220
226
  )
221
227
 
222
228
  def __new__(
223
- cls,
224
- data: pl.LazyFrame | FrameInitTypes = None,
225
- schema: SchemaDefinition | None = None,
226
- *,
227
- schema_overrides: SchemaDict | None = None,
228
- strict: bool = True,
229
- orient: Orientation | None = None,
230
- infer_schema_length: int | None = 100,
231
- nan_to_null: bool = False,
232
- flow_graph: Optional[FlowGraph] = None,
233
- node_id: Optional[int] = None,
234
- parent_node_id: Optional[int] = None,
235
- **kwargs, # Accept and ignore any other kwargs for API compatibility
229
+ cls,
230
+ data: pl.LazyFrame | FrameInitTypes = None,
231
+ schema: SchemaDefinition | None = None,
232
+ *,
233
+ schema_overrides: SchemaDict | None = None,
234
+ strict: bool = True,
235
+ orient: Orientation | None = None,
236
+ infer_schema_length: int | None = 100,
237
+ nan_to_null: bool = False,
238
+ flow_graph: FlowGraph | None = None,
239
+ node_id: int | None = None,
240
+ parent_node_id: int | None = None,
241
+ **kwargs, # Accept and ignore any other kwargs for API compatibility
236
242
  ) -> "FlowFrame":
237
243
  """
238
244
  Unified constructor for FlowFrame.
@@ -252,11 +258,18 @@ class FlowFrame:
252
258
  instance.parent_node_id = parent_node_id
253
259
  return instance
254
260
  elif flow_graph is not None and not isinstance(data, pl.LazyFrame):
255
- instance = cls.create_from_any_type(data=data, schema=schema, schema_overrides=schema_overrides,
256
- strict=strict, orient=orient, infer_schema_length=infer_schema_length,
257
- nan_to_null=nan_to_null, flow_graph=flow_graph, node_id=node_id,
258
- parent_node_id=parent_node_id
259
- )
261
+ instance = cls.create_from_any_type(
262
+ data=data,
263
+ schema=schema,
264
+ schema_overrides=schema_overrides,
265
+ strict=strict,
266
+ orient=orient,
267
+ infer_schema_length=infer_schema_length,
268
+ nan_to_null=nan_to_null,
269
+ flow_graph=flow_graph,
270
+ node_id=node_id,
271
+ parent_node_id=parent_node_id,
272
+ )
260
273
  return instance
261
274
 
262
275
  source_graph = create_flow_graph()
@@ -265,37 +278,41 @@ class FlowFrame:
265
278
  if data is None:
266
279
  data = pl.LazyFrame()
267
280
  if not isinstance(data, pl.LazyFrame):
268
-
269
281
  description = "Data imported from Python object"
270
282
  try:
271
283
  pl_df = pl.DataFrame(
272
- data, schema=schema, schema_overrides=schema_overrides,
273
- strict=strict, orient=orient, infer_schema_length=infer_schema_length,
274
- nan_to_null=nan_to_null
284
+ data,
285
+ schema=schema,
286
+ schema_overrides=schema_overrides,
287
+ strict=strict,
288
+ orient=orient,
289
+ infer_schema_length=infer_schema_length,
290
+ nan_to_null=nan_to_null,
275
291
  )
276
292
  pl_data = pl_df.lazy()
277
293
  except Exception as e:
278
294
  raise ValueError(f"Could not convert data to a Polars DataFrame: {e}")
279
295
 
280
296
  flow_table = FlowDataEngine(raw_data=pl_data)
281
- raw_data_format = input_schema.RawData(data=list(flow_table.to_dict().values()),
282
- columns=[c.get_minimal_field_info() for c in flow_table.schema])
297
+ raw_data_format = input_schema.RawData(
298
+ data=list(flow_table.to_dict().values()),
299
+ columns=[c.get_minimal_field_info() for c in flow_table.schema],
300
+ )
283
301
  input_node = input_schema.NodeManualInput(
284
- flow_id=source_graph.flow_id, node_id=source_node_id,
285
- raw_data_format=raw_data_format, pos_x=100, pos_y=100,
286
- is_setup=True, description=description
302
+ flow_id=source_graph.flow_id,
303
+ node_id=source_node_id,
304
+ raw_data_format=raw_data_format,
305
+ pos_x=100,
306
+ pos_y=100,
307
+ is_setup=True,
308
+ description=description,
287
309
  )
288
310
  source_graph.add_manual_input(input_node)
289
311
  else:
290
312
  source_graph.add_dependency_on_polars_lazy_frame(data, source_node_id)
291
313
 
292
314
  final_data = source_graph.get_node(source_node_id).get_resulting_data().data_frame
293
- return cls(
294
- data=final_data,
295
- flow_graph=source_graph,
296
- node_id=source_node_id,
297
- parent_node_id=parent_node_id
298
- )
315
+ return cls(data=final_data, flow_graph=source_graph, node_id=source_node_id, parent_node_id=parent_node_id)
299
316
 
300
317
  def __init__(self, *args, **kwargs):
301
318
  """
@@ -328,20 +345,20 @@ class FlowFrame:
328
345
  parent_node_id=self.node_id,
329
346
  )
330
347
  except AttributeError:
331
- raise ValueError('Could not execute the function')
348
+ raise ValueError("Could not execute the function")
332
349
 
333
350
  @staticmethod
334
351
  def _generate_sort_polars_code(
335
- pure_sort_expr_strs: List[str],
336
- descending_values: List[bool],
337
- nulls_last_values: List[bool],
338
- multithreaded: bool,
339
- maintain_order: bool,
352
+ pure_sort_expr_strs: list[str],
353
+ descending_values: list[bool],
354
+ nulls_last_values: list[bool],
355
+ multithreaded: bool,
356
+ maintain_order: bool,
340
357
  ) -> str:
341
358
  """
342
359
  Generates the `input_df.sort(...)` Polars code string using pure expression strings.
343
360
  """
344
- kwargs_for_code: Dict[str, Any] = {}
361
+ kwargs_for_code: dict[str, Any] = {}
345
362
  if any(descending_values):
346
363
  kwargs_for_code["descending"] = descending_values[0] if len(descending_values) == 1 else descending_values
347
364
  if any(nulls_last_values):
@@ -353,19 +370,20 @@ class FlowFrame:
353
370
 
354
371
  kwargs_str_for_code = ", ".join(f"{k}={repr(v)}" for k, v in kwargs_for_code.items())
355
372
 
356
- by_arg_for_code = pure_sort_expr_strs[0] if len(
357
- pure_sort_expr_strs) == 1 else f"[{', '.join(pure_sort_expr_strs)}]"
373
+ by_arg_for_code = (
374
+ pure_sort_expr_strs[0] if len(pure_sort_expr_strs) == 1 else f"[{', '.join(pure_sort_expr_strs)}]"
375
+ )
358
376
  return f"input_df.sort({by_arg_for_code}{', ' + kwargs_str_for_code if kwargs_str_for_code else ''})"
359
377
 
360
378
  def sort(
361
- self,
362
- by: Union[List[Union[Expr, str]], Expr, str],
363
- *more_by: Union[Expr, str],
364
- descending: Union[bool, List[bool]] = False,
365
- nulls_last: Union[bool, List[bool]] = False,
366
- multithreaded: bool = True,
367
- maintain_order: bool = False,
368
- description: Optional[str] = None,
379
+ self,
380
+ by: list[Expr | str] | Expr | str,
381
+ *more_by: Expr | str,
382
+ descending: bool | list[bool] = False,
383
+ nulls_last: bool | list[bool] = False,
384
+ multithreaded: bool = True,
385
+ maintain_order: bool = False,
386
+ description: str | None = None,
369
387
  ) -> "FlowFrame":
370
388
  """
371
389
  Sort the dataframe by the given columns.
@@ -377,10 +395,10 @@ class FlowFrame:
377
395
  if more_by:
378
396
  sort_expressions_input.extend(list(_parse_inputs_as_iterable(more_by)))
379
397
 
380
- all_processed_expr_objects: List[Expr] = []
381
- pure_polars_expr_strings_for_sort: List[str] = []
382
- collected_raw_definitions: List[str] = []
383
- column_names_for_native_node: List[str] = []
398
+ all_processed_expr_objects: list[Expr] = []
399
+ pure_polars_expr_strings_for_sort: list[str] = []
400
+ collected_raw_definitions: list[str] = []
401
+ column_names_for_native_node: list[str] = []
384
402
 
385
403
  use_polars_code_path = False
386
404
 
@@ -429,10 +447,12 @@ class FlowFrame:
429
447
  if not is_simple_col_for_native: # If it wasn't a simple string or unaltered Column
430
448
  use_polars_code_path = True
431
449
 
432
- desc_values = list(descending) if isinstance(descending, list) else [descending] * len(
433
- all_processed_expr_objects)
434
- null_last_values = list(nulls_last) if isinstance(nulls_last, list) else [nulls_last] * len(
435
- all_processed_expr_objects)
450
+ desc_values = (
451
+ list(descending) if isinstance(descending, list) else [descending] * len(all_processed_expr_objects)
452
+ )
453
+ null_last_values = (
454
+ list(nulls_last) if isinstance(nulls_last, list) else [nulls_last] * len(all_processed_expr_objects)
455
+ )
436
456
 
437
457
  if len(desc_values) != len(all_processed_expr_objects):
438
458
  raise ValueError("Length of 'descending' does not match the number of sort expressions.")
@@ -448,23 +468,31 @@ class FlowFrame:
448
468
  if collected_raw_definitions:
449
469
  unique_raw_definitions = list(dict.fromkeys(collected_raw_definitions)) # Order-preserving unique
450
470
  definitions_section = "\n\n".join(unique_raw_definitions)
451
- final_code_for_node = definitions_section + \
452
- "\#─────SPLIT─────\n\n" + \
453
- f"output_df = {polars_operation_code}"
471
+ final_code_for_node = (
472
+ definitions_section + "\\#─────SPLIT─────\n\n" + f"output_df = {polars_operation_code}"
473
+ )
454
474
  else:
455
475
  final_code_for_node = polars_operation_code
456
476
 
457
- pl_expressions_for_fallback = [e.expr for e in all_processed_expr_objects if
458
- hasattr(e, 'expr') and e.expr is not None]
477
+ pl_expressions_for_fallback = [
478
+ e.expr for e in all_processed_expr_objects if hasattr(e, "expr") and e.expr is not None
479
+ ]
459
480
  kwargs_for_fallback = {
460
481
  "descending": desc_values[0] if len(desc_values) == 1 else desc_values,
461
482
  "nulls_last": null_last_values[0] if len(null_last_values) == 1 else null_last_values,
462
- "multithreaded": multithreaded, "maintain_order": maintain_order}
483
+ "multithreaded": multithreaded,
484
+ "maintain_order": maintain_order,
485
+ }
463
486
 
464
- self._add_polars_code(new_node_id, final_code_for_node, description, method_name="sort",
465
- convertable_to_code=_check_if_convertible_to_code(all_processed_expr_objects),
466
- polars_expr=pl_expressions_for_fallback,
467
- kwargs_expr=kwargs_for_fallback)
487
+ self._add_polars_code(
488
+ new_node_id,
489
+ final_code_for_node,
490
+ description,
491
+ method_name="sort",
492
+ convertable_to_code=_check_if_convertible_to_code(all_processed_expr_objects),
493
+ polars_expr=pl_expressions_for_fallback,
494
+ kwargs_expr=kwargs_for_fallback,
495
+ )
468
496
  else:
469
497
  sort_inputs_for_node = []
470
498
  for i, col_name_for_native in enumerate(column_names_for_native_node):
@@ -473,30 +501,44 @@ class FlowFrame:
473
501
  # type: ignore
474
502
  )
475
503
  sort_settings = input_schema.NodeSort(
476
- flow_id=self.flow_graph.flow_id, node_id=new_node_id, sort_input=sort_inputs_for_node, # type: ignore
477
- pos_x=200, pos_y=150, is_setup=True, depending_on_id=self.node_id,
478
- description=description or f"Sort by {', '.join(column_names_for_native_node)}")
504
+ flow_id=self.flow_graph.flow_id,
505
+ node_id=new_node_id,
506
+ sort_input=sort_inputs_for_node, # type: ignore
507
+ pos_x=200,
508
+ pos_y=150,
509
+ is_setup=True,
510
+ depending_on_id=self.node_id,
511
+ description=description or f"Sort by {', '.join(column_names_for_native_node)}",
512
+ )
479
513
  self.flow_graph.add_sort(sort_settings)
480
514
 
481
515
  return self._create_child_frame(new_node_id)
482
516
 
483
- def _add_polars_code(self, new_node_id: int, code: str, description: str = None,
484
- depending_on_ids: List[str] | None = None, convertable_to_code: bool = True,
485
- method_name: str = None, polars_expr: Expr | List[Expr] | None = None,
486
- group_expr: Expr | List[Expr] | None = None,
487
- kwargs_expr: Dict | None = None,
488
- group_kwargs: Dict | None = None, ):
517
+ def _add_polars_code(
518
+ self,
519
+ new_node_id: int,
520
+ code: str,
521
+ description: str = None,
522
+ depending_on_ids: list[str] | None = None,
523
+ convertable_to_code: bool = True,
524
+ method_name: str = None,
525
+ polars_expr: Expr | list[Expr] | None = None,
526
+ group_expr: Expr | list[Expr] | None = None,
527
+ kwargs_expr: dict | None = None,
528
+ group_kwargs: dict | None = None,
529
+ ):
489
530
  polars_code_for_node: str
490
531
  if not convertable_to_code or _contains_lambda_pattern(code):
491
-
492
- effective_method_name = get_method_name_from_code(
493
- code) if method_name is None and "input_df." in code else method_name
532
+ effective_method_name = (
533
+ get_method_name_from_code(code) if method_name is None and "input_df." in code else method_name
534
+ )
494
535
 
495
536
  pl_expr_list = ensure_inputs_as_iterable(polars_expr) if polars_expr is not None else []
496
537
  group_expr_list = ensure_inputs_as_iterable(group_expr) if group_expr is not None else []
497
538
 
498
- _check_ok_for_serialization(polars_expr=pl_expr_list, method_name=effective_method_name,
499
- group_expr=group_expr_list)
539
+ _check_ok_for_serialization(
540
+ polars_expr=pl_expr_list, method_name=effective_method_name, group_expr=group_expr_list
541
+ )
500
542
 
501
543
  current_kwargs_expr = kwargs_expr if kwargs_expr is not None else {}
502
544
  result_lazyframe_or_expr: Any
@@ -508,22 +550,27 @@ class FlowFrame:
508
550
  target_obj = getattr(self.data, effective_method_name)(*group_expr_list, **group_kwargs)
509
551
  if not pl_expr_list:
510
552
  raise ValueError(
511
- "Aggregation expressions (polars_expr) are required for group_by().agg() in serialization fallback.")
553
+ "Aggregation expressions (polars_expr) are required for group_by().agg() in serialization fallback."
554
+ )
512
555
  result_lazyframe_or_expr = target_obj.agg(*pl_expr_list, **current_kwargs_expr)
513
556
  elif effective_method_name:
514
- result_lazyframe_or_expr = getattr(self.data, effective_method_name)(*pl_expr_list,
515
- **current_kwargs_expr)
557
+ result_lazyframe_or_expr = getattr(self.data, effective_method_name)(
558
+ *pl_expr_list, **current_kwargs_expr
559
+ )
516
560
  else:
517
561
  raise ValueError(
518
- "Cannot execute Polars operation: method_name is missing and could not be inferred for serialization fallback.")
562
+ "Cannot execute Polars operation: method_name is missing and could not be inferred for serialization fallback."
563
+ )
519
564
  try:
520
565
  if isinstance(result_lazyframe_or_expr, pl.LazyFrame):
521
- serialized_value_for_code = result_lazyframe_or_expr.serialize(format='json')
522
- polars_code_for_node = "\n".join([
523
- f"serialized_value = r'''{serialized_value_for_code}'''",
524
- "buffer = BytesIO(serialized_value.encode('utf-8'))",
525
- "output_df = pl.LazyFrame.deserialize(buffer, format='json')",
526
- ])
566
+ serialized_value_for_code = result_lazyframe_or_expr.serialize(format="json")
567
+ polars_code_for_node = "\n".join(
568
+ [
569
+ f"serialized_value = r'''{serialized_value_for_code}'''",
570
+ "buffer = BytesIO(serialized_value.encode('utf-8'))",
571
+ "output_df = pl.LazyFrame.deserialize(buffer, format='json')",
572
+ ]
573
+ )
527
574
  logger.warning(
528
575
  f"Transformation '{effective_method_name}' uses non-serializable elements. "
529
576
  "Falling back to serializing the resulting Polars LazyFrame object."
@@ -556,18 +603,18 @@ class FlowFrame:
556
603
  self.flow_graph.add_polars_code(polars_code_settings)
557
604
 
558
605
  def join(
559
- self,
560
- other,
561
- on: List[str | Column] | str | Column = None,
562
- how: str = "inner",
563
- left_on: List[str | Column] | str | Column = None,
564
- right_on: List[str | Column] | str | Column = None,
565
- suffix: str = "_right",
566
- validate: str = None,
567
- nulls_equal: bool = False,
568
- coalesce: bool = None,
569
- maintain_order: Literal[None, "left", "right", "left_right", "right_left"] = None,
570
- description: str = None,
606
+ self,
607
+ other,
608
+ on: list[str | Column] | str | Column = None,
609
+ how: str = "inner",
610
+ left_on: list[str | Column] | str | Column = None,
611
+ right_on: list[str | Column] | str | Column = None,
612
+ suffix: str = "_right",
613
+ validate: str = None,
614
+ nulls_equal: bool = False,
615
+ coalesce: bool = None,
616
+ maintain_order: Literal[None, "left", "right", "left_right", "right_left"] = None,
617
+ description: str = None,
571
618
  ) -> "FlowFrame":
572
619
  """
573
620
  Add a join operation to the Logical Plan.
@@ -613,9 +660,7 @@ class FlowFrame:
613
660
  New FlowFrame with join operation applied.
614
661
  """
615
662
  # Step 1: Determine if we need to use Polars code
616
- use_polars_code = self._should_use_polars_code_for_join(
617
- maintain_order, coalesce, nulls_equal, validate, suffix
618
- )
663
+ use_polars_code = self._should_use_polars_code_for_join(maintain_order, coalesce, nulls_equal, validate, suffix)
619
664
  # Step 2: Ensure both FlowFrames are in the same graph
620
665
  self._ensure_same_graph(other)
621
666
 
@@ -623,12 +668,9 @@ class FlowFrame:
623
668
  new_node_id = generate_node_id()
624
669
 
625
670
  # Step 4: Parse and validate join columns
626
- left_columns, right_columns = self._parse_join_columns(
627
- on, left_on, right_on, how
628
- )
629
-
671
+ left_columns, right_columns = self._parse_join_columns(on, left_on, right_on, how)
630
672
  # Step 5: Validate column lists have same length (except for cross join)
631
- if how != 'cross' and left_columns is not None and right_columns is not None:
673
+ if how != "cross" and left_columns is not None and right_columns is not None:
632
674
  if len(left_columns) != len(right_columns):
633
675
  raise ValueError(
634
676
  f"Length mismatch: left columns ({len(left_columns)}) != right columns ({len(right_columns)})"
@@ -636,42 +678,46 @@ class FlowFrame:
636
678
 
637
679
  # Step 6: Create join mappings if not using Polars code
638
680
  join_mappings = None
639
- if not use_polars_code and how != 'cross':
640
- join_mappings, use_polars_code = _create_join_mappings(
641
- left_columns or [], right_columns or []
642
- )
681
+ if not use_polars_code and how != "cross":
682
+ join_mappings, use_polars_code = _create_join_mappings(left_columns or [], right_columns or [])
643
683
 
644
684
  # Step 7: Execute join based on approach
645
- if use_polars_code or suffix != '_right':
685
+ if use_polars_code or suffix != "_right":
646
686
  return self._execute_polars_code_join(
647
- other, new_node_id, on, left_on, right_on, left_columns, right_columns,
648
- how, suffix, validate, nulls_equal, coalesce, maintain_order, description
649
- )
650
- elif join_mappings or how == 'cross':
651
- return self._execute_native_join(
652
- other, new_node_id, join_mappings, how, description
687
+ other,
688
+ new_node_id,
689
+ on,
690
+ left_on,
691
+ right_on,
692
+ left_columns,
693
+ right_columns,
694
+ how,
695
+ suffix,
696
+ validate,
697
+ nulls_equal,
698
+ coalesce,
699
+ maintain_order,
700
+ description,
653
701
  )
702
+ elif join_mappings or how == "cross":
703
+ return self._execute_native_join(other, new_node_id, join_mappings, how, description)
654
704
  else:
655
705
  raise ValueError("Could not execute join")
656
706
 
657
- def _should_use_polars_code_for_join(
658
- self, maintain_order, coalesce, nulls_equal, validate, suffix
659
- ) -> bool:
707
+ def _should_use_polars_code_for_join(self, maintain_order, coalesce, nulls_equal, validate, suffix) -> bool:
660
708
  """Determine if we should use Polars code instead of native join."""
661
709
  return not (
662
- maintain_order is None and
663
- coalesce is None and
664
- nulls_equal is False and
665
- validate is None and
666
- suffix == '_right'
710
+ maintain_order is None
711
+ and coalesce is None
712
+ and nulls_equal is False
713
+ and validate is None
714
+ and suffix == "_right"
667
715
  )
668
716
 
669
717
  def _ensure_same_graph(self, other: "FlowFrame") -> None:
670
718
  """Ensure both FlowFrames are in the same graph, combining if necessary."""
671
719
  if self.flow_graph.flow_id != other.flow_graph.flow_id:
672
- combined_graph, node_mappings = combine_flow_graphs_with_mapping(
673
- self.flow_graph, other.flow_graph
674
- )
720
+ combined_graph, node_mappings = combine_flow_graphs_with_mapping(self.flow_graph, other.flow_graph)
675
721
 
676
722
  new_self_node_id = node_mappings.get((self.flow_graph.flow_id, self.node_id), None)
677
723
  new_other_node_id = node_mappings.get((other.flow_graph.flow_id, other.node_id), None)
@@ -686,19 +732,19 @@ class FlowFrame:
686
732
  node_id_data["c"] = node_id_data["c"] + len(combined_graph.nodes)
687
733
 
688
734
  def _parse_join_columns(
689
- self,
690
- on: List[str | Column] | str | Column,
691
- left_on: List[str | Column] | str | Column,
692
- right_on: List[str | Column] | str | Column,
693
- how: str
694
- ) -> tuple[List[str] | None, List[str] | None]:
735
+ self,
736
+ on: list[str | Column] | str | Column,
737
+ left_on: list[str | Column] | str | Column,
738
+ right_on: list[str | Column] | str | Column,
739
+ how: str,
740
+ ) -> tuple[list[str] | None, list[str] | None]:
695
741
  """Parse and validate join column specifications."""
696
742
  if on is not None:
697
743
  left_columns = right_columns = _normalize_columns_to_list(on)
698
744
  elif left_on is not None and right_on is not None:
699
745
  left_columns = _normalize_columns_to_list(left_on)
700
746
  right_columns = _normalize_columns_to_list(right_on)
701
- elif how == 'cross' and left_on is None and right_on is None and on is None:
747
+ elif how == "cross" and left_on is None and right_on is None and on is None:
702
748
  left_columns = None
703
749
  right_columns = None
704
750
  else:
@@ -707,37 +753,43 @@ class FlowFrame:
707
753
  return left_columns, right_columns
708
754
 
709
755
  def _execute_polars_code_join(
710
- self,
711
- other: "FlowFrame",
712
- new_node_id: int,
713
- on: List[str | Column] | str | Column,
714
- left_on: List[str | Column] | str | Column,
715
- right_on: List[str | Column] | str | Column,
716
- left_columns: List[str] | None,
717
- right_columns: List[str] | None,
718
- how: str,
719
- suffix: str,
720
- validate: str,
721
- nulls_equal: bool,
722
- coalesce: bool,
723
- maintain_order: Literal[None, "left", "right", "left_right", "right_left"],
724
- description: str,
756
+ self,
757
+ other: "FlowFrame",
758
+ new_node_id: int,
759
+ on: list[str | Column] | str | Column,
760
+ left_on: list[str | Column] | str | Column,
761
+ right_on: list[str | Column] | str | Column,
762
+ left_columns: list[str] | None,
763
+ right_columns: list[str] | None,
764
+ how: str,
765
+ suffix: str,
766
+ validate: str,
767
+ nulls_equal: bool,
768
+ coalesce: bool,
769
+ maintain_order: Literal[None, "left", "right", "left_right", "right_left"],
770
+ description: str,
725
771
  ) -> "FlowFrame":
726
772
  """Execute join using Polars code approach."""
727
773
  # Build the code arguments
728
774
  code_kwargs = self._build_polars_join_kwargs(
729
- on, left_on, right_on, left_columns, right_columns,
730
- how, suffix, validate, nulls_equal, coalesce, maintain_order
775
+ on,
776
+ left_on,
777
+ right_on,
778
+ left_columns,
779
+ right_columns,
780
+ how,
781
+ suffix,
782
+ validate,
783
+ nulls_equal,
784
+ coalesce,
785
+ maintain_order,
731
786
  )
732
787
 
733
788
  kwargs_str = ", ".join(f"{k}={v}" for k, v in code_kwargs.items() if v is not None)
734
789
  code = f"input_df_1.join({kwargs_str})"
735
790
 
736
791
  # Add the Polars code node
737
- self._add_polars_code(
738
- new_node_id, code, description,
739
- depending_on_ids=[self.node_id, other.node_id]
740
- )
792
+ self._add_polars_code(new_node_id, code, description, depending_on_ids=[self.node_id, other.node_id])
741
793
 
742
794
  # Add connections
743
795
  self._add_connection(self.node_id, new_node_id, "main")
@@ -752,28 +804,29 @@ class FlowFrame:
752
804
  )
753
805
 
754
806
  def _build_polars_join_kwargs(
755
- self,
756
- on: List[str | Column] | str | Column,
757
- left_on: List[str | Column] | str | Column,
758
- right_on: List[str | Column] | str | Column,
759
- left_columns: List[str] | None,
760
- right_columns: List[str] | None,
761
- how: str,
762
- suffix: str,
763
- validate: str,
764
- nulls_equal: bool,
765
- coalesce: bool,
766
- maintain_order: Literal[None, "left", "right", "left_right", "right_left"],
807
+ self,
808
+ on: list[str | Column] | str | Column,
809
+ left_on: list[str | Column] | str | Column,
810
+ right_on: list[str | Column] | str | Column,
811
+ left_columns: list[str] | None,
812
+ right_columns: list[str] | None,
813
+ how: str,
814
+ suffix: str,
815
+ validate: str,
816
+ nulls_equal: bool,
817
+ coalesce: bool,
818
+ maintain_order: Literal[None, "left", "right", "left_right", "right_left"],
767
819
  ) -> dict:
768
820
  """Build kwargs dictionary for Polars join code."""
769
821
 
770
822
  def format_column_list(cols):
771
823
  if cols is None:
772
824
  return None
773
- return "[" + ', '.join(
774
- f"'{v}'" if isinstance(v, str) else str(v)
775
- for v in _normalize_columns_to_list(cols)
776
- ) + "]"
825
+ return (
826
+ "["
827
+ + ", ".join(f"'{v}'" if isinstance(v, str) else str(v) for v in _normalize_columns_to_list(cols))
828
+ + "]"
829
+ )
777
830
 
778
831
  return {
779
832
  "other": "input_df_2",
@@ -785,46 +838,49 @@ class FlowFrame:
785
838
  "validate": _to_string_val(validate),
786
839
  "nulls_equal": nulls_equal,
787
840
  "coalesce": coalesce,
788
- "maintain_order": _to_string_val(maintain_order)
841
+ "maintain_order": _to_string_val(maintain_order),
789
842
  }
790
843
 
791
844
  def _execute_native_join(
792
- self,
793
- other: "FlowFrame",
794
- new_node_id: int,
795
- join_mappings: List | None,
796
- how: str,
797
- description: str,
845
+ self,
846
+ other: "FlowFrame",
847
+ new_node_id: int,
848
+ join_mappings: list | None,
849
+ how: str,
850
+ description: str,
798
851
  ) -> "FlowFrame":
799
852
  """Execute join using native FlowFile join nodes."""
800
853
  # Create select inputs for both frames
854
+
801
855
  left_select = transform_schema.SelectInputs.create_from_pl_df(self.data)
802
856
  right_select = transform_schema.SelectInputs.create_from_pl_df(other.data)
803
857
  # Create appropriate join input based on join type
804
- if how == 'cross':
858
+ if how == "cross":
805
859
  join_input = transform_schema.CrossJoinInput(
806
- left_select=left_select.renames,
860
+ left_select=transform_schema.JoinInputs(renames=left_select.renames),
807
861
  right_select=right_select.renames,
808
862
  )
863
+ join_input_manager = transform_schema.CrossJoinInputManager(join_input)
864
+
809
865
  else:
810
866
  join_input = transform_schema.JoinInput(
811
867
  join_mapping=join_mappings,
812
- left_select=left_select.renames,
868
+ left_select=transform_schema.JoinInputs(renames=left_select.renames),
813
869
  right_select=right_select.renames,
814
870
  how=how,
815
871
  )
872
+ join_input_manager = transform_schema.JoinInputManager(join_input)
816
873
 
817
874
  # Configure join input
818
- join_input.auto_rename()
819
- for right_column in right_select.renames:
875
+ for right_column in join_input_manager.right_select.renames:
820
876
  if right_column.join_key:
821
877
  right_column.keep = False
822
878
 
823
879
  # Create and add appropriate node
824
- if how == 'cross':
825
- self._add_cross_join_node(new_node_id, join_input, description, other)
880
+ if how == "cross":
881
+ self._add_cross_join_node(new_node_id, join_input_manager.to_cross_join_input(), description, other)
826
882
  else:
827
- self._add_regular_join_node(new_node_id, join_input, description, other)
883
+ self._add_regular_join_node(new_node_id, join_input_manager.to_join_input(), description, other)
828
884
 
829
885
  # Add connections
830
886
  self._add_connection(self.node_id, new_node_id, "main")
@@ -838,11 +894,11 @@ class FlowFrame:
838
894
  )
839
895
 
840
896
  def _add_cross_join_node(
841
- self,
842
- new_node_id: int,
843
- join_input: "transform_schema.CrossJoinInput",
844
- description: str,
845
- other: "FlowFrame",
897
+ self,
898
+ new_node_id: int,
899
+ join_input: "transform_schema.CrossJoinInput",
900
+ description: str,
901
+ other: "FlowFrame",
846
902
  ) -> None:
847
903
  """Add a cross join node to the graph."""
848
904
  cross_join_settings = input_schema.NodeCrossJoin(
@@ -851,18 +907,18 @@ class FlowFrame:
851
907
  cross_join_input=join_input,
852
908
  is_setup=True,
853
909
  depending_on_ids=[self.node_id, other.node_id],
854
- description=description or f"Join with cross strategy",
910
+ description=description or "Join with cross strategy",
855
911
  auto_generate_selection=True,
856
912
  verify_integrity=True,
857
913
  )
858
914
  self.flow_graph.add_cross_join(cross_join_settings)
859
915
 
860
916
  def _add_regular_join_node(
861
- self,
862
- new_node_id: int,
863
- join_input: "transform_schema.JoinInput",
864
- description: str,
865
- other: "FlowFrame",
917
+ self,
918
+ new_node_id: int,
919
+ join_input: "transform_schema.JoinInput",
920
+ description: str,
921
+ other: "FlowFrame",
866
922
  ) -> None:
867
923
  """Add a regular join node to the graph."""
868
924
  join_settings = input_schema.NodeJoin(
@@ -887,34 +943,41 @@ class FlowFrame:
887
943
  pos_y=100,
888
944
  is_setup=True,
889
945
  depending_on_id=self.node_id,
890
- description=description
946
+ description=description,
891
947
  )
892
948
  self.flow_graph.add_record_count(node_number_of_records)
893
949
  return self._create_child_frame(new_node_id)
894
950
 
895
- def rename(self, mapping: Mapping[str, str], *, strict: bool = True,
896
- description: str = None) -> "FlowFrame":
951
+ def rename(self, mapping: Mapping[str, str], *, strict: bool = True, description: str = None) -> "FlowFrame":
897
952
  """Rename columns based on a mapping or function."""
898
- return self.select([col(old_name).alias(new_name) for old_name, new_name in mapping.items()],
899
- description=description, _keep_missing=True)
953
+ return self.select(
954
+ [col(old_name).alias(new_name) for old_name, new_name in mapping.items()],
955
+ description=description,
956
+ _keep_missing=True,
957
+ )
900
958
 
901
- def select(self, *columns: Union[str, Expr, Selector], description: Optional[str] = None, _keep_missing: bool = False) -> "FlowFrame":
959
+ def select(
960
+ self, *columns: str | Expr | Selector, description: str | None = None, _keep_missing: bool = False
961
+ ) -> "FlowFrame":
902
962
  """
903
963
  Select columns from the frame.
904
964
  """
905
965
  columns_iterable = list(_parse_inputs_as_iterable(columns))
906
966
  new_node_id = generate_node_id()
907
- if (len(columns_iterable) == 1 and isinstance(columns_iterable[0], Expr)
908
- and str(columns_iterable[0]) == "pl.Expr(len()).alias('number_of_records')"):
967
+ if (
968
+ len(columns_iterable) == 1
969
+ and isinstance(columns_iterable[0], Expr)
970
+ and str(columns_iterable[0]) == "pl.Expr(len()).alias('number_of_records')"
971
+ ):
909
972
  return self._add_number_of_records(new_node_id, description)
910
973
 
911
- all_input_expr_objects: List[Expr] = []
912
- pure_polars_expr_strings_for_select: List[str] = []
913
- collected_raw_definitions: List[str] = []
914
- selected_col_names_for_native: List[transform_schema.SelectInput] = [] # For native node
974
+ all_input_expr_objects: list[Expr] = []
975
+ pure_polars_expr_strings_for_select: list[str] = []
976
+ collected_raw_definitions: list[str] = []
977
+ selected_col_names_for_native: list[transform_schema.SelectInput] = [] # For native node
915
978
 
916
979
  can_use_native_node = True
917
- if len(columns_iterable) == 1 and isinstance(columns_iterable[0], str) and columns_iterable[0] == '*':
980
+ if len(columns_iterable) == 1 and isinstance(columns_iterable[0], str) and columns_iterable[0] == "*":
918
981
  effective_columns_iterable = [col(c_name) for c_name in self.columns]
919
982
  else:
920
983
  effective_columns_iterable = columns_iterable
@@ -948,13 +1011,17 @@ class FlowFrame:
948
1011
  if can_use_native_node:
949
1012
  existing_cols = self.columns
950
1013
  selected_col_names = {select_col.old_name for select_col in selected_col_names_for_native}
951
- not_selected_columns = [transform_schema.SelectInput(c, keep=_keep_missing) for c in existing_cols if
952
- c not in selected_col_names]
1014
+ not_selected_columns = [
1015
+ transform_schema.SelectInput(c, keep=_keep_missing)
1016
+ for c in existing_cols
1017
+ if c not in selected_col_names
1018
+ ]
953
1019
  selected_col_names_for_native.extend(not_selected_columns)
954
1020
  if _keep_missing:
955
1021
  lookup_selection = {_col.old_name: _col for _col in selected_col_names_for_native}
956
- selected_col_names_for_native = [lookup_selection.get(_col) for
957
- _col in existing_cols if _col in lookup_selection]
1022
+ selected_col_names_for_native = [
1023
+ lookup_selection.get(_col) for _col in existing_cols if _col in lookup_selection
1024
+ ]
958
1025
  select_settings = input_schema.NodeSelect(
959
1026
  flow_id=self.flow_graph.flow_id,
960
1027
  node_id=new_node_id,
@@ -964,7 +1031,7 @@ class FlowFrame:
964
1031
  pos_y=100,
965
1032
  is_setup=True,
966
1033
  depending_on_id=self.node_id,
967
- description=description
1034
+ description=description,
968
1035
  )
969
1036
  self.flow_graph.add_select(select_settings)
970
1037
  else:
@@ -973,23 +1040,35 @@ class FlowFrame:
973
1040
  if collected_raw_definitions:
974
1041
  unique_raw_definitions = list(dict.fromkeys(collected_raw_definitions))
975
1042
  definitions_section = "\n\n".join(unique_raw_definitions)
976
- final_code_for_node = definitions_section + \
977
- "\#─────SPLIT─────\n\n" + \
978
- f"output_df = {polars_operation_code}"
1043
+ final_code_for_node = (
1044
+ definitions_section + "\\#─────SPLIT─────\n\n" + f"output_df = {polars_operation_code}"
1045
+ )
979
1046
  else:
980
1047
  final_code_for_node = polars_operation_code
981
1048
 
982
- pl_expressions_for_fallback = [e.expr for e in all_input_expr_objects if
983
- isinstance(e, Expr) and hasattr(e, 'expr') and e.expr is not None]
984
- self._add_polars_code(new_node_id, final_code_for_node, description,
985
- method_name="select",
986
- convertable_to_code=_check_if_convertible_to_code(all_input_expr_objects),
987
- polars_expr=pl_expressions_for_fallback)
1049
+ pl_expressions_for_fallback = [
1050
+ e.expr
1051
+ for e in all_input_expr_objects
1052
+ if isinstance(e, Expr) and hasattr(e, "expr") and e.expr is not None
1053
+ ]
1054
+ self._add_polars_code(
1055
+ new_node_id,
1056
+ final_code_for_node,
1057
+ description,
1058
+ method_name="select",
1059
+ convertable_to_code=_check_if_convertible_to_code(all_input_expr_objects),
1060
+ polars_expr=pl_expressions_for_fallback,
1061
+ )
988
1062
 
989
1063
  return self._create_child_frame(new_node_id)
990
1064
 
991
- def filter(self, *predicates: Union[Expr, Any], flowfile_formula: Optional[str] = None,
992
- description: Optional[str] = None, **constraints: Any) -> "FlowFrame":
1065
+ def filter(
1066
+ self,
1067
+ *predicates: Expr | Any,
1068
+ flowfile_formula: str | None = None,
1069
+ description: str | None = None,
1070
+ **constraints: Any,
1071
+ ) -> "FlowFrame":
993
1072
  """
994
1073
  Filter rows based on a predicate.
995
1074
  """
@@ -998,9 +1077,9 @@ class FlowFrame:
998
1077
  available_columns = self.columns
999
1078
  new_node_id = generate_node_id()
1000
1079
  if len(predicates) > 0 or len(constraints) > 0:
1001
- all_input_expr_objects: List[Expr] = []
1002
- pure_polars_expr_strings: List[str] = []
1003
- collected_raw_definitions: List[str] = []
1080
+ all_input_expr_objects: list[Expr] = []
1081
+ pure_polars_expr_strings: list[str] = []
1082
+ collected_raw_definitions: list[str] = []
1004
1083
 
1005
1084
  processed_predicates = []
1006
1085
  for pred_item in predicates:
@@ -1029,10 +1108,11 @@ class FlowFrame:
1029
1108
  collected_raw_definitions.append(raw_defs_str)
1030
1109
 
1031
1110
  for k, v_val in constraints.items():
1032
- constraint_expr_obj = (col(k) == lit(v_val))
1111
+ constraint_expr_obj = col(k) == lit(v_val)
1033
1112
  all_input_expr_objects.append(constraint_expr_obj)
1034
1113
  pure_expr_str, raw_defs_str = _extract_expr_parts(
1035
- constraint_expr_obj) # Constraint exprs are unlikely to have defs
1114
+ constraint_expr_obj
1115
+ ) # Constraint exprs are unlikely to have defs
1036
1116
  pure_polars_expr_strings.append(f"({pure_expr_str})")
1037
1117
  if raw_defs_str and raw_defs_str not in collected_raw_definitions: # Should be rare here
1038
1118
  collected_raw_definitions.append(raw_defs_str)
@@ -1044,31 +1124,36 @@ class FlowFrame:
1044
1124
  if collected_raw_definitions:
1045
1125
  unique_raw_definitions = list(dict.fromkeys(collected_raw_definitions)) # Order-preserving unique
1046
1126
  definitions_section = "\n\n".join(unique_raw_definitions)
1047
- final_code_for_node = definitions_section + \
1048
- "\#─────SPLIT─────\n\n" + \
1049
- f"output_df = {polars_operation_code}"
1127
+ final_code_for_node = (
1128
+ definitions_section + "\\#─────SPLIT─────\n\n" + f"output_df = {polars_operation_code}"
1129
+ )
1050
1130
  else:
1051
1131
  final_code_for_node = polars_operation_code
1052
1132
 
1053
1133
  convertable_to_code = _check_if_convertible_to_code(all_input_expr_objects)
1054
- pl_expressions_for_fallback = [e.expr for e in all_input_expr_objects if
1055
- isinstance(e, Expr) and hasattr(e, 'expr') and e.expr is not None]
1056
- self._add_polars_code(new_node_id, final_code_for_node, description, method_name="filter",
1057
- convertable_to_code=convertable_to_code,
1058
- polars_expr=pl_expressions_for_fallback)
1134
+ pl_expressions_for_fallback = [
1135
+ e.expr
1136
+ for e in all_input_expr_objects
1137
+ if isinstance(e, Expr) and hasattr(e, "expr") and e.expr is not None
1138
+ ]
1139
+ self._add_polars_code(
1140
+ new_node_id,
1141
+ final_code_for_node,
1142
+ description,
1143
+ method_name="filter",
1144
+ convertable_to_code=convertable_to_code,
1145
+ polars_expr=pl_expressions_for_fallback,
1146
+ )
1059
1147
  elif flowfile_formula:
1060
1148
  filter_settings = input_schema.NodeFilter(
1061
1149
  flow_id=self.flow_graph.flow_id,
1062
1150
  node_id=new_node_id,
1063
- filter_input=transform_schema.FilterInput(
1064
- advanced_filter=flowfile_formula,
1065
- filter_type="advanced"
1066
- ),
1151
+ filter_input=transform_schema.FilterInput(advanced_filter=flowfile_formula, filter_type="advanced"),
1067
1152
  pos_x=200,
1068
1153
  pos_y=150,
1069
1154
  is_setup=True,
1070
1155
  depending_on_id=self.node_id,
1071
- description=description
1156
+ description=description,
1072
1157
  )
1073
1158
  self.flow_graph.add_filter(filter_settings)
1074
1159
  else:
@@ -1077,12 +1162,7 @@ class FlowFrame:
1077
1162
 
1078
1163
  return self._create_child_frame(new_node_id)
1079
1164
 
1080
- def sink_csv(self,
1081
- file: str,
1082
- *args,
1083
- separator: str = ",",
1084
- encoding: str = "utf-8",
1085
- description: str = None):
1165
+ def sink_csv(self, file: str, *args, separator: str = ",", encoding: str = "utf-8", description: str = None):
1086
1166
  """
1087
1167
  Write the data to a CSV file.
1088
1168
 
@@ -1098,12 +1178,12 @@ class FlowFrame:
1098
1178
  return self.write_csv(file, *args, separator=separator, encoding=encoding, description=description)
1099
1179
 
1100
1180
  def write_parquet(
1101
- self,
1102
- path: str | os.PathLike,
1103
- *,
1104
- description: str = None,
1105
- convert_to_absolute_path: bool = True,
1106
- **kwargs: Any,
1181
+ self,
1182
+ path: str | os.PathLike,
1183
+ *,
1184
+ description: str = None,
1185
+ convert_to_absolute_path: bool = True,
1186
+ **kwargs: Any,
1107
1187
  ) -> "FlowFrame":
1108
1188
  """
1109
1189
  Write the data to a Parquet file. Creates a standard Output node if only
@@ -1140,16 +1220,11 @@ class FlowFrame:
1140
1220
  file_name = file_str.split(os.sep)[-1]
1141
1221
  use_polars_code = bool(kwargs.items()) or not is_path_input
1142
1222
 
1143
- output_parquet_table = input_schema.OutputParquetTable(
1144
- file_type="parquet"
1145
- )
1146
1223
  output_settings = input_schema.OutputSettings(
1147
- file_type='parquet',
1224
+ file_type="parquet",
1148
1225
  name=file_name,
1149
1226
  directory=file_str if is_path_input else str(file_str),
1150
- output_parquet_table=output_parquet_table,
1151
- output_csv_table=input_schema.OutputCsvTable(),
1152
- output_excel_table=input_schema.OutputExcelTable()
1227
+ table_settings=input_schema.OutputParquetTable(),
1153
1228
  )
1154
1229
 
1155
1230
  if is_path_input:
@@ -1166,7 +1241,7 @@ class FlowFrame:
1166
1241
  node_id=new_node_id,
1167
1242
  output_settings=output_settings,
1168
1243
  depending_on_id=self.node_id,
1169
- description=description
1244
+ description=description,
1170
1245
  )
1171
1246
  self.flow_graph.add_output(node_output)
1172
1247
  else:
@@ -1192,16 +1267,15 @@ class FlowFrame:
1192
1267
  return self._create_child_frame(new_node_id)
1193
1268
 
1194
1269
  def write_csv(
1195
- self,
1196
- file: str | os.PathLike,
1197
- *,
1198
- separator: str = ",",
1199
- encoding: str = "utf-8",
1200
- description: str = None,
1201
- convert_to_absolute_path: bool = True,
1202
- **kwargs: Any,
1270
+ self,
1271
+ file: str | os.PathLike,
1272
+ *,
1273
+ separator: str = ",",
1274
+ encoding: str = "utf-8",
1275
+ description: str = None,
1276
+ convert_to_absolute_path: bool = True,
1277
+ **kwargs: Any,
1203
1278
  ) -> "FlowFrame":
1204
-
1205
1279
  new_node_id = generate_node_id()
1206
1280
  is_path_input = isinstance(file, (str, os.PathLike))
1207
1281
  if isinstance(file, os.PathLike):
@@ -1217,13 +1291,10 @@ class FlowFrame:
1217
1291
 
1218
1292
  use_polars_code = bool(kwargs) or not is_path_input
1219
1293
  output_settings = input_schema.OutputSettings(
1220
- file_type='csv',
1294
+ file_type="csv",
1221
1295
  name=file_name,
1222
1296
  directory=file_str if is_path_input else str(file_str),
1223
- output_csv_table=input_schema.OutputCsvTable(
1224
- file_type="csv", delimiter=separator, encoding=encoding),
1225
- output_excel_table=input_schema.OutputExcelTable(),
1226
- output_parquet_table=input_schema.OutputParquetTable()
1297
+ table_settings=input_schema.OutputCsvTable(delimiter=separator, encoding=encoding),
1227
1298
  )
1228
1299
  if is_path_input:
1229
1300
  try:
@@ -1239,7 +1310,7 @@ class FlowFrame:
1239
1310
  node_id=new_node_id,
1240
1311
  output_settings=output_settings,
1241
1312
  depending_on_id=self.node_id,
1242
- description=description
1313
+ description=description,
1243
1314
  )
1244
1315
  self.flow_graph.add_output(node_output)
1245
1316
  else:
@@ -1253,9 +1324,9 @@ class FlowFrame:
1253
1324
  path_arg_repr = repr(output_settings.directory)
1254
1325
 
1255
1326
  all_kwargs_for_code = {
1256
- 'separator': separator,
1257
- 'encoding': encoding,
1258
- **kwargs # Add the extra kwargs
1327
+ "separator": separator,
1328
+ "encoding": encoding,
1329
+ **kwargs, # Add the extra kwargs
1259
1330
  }
1260
1331
  kwargs_repr = ", ".join(f"{k}={repr(v)}" for k, v in all_kwargs_for_code.items())
1261
1332
 
@@ -1269,42 +1340,47 @@ class FlowFrame:
1269
1340
 
1270
1341
  return self._create_child_frame(new_node_id)
1271
1342
 
1272
- def write_parquet_to_cloud_storage(self,
1273
- path: str,
1274
- connection_name: Optional[str] = None,
1275
- compression: Literal["snappy", "gzip", "brotli", "lz4", "zstd"] = "snappy",
1276
- description: Optional[str] = None,
1277
- ) -> "FlowFrame":
1343
+ def write_parquet_to_cloud_storage(
1344
+ self,
1345
+ path: str,
1346
+ connection_name: str | None = None,
1347
+ compression: Literal["snappy", "gzip", "brotli", "lz4", "zstd"] = "snappy",
1348
+ description: str | None = None,
1349
+ ) -> "FlowFrame":
1278
1350
  """
1279
- Write the data frame to cloud storage in Parquet format.
1280
-
1281
- Args:
1282
- path (str): The destination path in cloud storage where the Parquet file will be written.
1283
- connection_name (Optional[str], optional): The name of the storage connection
1284
- that a user can create. If None, uses the default connection. Defaults to None.
1285
- compression (Literal["snappy", "gzip", "brotli", "lz4", "zstd"], optional):
1286
- The compression algorithm to use for the Parquet file. Defaults to "snappy".
1287
- description (Optional[str], optional): Description of this operation for the ETL graph.
1288
-
1289
- Returns:
1290
- FlowFrame: A new child data frame representing the written data.
1351
+ Write the data frame to cloud storage in Parquet format.
1352
+
1353
+ Args:
1354
+ path (str): The destination path in cloud storage where the Parquet file will be written.
1355
+ connection_name (Optional[str], optional): The name of the storage connection
1356
+ that a user can create. If None, uses the default connection. Defaults to None.
1357
+ compression (Literal["snappy", "gzip", "brotli", "lz4", "zstd"], optional):
1358
+ The compression algorithm to use for the Parquet file. Defaults to "snappy".
1359
+ description (Optional[str], optional): Description of this operation for the ETL graph.
1360
+
1361
+ Returns:
1362
+ FlowFrame: A new child data frame representing the written data.
1291
1363
  """
1292
1364
 
1293
- new_node_id = add_write_ff_to_cloud_storage(path, flow_graph=self.flow_graph,
1294
- connection_name=connection_name,
1295
- depends_on_node_id=self.node_id,
1296
- parquet_compression=compression,
1297
- file_format="parquet",
1298
- description=description)
1365
+ new_node_id = add_write_ff_to_cloud_storage(
1366
+ path,
1367
+ flow_graph=self.flow_graph,
1368
+ connection_name=connection_name,
1369
+ depends_on_node_id=self.node_id,
1370
+ parquet_compression=compression,
1371
+ file_format="parquet",
1372
+ description=description,
1373
+ )
1299
1374
  return self._create_child_frame(new_node_id)
1300
1375
 
1301
- def write_csv_to_cloud_storage(self,
1302
- path: str,
1303
- connection_name: Optional[str] = None,
1304
- delimiter: str = ";",
1305
- encoding: CsvEncoding = "utf8",
1306
- description: Optional[str] = None,
1307
- ) -> "FlowFrame":
1376
+ def write_csv_to_cloud_storage(
1377
+ self,
1378
+ path: str,
1379
+ connection_name: str | None = None,
1380
+ delimiter: str = ";",
1381
+ encoding: CsvEncoding = "utf8",
1382
+ description: str | None = None,
1383
+ ) -> "FlowFrame":
1308
1384
  """
1309
1385
  Write the data frame to cloud storage in CSV format.
1310
1386
 
@@ -1321,21 +1397,25 @@ class FlowFrame:
1321
1397
  Returns:
1322
1398
  FlowFrame: A new child data frame representing the written data.
1323
1399
  """
1324
- new_node_id = add_write_ff_to_cloud_storage(path, flow_graph=self.flow_graph,
1325
- connection_name=connection_name,
1326
- depends_on_node_id=self.node_id,
1327
- csv_delimiter=delimiter,
1328
- csv_encoding=encoding,
1329
- file_format="csv",
1330
- description=description)
1400
+ new_node_id = add_write_ff_to_cloud_storage(
1401
+ path,
1402
+ flow_graph=self.flow_graph,
1403
+ connection_name=connection_name,
1404
+ depends_on_node_id=self.node_id,
1405
+ csv_delimiter=delimiter,
1406
+ csv_encoding=encoding,
1407
+ file_format="csv",
1408
+ description=description,
1409
+ )
1331
1410
  return self._create_child_frame(new_node_id)
1332
1411
 
1333
- def write_delta(self,
1334
- path: str,
1335
- connection_name: Optional[str] = None,
1336
- write_mode: Literal["overwrite", "append"] = "overwrite",
1337
- description: Optional[str] = None,
1338
- ) -> "FlowFrame":
1412
+ def write_delta(
1413
+ self,
1414
+ path: str,
1415
+ connection_name: str | None = None,
1416
+ write_mode: Literal["overwrite", "append"] = "overwrite",
1417
+ description: str | None = None,
1418
+ ) -> "FlowFrame":
1339
1419
  """
1340
1420
  Write the data frame to cloud storage in Delta Lake format.
1341
1421
 
@@ -1349,19 +1429,23 @@ class FlowFrame:
1349
1429
  Returns:
1350
1430
  FlowFrame: A new child data frame representing the written data.
1351
1431
  """
1352
- new_node_id = add_write_ff_to_cloud_storage(path, flow_graph=self.flow_graph,
1353
- connection_name=connection_name,
1354
- depends_on_node_id=self.node_id,
1355
- write_mode=write_mode,
1356
- file_format="delta",
1357
- description=description)
1432
+ new_node_id = add_write_ff_to_cloud_storage(
1433
+ path,
1434
+ flow_graph=self.flow_graph,
1435
+ connection_name=connection_name,
1436
+ depends_on_node_id=self.node_id,
1437
+ write_mode=write_mode,
1438
+ file_format="delta",
1439
+ description=description,
1440
+ )
1358
1441
  return self._create_child_frame(new_node_id)
1359
1442
 
1360
- def write_json_to_cloud_storage(self,
1361
- path: str,
1362
- connection_name: Optional[str] = None,
1363
- description: Optional[str] = None,
1364
- ) -> "FlowFrame":
1443
+ def write_json_to_cloud_storage(
1444
+ self,
1445
+ path: str,
1446
+ connection_name: str | None = None,
1447
+ description: str | None = None,
1448
+ ) -> "FlowFrame":
1365
1449
  """
1366
1450
  Write the data frame to cloud storage in JSON format.
1367
1451
 
@@ -1373,11 +1457,14 @@ class FlowFrame:
1373
1457
  Returns:
1374
1458
  FlowFrame: A new child data frame representing the written data.
1375
1459
  """
1376
- new_node_id = add_write_ff_to_cloud_storage(path, flow_graph=self.flow_graph,
1377
- connection_name=connection_name,
1378
- depends_on_node_id=self.node_id,
1379
- file_format="json",
1380
- description=description)
1460
+ new_node_id = add_write_ff_to_cloud_storage(
1461
+ path,
1462
+ flow_graph=self.flow_graph,
1463
+ connection_name=connection_name,
1464
+ depends_on_node_id=self.node_id,
1465
+ file_format="json",
1466
+ description=description,
1467
+ )
1381
1468
  return self._create_child_frame(new_node_id)
1382
1469
 
1383
1470
  def group_by(self, *by, description: str = None, maintain_order=False, **named_by) -> GroupByFrame:
@@ -1414,7 +1501,10 @@ class FlowFrame:
1414
1501
  # Create a GroupByFrame
1415
1502
  return GroupByFrame(
1416
1503
  node_id=new_node_id,
1417
- parent_frame=self, by_cols=by_cols, maintain_order=maintain_order, description=description
1504
+ parent_frame=self,
1505
+ by_cols=by_cols,
1506
+ maintain_order=maintain_order,
1507
+ description=description,
1418
1508
  )
1419
1509
 
1420
1510
  def to_graph(self):
@@ -1422,7 +1512,7 @@ class FlowFrame:
1422
1512
  return self.flow_graph
1423
1513
 
1424
1514
  def save_graph(self, file_path: str, auto_arrange: bool = True):
1425
- """Save the graph """
1515
+ """Save the graph"""
1426
1516
  if auto_arrange:
1427
1517
  self.flow_graph.apply_layout()
1428
1518
  self.flow_graph.save_flow(file_path)
@@ -1435,23 +1525,27 @@ class FlowFrame:
1435
1525
 
1436
1526
  def _with_flowfile_formula(self, flowfile_formula: str, output_column_name, description: str = None) -> "FlowFrame":
1437
1527
  new_node_id = generate_node_id()
1438
- function_settings = (
1439
- input_schema.NodeFormula(flow_id=self.flow_graph.flow_id, node_id=new_node_id, depending_on_id=self.node_id,
1440
- function=transform_schema.FunctionInput(
1441
- function=flowfile_formula,
1442
- field=transform_schema.FieldInput(name=output_column_name, data_type='Auto')),
1443
- description=description))
1528
+ function_settings = input_schema.NodeFormula(
1529
+ flow_id=self.flow_graph.flow_id,
1530
+ node_id=new_node_id,
1531
+ depending_on_id=self.node_id,
1532
+ function=transform_schema.FunctionInput(
1533
+ function=flowfile_formula, field=transform_schema.FieldInput(name=output_column_name, data_type="Auto")
1534
+ ),
1535
+ description=description,
1536
+ )
1444
1537
  self.flow_graph.add_formula(function_settings)
1445
1538
  return self._create_child_frame(new_node_id)
1446
1539
 
1447
1540
  def head(self, n: int, description: str = None):
1448
1541
  new_node_id = generate_node_id()
1449
- settings = input_schema.NodeSample(flow_id=self.flow_graph.flow_id,
1450
- node_id=new_node_id,
1451
- depending_on_id=self.node_id,
1452
- sample_size=n,
1453
- description=description
1454
- )
1542
+ settings = input_schema.NodeSample(
1543
+ flow_id=self.flow_graph.flow_id,
1544
+ node_id=new_node_id,
1545
+ depending_on_id=self.node_id,
1546
+ sample_size=n,
1547
+ description=description,
1548
+ )
1455
1549
  self.flow_graph.add_sample(settings)
1456
1550
  return self._create_child_frame(new_node_id)
1457
1551
 
@@ -1467,16 +1561,18 @@ class FlowFrame:
1467
1561
  def get_node_settings(self) -> FlowNode:
1468
1562
  return self.flow_graph.get_node(self.node_id)
1469
1563
 
1470
- def pivot(self,
1471
- on: str | list[str],
1472
- *,
1473
- index: str | list[str] | None = None,
1474
- values: str | list[str] | None = None,
1475
- aggregate_function: str | None = "first",
1476
- maintain_order: bool = True,
1477
- sort_columns: bool = False,
1478
- separator: str = '_',
1479
- description: str = None) -> "FlowFrame":
1564
+ def pivot(
1565
+ self,
1566
+ on: str | list[str],
1567
+ *,
1568
+ index: str | list[str] | None = None,
1569
+ values: str | list[str] | None = None,
1570
+ aggregate_function: str | None = "first",
1571
+ maintain_order: bool = True,
1572
+ sort_columns: bool = False,
1573
+ separator: str = "_",
1574
+ description: str = None,
1575
+ ) -> "FlowFrame":
1480
1576
  """
1481
1577
  Pivot a DataFrame from long to wide format.
1482
1578
 
@@ -1525,17 +1621,14 @@ class FlowFrame:
1525
1621
  value_col = values if isinstance(values, str) else values[0]
1526
1622
 
1527
1623
  # Set valid aggregations
1528
- valid_aggs = ['first', 'last', 'min', 'max', 'sum', 'mean', 'median', 'count']
1624
+ valid_aggs = ["first", "last", "min", "max", "sum", "mean", "median", "count"]
1529
1625
  if aggregate_function not in valid_aggs:
1530
- raise ValueError(f"Invalid aggregate_function: {aggregate_function}. "
1531
- f"Must be one of: {', '.join(valid_aggs)}")
1626
+ raise ValueError(
1627
+ f"Invalid aggregate_function: {aggregate_function}. " f"Must be one of: {', '.join(valid_aggs)}"
1628
+ )
1532
1629
 
1533
1630
  # Check if we can use the native implementation
1534
- can_use_native = (
1535
- isinstance(on_value, str) and
1536
- isinstance(value_col, str) and
1537
- aggregate_function in valid_aggs
1538
- )
1631
+ can_use_native = isinstance(on_value, str) and isinstance(value_col, str) and aggregate_function in valid_aggs
1539
1632
 
1540
1633
  if can_use_native:
1541
1634
  # Create pivot input for native implementation
@@ -1543,7 +1636,7 @@ class FlowFrame:
1543
1636
  index_columns=index_columns,
1544
1637
  pivot_column=on_value,
1545
1638
  value_col=value_col,
1546
- aggregations=[aggregate_function]
1639
+ aggregations=[aggregate_function],
1547
1640
  )
1548
1641
 
1549
1642
  # Create node settings
@@ -1555,7 +1648,7 @@ class FlowFrame:
1555
1648
  pos_y=150,
1556
1649
  is_setup=True,
1557
1650
  depending_on_id=self.node_id,
1558
- description=description or f"Pivot {value_col} by {on_value}"
1651
+ description=description or f"Pivot {value_col} by {on_value}",
1559
1652
  )
1560
1653
 
1561
1654
  # Add to graph using native implementation
@@ -1583,8 +1676,9 @@ class FlowFrame:
1583
1676
  # Generate description if not provided
1584
1677
  if description is None:
1585
1678
  on_str = on if isinstance(on, str) else ", ".join(on if isinstance(on, list) else [on])
1586
- values_str = values if isinstance(values, str) else ", ".join(
1587
- values if isinstance(values, list) else [values])
1679
+ values_str = (
1680
+ values if isinstance(values, str) else ", ".join(values if isinstance(values, list) else [values])
1681
+ )
1588
1682
  description = f"Pivot {values_str} by {on_str}"
1589
1683
 
1590
1684
  # Add polars code node
@@ -1592,13 +1686,15 @@ class FlowFrame:
1592
1686
 
1593
1687
  return self._create_child_frame(new_node_id)
1594
1688
 
1595
- def unpivot(self,
1596
- on: list[str | Selector] | str | None | Selector = None,
1597
- *,
1598
- index: list[str] | str | None = None,
1599
- variable_name: str = "variable",
1600
- value_name: str = "value",
1601
- description: str = None) -> "FlowFrame":
1689
+ def unpivot(
1690
+ self,
1691
+ on: list[str | Selector] | str | None | Selector = None,
1692
+ *,
1693
+ index: list[str] | str | None = None,
1694
+ variable_name: str = "variable",
1695
+ value_name: str = "value",
1696
+ description: str = None,
1697
+ ) -> "FlowFrame":
1602
1698
  """
1603
1699
  Unpivot a DataFrame from wide to long format.
1604
1700
 
@@ -1645,13 +1741,13 @@ class FlowFrame:
1645
1741
  value_columns = [on]
1646
1742
 
1647
1743
  if can_use_native:
1648
- can_use_native = (variable_name == "variable" and value_name == "value")
1744
+ can_use_native = variable_name == "variable" and value_name == "value"
1649
1745
  if can_use_native:
1650
1746
  unpivot_input = transform_schema.UnpivotInput(
1651
1747
  index_columns=index_columns,
1652
1748
  value_columns=value_columns,
1653
1749
  data_type_selector=None,
1654
- data_type_selector_mode='column'
1750
+ data_type_selector_mode="column",
1655
1751
  )
1656
1752
 
1657
1753
  # Create node settings
@@ -1663,7 +1759,7 @@ class FlowFrame:
1663
1759
  pos_y=150,
1664
1760
  is_setup=True,
1665
1761
  depending_on_id=self.node_id,
1666
- description=description or "Unpivot data from wide to long format"
1762
+ description=description or "Unpivot data from wide to long format",
1667
1763
  )
1668
1764
 
1669
1765
  # Add to graph using native implementation
@@ -1699,7 +1795,7 @@ class FlowFrame:
1699
1795
 
1700
1796
  def concat(
1701
1797
  self,
1702
- other: "FlowFrame" | List["FlowFrame"],
1798
+ other: "FlowFrame" | list["FlowFrame"],
1703
1799
  how: str = "vertical",
1704
1800
  rechunk: bool = False,
1705
1801
  parallel: bool = True,
@@ -1800,14 +1896,11 @@ class FlowFrame:
1800
1896
 
1801
1897
  # Add polars code node with dependencies on all input frames
1802
1898
  depending_on_ids = [self.node_id] + [frame.node_id for frame in others]
1803
- self._add_polars_code(
1804
- new_node_id, code, description, depending_on_ids=depending_on_ids
1805
- )
1899
+ self._add_polars_code(new_node_id, code, description, depending_on_ids=depending_on_ids)
1806
1900
  # Add connections to ensure all frames are available
1807
1901
  self._add_connection(self.node_id, new_node_id, "main")
1808
1902
 
1809
1903
  for other_frame in others:
1810
-
1811
1904
  other_frame.flow_graph = combined_graph
1812
1905
  other_frame._add_connection(other_frame.node_id, new_node_id, "main")
1813
1906
  # Create and return the new frame
@@ -1819,8 +1912,8 @@ class FlowFrame:
1819
1912
  )
1820
1913
 
1821
1914
  def _detect_cum_count_record_id(
1822
- self, expr: Any, new_node_id: int, description: Optional[str] = None
1823
- ) -> Tuple[bool, Optional["FlowFrame"]]:
1915
+ self, expr: Any, new_node_id: int, description: str | None = None
1916
+ ) -> tuple[bool, Optional["FlowFrame"]]:
1824
1917
  """
1825
1918
  Detect if the expression is a cum_count operation and use record_id if possible.
1826
1919
 
@@ -1841,8 +1934,12 @@ class FlowFrame:
1841
1934
  - Optional[FlowFrame]: The new FlowFrame if detection was successful, otherwise None
1842
1935
  """
1843
1936
  # Check if this is a cum_count operation
1844
- if (not isinstance(expr, Expr) or not expr._repr_str
1845
- or "cum_count" not in expr._repr_str or not hasattr(expr, "name")):
1937
+ if (
1938
+ not isinstance(expr, Expr)
1939
+ or not expr._repr_str
1940
+ or "cum_count" not in expr._repr_str
1941
+ or not hasattr(expr, "name")
1942
+ ):
1846
1943
  return False, None
1847
1944
 
1848
1945
  # Extract the output name
@@ -1929,24 +2026,24 @@ class FlowFrame:
1929
2026
  return False, None
1930
2027
 
1931
2028
  def with_columns(
1932
- self,
1933
- *exprs: Union[Expr, Iterable[Expr], Any], # Allow Any for implicit lit conversion
1934
- flowfile_formulas: Optional[List[str]] = None,
1935
- output_column_names: Optional[List[str]] = None,
1936
- description: Optional[str] = None,
1937
- **named_exprs: Union[Expr, Any], # Allow Any for implicit lit conversion
2029
+ self,
2030
+ *exprs: Expr | Iterable[Expr] | Any, # Allow Any for implicit lit conversion
2031
+ flowfile_formulas: list[str] | None = None,
2032
+ output_column_names: list[str] | None = None,
2033
+ description: str | None = None,
2034
+ **named_exprs: Expr | Any, # Allow Any for implicit lit conversion
1938
2035
  ) -> "FlowFrame":
1939
2036
  """
1940
2037
  Add or replace columns in the DataFrame.
1941
2038
  """
1942
2039
  new_node_id = generate_node_id()
1943
2040
 
1944
- all_input_expr_objects: List[Expr] = []
1945
- pure_polars_expr_strings_for_wc: List[str] = []
1946
- collected_raw_definitions: List[str] = []
2041
+ all_input_expr_objects: list[Expr] = []
2042
+ pure_polars_expr_strings_for_wc: list[str] = []
2043
+ collected_raw_definitions: list[str] = []
1947
2044
  has_exprs_or_named_exprs = bool(exprs or named_exprs)
1948
2045
  if has_exprs_or_named_exprs:
1949
- actual_exprs_to_process: List[Expr] = []
2046
+ actual_exprs_to_process: list[Expr] = []
1950
2047
  temp_exprs_iterable = list(_parse_inputs_as_iterable(exprs))
1951
2048
 
1952
2049
  for item in temp_exprs_iterable:
@@ -1977,38 +2074,43 @@ class FlowFrame:
1977
2074
  if collected_raw_definitions:
1978
2075
  unique_raw_definitions = list(dict.fromkeys(collected_raw_definitions))
1979
2076
  definitions_section = "\n\n".join(unique_raw_definitions)
1980
- final_code_for_node = definitions_section + \
1981
- "\n#─────SPLIT─────\n\n" + \
1982
- f"output_df = {polars_operation_code}"
2077
+ final_code_for_node = (
2078
+ definitions_section + "\n#─────SPLIT─────\n\n" + f"output_df = {polars_operation_code}"
2079
+ )
1983
2080
  else:
1984
2081
  final_code_for_node = polars_operation_code
1985
2082
 
1986
- pl_expressions_for_fallback = [e.expr for e in all_input_expr_objects if
1987
- isinstance(e, Expr) and hasattr(e, 'expr') and e.expr is not None]
1988
- self._add_polars_code(new_node_id, final_code_for_node, description, method_name='with_columns',
1989
- convertable_to_code=_check_if_convertible_to_code(all_input_expr_objects),
1990
- polars_expr=pl_expressions_for_fallback)
2083
+ pl_expressions_for_fallback = [
2084
+ e.expr
2085
+ for e in all_input_expr_objects
2086
+ if isinstance(e, Expr) and hasattr(e, "expr") and e.expr is not None
2087
+ ]
2088
+ self._add_polars_code(
2089
+ new_node_id,
2090
+ final_code_for_node,
2091
+ description,
2092
+ method_name="with_columns",
2093
+ convertable_to_code=_check_if_convertible_to_code(all_input_expr_objects),
2094
+ polars_expr=pl_expressions_for_fallback,
2095
+ )
1991
2096
  return self._create_child_frame(new_node_id)
1992
2097
 
1993
2098
  elif flowfile_formulas is not None and output_column_names is not None:
1994
-
1995
2099
  if len(output_column_names) != len(flowfile_formulas):
1996
- raise ValueError(
1997
- "Length of both the formulas and the output columns names must be identical"
1998
- )
2100
+ raise ValueError("Length of both the formulas and the output columns names must be identical")
1999
2101
 
2000
2102
  if len(flowfile_formulas) == 1:
2001
2103
  return self._with_flowfile_formula(flowfile_formulas[0], output_column_names[0], description)
2002
2104
  ff = self
2003
- for i, (flowfile_formula, output_column_name) in enumerate(zip(flowfile_formulas, output_column_names)):
2105
+ for i, (flowfile_formula, output_column_name) in enumerate(
2106
+ zip(flowfile_formulas, output_column_names, strict=False)
2107
+ ):
2004
2108
  ff = ff._with_flowfile_formula(flowfile_formula, output_column_name, f"{i}: {description}")
2005
2109
  return ff
2006
2110
  else:
2007
2111
  raise ValueError("Either exprs/named_exprs or flowfile_formulas with output_column_names must be provided")
2008
2112
 
2009
- def with_row_index(
2010
- self, name: str = "index", offset: int = 0, description: str = None
2011
- ) -> "FlowFrame":
2113
+ def with_row_index(self, name: str = "index", offset: int = 0, description: str = None) -> "FlowFrame":
2012
2114
  """
2013
2115
  Add a row index as the first column in the DataFrame.
2014
2116
 
@@ -2055,9 +2157,7 @@ class FlowFrame:
2055
2157
  else:
2056
2158
  # Use the polars code approach for other cases
2057
2159
  code = f"input_df.with_row_index(name='{name}', offset={offset})"
2058
- self._add_polars_code(
2059
- new_node_id, code, description or f"Add row index column '{name}'"
2060
- )
2160
+ self._add_polars_code(new_node_id, code, description or f"Add row index column '{name}'")
2061
2161
 
2062
2162
  return self._create_child_frame(new_node_id)
2063
2163
 
@@ -2091,9 +2191,7 @@ class FlowFrame:
2091
2191
  all_columns = []
2092
2192
 
2093
2193
  if isinstance(columns, (list, tuple)):
2094
- all_columns.extend(
2095
- [col.column_name if isinstance(col, Column) else col for col in columns]
2096
- )
2194
+ all_columns.extend([col.column_name if isinstance(col, Column) else col for col in columns])
2097
2195
  else:
2098
2196
  all_columns.append(columns.column_name if isinstance(columns, Column) else columns)
2099
2197
 
@@ -2102,10 +2200,9 @@ class FlowFrame:
2102
2200
  all_columns.append(col.column_name if isinstance(col, Column) else col)
2103
2201
 
2104
2202
  if len(all_columns) == 1:
2105
-
2106
2203
  columns_str = stringify_values(all_columns[0])
2107
2204
  else:
2108
- columns_str = "[" + ", ".join([ stringify_values(col) for col in all_columns]) + "]"
2205
+ columns_str = "[" + ", ".join([stringify_values(col) for col in all_columns]) + "]"
2109
2206
 
2110
2207
  code = f"""
2111
2208
  # Explode columns into multiple rows
@@ -2120,24 +2217,25 @@ class FlowFrame:
2120
2217
 
2121
2218
  return self._create_child_frame(new_node_id)
2122
2219
 
2123
- def fuzzy_match(self,
2124
- other: "FlowFrame",
2125
- fuzzy_mappings: List[FuzzyMapping],
2126
- description: str = None,
2127
- ) -> "FlowFrame":
2220
+ def fuzzy_match(
2221
+ self,
2222
+ other: "FlowFrame",
2223
+ fuzzy_mappings: list[FuzzyMapping],
2224
+ description: str = None,
2225
+ ) -> "FlowFrame":
2128
2226
  self._ensure_same_graph(other)
2129
2227
 
2130
2228
  # Step 3: Generate new node ID
2131
2229
  new_node_id = generate_node_id()
2132
- node_fuzzy_match = input_schema.NodeFuzzyMatch(flow_id=self.flow_graph.flow_id,
2133
- node_id=new_node_id,
2134
- join_input=
2135
- transform_schema.FuzzyMatchInput(join_mapping=fuzzy_mappings,
2136
- left_select=self.columns,
2137
- right_select=other.columns),
2138
- description=description or "Fuzzy match between two FlowFrames",
2139
- depending_on_ids=[self.node_id, other.node_id],
2140
- )
2230
+ node_fuzzy_match = input_schema.NodeFuzzyMatch(
2231
+ flow_id=self.flow_graph.flow_id,
2232
+ node_id=new_node_id,
2233
+ join_input=transform_schema.FuzzyMatchInput(
2234
+ join_mapping=fuzzy_mappings, left_select=self.columns, right_select=other.columns
2235
+ ),
2236
+ description=description or "Fuzzy match between two FlowFrames",
2237
+ depending_on_ids=[self.node_id, other.node_id],
2238
+ )
2141
2239
  self.flow_graph.add_fuzzy_match(node_fuzzy_match)
2142
2240
  self._add_connection(self.node_id, new_node_id, "main")
2143
2241
  other._add_connection(other.node_id, new_node_id, "right")
@@ -2216,7 +2314,7 @@ class FlowFrame:
2216
2314
 
2217
2315
  def unique(
2218
2316
  self,
2219
- subset: Union[str, "Expr", List[ Union[ str, "Expr"]]] = None,
2317
+ subset: Union[str, "Expr", list[Union[str, "Expr"]]] = None,
2220
2318
  *,
2221
2319
  keep: Literal["first", "last", "any", "none"] = "any",
2222
2320
  maintain_order: bool = False,
@@ -2273,17 +2371,11 @@ class FlowFrame:
2273
2371
  break
2274
2372
 
2275
2373
  # Determine if we can use the native implementation
2276
- can_use_native = (
2277
- can_use_native
2278
- and keep in ["any", "first", "last", "none"]
2279
- and not maintain_order
2280
- )
2374
+ can_use_native = can_use_native and keep in ["any", "first", "last", "none"] and not maintain_order
2281
2375
 
2282
2376
  if can_use_native:
2283
2377
  # Use the native NodeUnique implementation
2284
- unique_input = transform_schema.UniqueInput(
2285
- columns=processed_subset, strategy=keep
2286
- )
2378
+ unique_input = transform_schema.UniqueInput(columns=processed_subset, strategy=keep)
2287
2379
 
2288
2380
  # Create node settings
2289
2381
  unique_settings = input_schema.NodeUnique(
@@ -2336,12 +2428,12 @@ class FlowFrame:
2336
2428
  return self._create_child_frame(new_node_id)
2337
2429
 
2338
2430
  @property
2339
- def columns(self) -> List[str]:
2431
+ def columns(self) -> list[str]:
2340
2432
  """Get the column names."""
2341
2433
  return self.data.collect_schema().names()
2342
2434
 
2343
2435
  @property
2344
- def dtypes(self) -> List[pl.DataType]:
2436
+ def dtypes(self) -> list[pl.DataType]:
2345
2437
  """Get the column data types."""
2346
2438
  return self.data.dtypes
2347
2439