Flowfile 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329)
  1. build_backends/main.py +25 -22
  2. build_backends/main_prd.py +10 -19
  3. flowfile/__init__.py +178 -74
  4. flowfile/__main__.py +10 -7
  5. flowfile/api.py +51 -57
  6. flowfile/web/__init__.py +14 -9
  7. flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
  8. flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
  9. flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
  10. flowfile/web/static/assets/{CloudConnectionManager-0dfba9f2.js → CloudConnectionView-f13f202b.js} +11 -11
  11. flowfile/web/static/assets/{CloudStorageReader-d5b1b6c9.js → CloudStorageReader-0023d4a5.js} +10 -8
  12. flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
  13. flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
  14. flowfile/web/static/assets/{CloudStorageWriter-00d87aad.js → CloudStorageWriter-8e781e11.js} +10 -8
  15. flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
  16. flowfile/web/static/assets/{ColumnSelector-4685e75d.js → ColumnSelector-8ad68ea9.js} +3 -5
  17. flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
  18. flowfile/web/static/assets/{ContextMenu-23e909da.js → ContextMenu-31ee57f0.js} +3 -3
  19. flowfile/web/static/assets/{ContextMenu-70ae0c79.js → ContextMenu-69a74055.js} +3 -3
  20. flowfile/web/static/assets/{ContextMenu-f149cf7c.js → ContextMenu-8e2051c6.js} +3 -3
  21. flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
  22. flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
  23. flowfile/web/static/assets/{CrossJoin-702a3edd.js → CrossJoin-03df6938.js} +12 -10
  24. flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
  25. flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
  26. flowfile/web/static/assets/{CustomNode-b1519993.js → CustomNode-8479239b.js} +36 -24
  27. flowfile/web/static/assets/{DatabaseConnectionSettings-6f3e4ea5.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
  28. flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
  29. flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
  30. flowfile/web/static/assets/{DatabaseReader-d38c7295.js → DatabaseReader-c58b9552.js} +25 -15
  31. flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
  32. flowfile/web/static/assets/{DatabaseManager-cf5ef661.js → DatabaseView-d26a9140.js} +11 -11
  33. flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
  34. flowfile/web/static/assets/{DatabaseWriter-b04ef46a.js → DatabaseWriter-4d05ddc7.js} +17 -10
  35. flowfile/web/static/assets/{designer-8da3ba3a.css → DesignerView-a6d0ee84.css} +614 -546
  36. flowfile/web/static/assets/{designer-9633482a.js → DesignerView-e6f5c0e8.js} +1107 -3170
  37. flowfile/web/static/assets/{documentation-ca400224.js → DocumentationView-2e78ef1b.js} +5 -5
  38. flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
  39. flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
  40. flowfile/web/static/assets/{ExploreData-5fa10ed8.js → ExploreData-7b54caca.js} +18 -9
  41. flowfile/web/static/assets/{ExternalSource-d39af878.js → ExternalSource-3fa399b2.js} +9 -7
  42. flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
  43. flowfile/web/static/assets/Filter-7494ea97.css +48 -0
  44. flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
  45. flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
  46. flowfile/web/static/assets/{Formula-6b04fb1d.js → Formula-aac42b1e.js} +13 -11
  47. flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
  48. flowfile/web/static/assets/{FuzzyMatch-999521f4.js → FuzzyMatch-cd9bbfca.js} +12 -10
  49. flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
  50. flowfile/web/static/assets/{GraphSolver-17dd2198.js → GraphSolver-c7e6780e.js} +13 -11
  51. flowfile/web/static/assets/{GroupBy-6b039e18.js → GroupBy-93c5d22b.js} +9 -7
  52. flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
  53. flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
  54. flowfile/web/static/assets/{Join-24d0f113.js → Join-a19b2de2.js} +13 -11
  55. flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
  56. flowfile/web/static/assets/LoginView-d325d632.css +172 -0
  57. flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
  58. flowfile/web/static/assets/{ManualInput-34639209.js → ManualInput-8d3374b2.js} +170 -116
  59. flowfile/web/static/assets/{MultiSelect-0e8724a3.js → MultiSelect-ad1b6243.js} +2 -2
  60. flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
  61. flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
  62. flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
  63. flowfile/web/static/assets/{NumericInput-3d63a470.js → NumericInput-7100234c.js} +2 -2
  64. flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
  65. flowfile/web/static/assets/{Output-283fe388.css → Output-35e97000.css} +6 -6
  66. flowfile/web/static/assets/{Output-edea9802.js → Output-f5efd2aa.js} +12 -9
  67. flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
  68. flowfile/web/static/assets/{Pivot-61d19301.js → Pivot-d981d23c.js} +11 -9
  69. flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
  70. flowfile/web/static/assets/{PivotValidation-f97fec5b.js → PivotValidation-39386e95.js} +3 -3
  71. flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
  72. flowfile/web/static/assets/{PivotValidation-de9f43fe.js → PivotValidation-63de1f73.js} +3 -3
  73. flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
  74. flowfile/web/static/assets/{PolarsCode-bc3c9984.js → PolarsCode-f9d69217.js} +18 -9
  75. flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
  76. flowfile/web/static/assets/PopOver-d96599db.css +33 -0
  77. flowfile/web/static/assets/{Read-e808b239.css → Read-36e7bd51.css} +12 -12
  78. flowfile/web/static/assets/{Read-64a3f259.js → Read-aec2e377.js} +14 -11
  79. flowfile/web/static/assets/{RecordCount-3d5039be.js → RecordCount-78ed6845.js} +6 -4
  80. flowfile/web/static/assets/{RecordId-597510e0.js → RecordId-2156e890.js} +8 -6
  81. flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
  82. flowfile/web/static/assets/{SQLQueryComponent-df51adbe.js → SQLQueryComponent-48c72f5b.js} +3 -3
  83. flowfile/web/static/assets/{Sample-4be0a507.js → Sample-1352ca74.js} +6 -4
  84. flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
  85. flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
  86. flowfile/web/static/assets/{SecretManager-4839be57.js → SecretsView-17df66ee.js} +35 -36
  87. flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
  88. flowfile/web/static/assets/{Select-9b72f201.js → Select-0aee4c54.js} +9 -7
  89. flowfile/web/static/assets/{SettingsSection-f0f75a42.js → SettingsSection-0784e157.js} +3 -3
  90. flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
  91. flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
  92. flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
  93. flowfile/web/static/assets/{SettingsSection-e1e9c953.js → SettingsSection-cd341bb6.js} +3 -3
  94. flowfile/web/static/assets/{SettingsSection-7ded385d.js → SettingsSection-f2002a6d.js} +3 -3
  95. flowfile/web/static/assets/{SingleSelect-6c777aac.js → SingleSelect-460cc0ea.js} +2 -2
  96. flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
  97. flowfile/web/static/assets/{SliderInput-7cb93e62.js → SliderInput-5d926864.js} +7 -4
  98. flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
  99. flowfile/web/static/assets/{Sort-6cbde21a.js → Sort-3cdc971b.js} +9 -7
  100. flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
  101. flowfile/web/static/assets/{TextInput-d9a40c11.js → TextInput-a2d0bfbd.js} +2 -2
  102. flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-5896c375.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
  103. flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
  104. flowfile/web/static/assets/{TextToRows-c4fcbf4d.js → TextToRows-918945f7.js} +11 -10
  105. flowfile/web/static/assets/{ToggleSwitch-4ef91d19.js → ToggleSwitch-f0ef5196.js} +2 -2
  106. flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
  107. flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
  108. flowfile/web/static/assets/{UnavailableFields-a03f512c.js → UnavailableFields-bdad6144.js} +4 -4
  109. flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
  110. flowfile/web/static/assets/{Union-bfe9b996.js → Union-e8ab8c86.js} +8 -6
  111. flowfile/web/static/assets/{Unique-5d023a27.js → Unique-8cd4f976.js} +13 -10
  112. flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
  113. flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
  114. flowfile/web/static/assets/{Unpivot-91cc5354.js → Unpivot-8da14095.js} +10 -8
  115. flowfile/web/static/assets/{UnpivotValidation-7ee2de44.js → UnpivotValidation-6f7d89ff.js} +3 -3
  116. flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
  117. flowfile/web/static/assets/{VueGraphicWalker-e51b9924.js → VueGraphicWalker-3fb312e1.js} +4 -4
  118. flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
  119. flowfile/web/static/assets/{api-cf1221f0.js → api-24483f0d.js} +1 -1
  120. flowfile/web/static/assets/{api-c1bad5ca.js → api-8b81fa73.js} +1 -1
  121. flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
  122. flowfile/web/static/assets/{dropDown-614b998d.js → dropDown-ac0fda9d.js} +3 -3
  123. flowfile/web/static/assets/{fullEditor-f7971590.js → fullEditor-5497a84a.js} +11 -10
  124. flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
  125. flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
  126. flowfile/web/static/assets/{genericNodeSettings-4fe5f36b.js → genericNodeSettings-99014e1d.js} +5 -5
  127. flowfile/web/static/assets/index-07dda503.js +38 -0
  128. flowfile/web/static/assets/index-3ba44389.js +2696 -0
  129. flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
  130. flowfile/web/static/assets/{index-5429bbf8.js → index-fb6493ae.js} +41626 -40867
  131. flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
  132. flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
  133. flowfile/web/static/assets/{outputCsv-076b85ab.js → outputCsv-8f8ba42d.js} +3 -3
  134. flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
  135. flowfile/web/static/assets/{outputExcel-0fd17dbe.js → outputExcel-393f4fef.js} +3 -3
  136. flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
  137. flowfile/web/static/assets/{outputParquet-b61e0847.js → outputParquet-07c81f65.js} +4 -4
  138. flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
  139. flowfile/web/static/assets/{readCsv-a8bb8b61.js → readCsv-07f6d9ad.js} +3 -3
  140. flowfile/web/static/assets/{readCsv-c767cb37.css → readCsv-3bfac4c3.css} +15 -15
  141. flowfile/web/static/assets/{readExcel-806d2826.css → readExcel-3db6b763.css} +13 -13
  142. flowfile/web/static/assets/{readExcel-67b4aee0.js → readExcel-ed69bc8f.js} +5 -5
  143. flowfile/web/static/assets/{readParquet-48c81530.css → readParquet-c5244ad5.css} +4 -4
  144. flowfile/web/static/assets/{readParquet-92ce1dbc.js → readParquet-e3ed4528.js} +3 -3
  145. flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
  146. flowfile/web/static/assets/{selectDynamic-92e25ee3.js → selectDynamic-80b92899.js} +5 -5
  147. flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
  148. flowfile/web/static/assets/{vue-codemirror.esm-41b0e0d7.js → vue-codemirror.esm-0965f39f.js} +31 -640
  149. flowfile/web/static/assets/{vue-content-loader.es-2c8e608f.js → vue-content-loader.es-c506ad97.js} +1 -1
  150. flowfile/web/static/index.html +2 -2
  151. {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +2 -3
  152. flowfile-0.5.3.dist-info/RECORD +402 -0
  153. flowfile_core/__init__.py +13 -6
  154. flowfile_core/auth/jwt.py +51 -16
  155. flowfile_core/auth/models.py +32 -7
  156. flowfile_core/auth/password.py +89 -0
  157. flowfile_core/auth/secrets.py +8 -6
  158. flowfile_core/configs/__init__.py +9 -7
  159. flowfile_core/configs/flow_logger.py +15 -14
  160. flowfile_core/configs/node_store/__init__.py +72 -4
  161. flowfile_core/configs/node_store/nodes.py +155 -172
  162. flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
  163. flowfile_core/configs/settings.py +28 -15
  164. flowfile_core/database/connection.py +7 -6
  165. flowfile_core/database/init_db.py +96 -2
  166. flowfile_core/database/models.py +3 -1
  167. flowfile_core/fileExplorer/__init__.py +17 -0
  168. flowfile_core/fileExplorer/funcs.py +123 -57
  169. flowfile_core/fileExplorer/utils.py +10 -11
  170. flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
  171. flowfile_core/flowfile/analytics/analytics_processor.py +26 -24
  172. flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
  173. flowfile_core/flowfile/analytics/utils.py +1 -1
  174. flowfile_core/flowfile/code_generator/code_generator.py +358 -244
  175. flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
  176. flowfile_core/flowfile/connection_manager/models.py +1 -1
  177. flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
  178. flowfile_core/flowfile/database_connection_manager/models.py +1 -1
  179. flowfile_core/flowfile/extensions.py +17 -12
  180. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
  181. flowfile_core/flowfile/flow_data_engine/create/funcs.py +115 -83
  182. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +481 -423
  183. flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
  184. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
  185. flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
  186. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
  187. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
  188. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +31 -20
  189. flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
  190. flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
  191. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +14 -15
  192. flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
  193. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
  194. flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
  195. flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
  196. flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
  197. flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
  198. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +190 -127
  199. flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
  200. flowfile_core/flowfile/flow_data_engine/utils.py +99 -67
  201. flowfile_core/flowfile/flow_graph.py +918 -571
  202. flowfile_core/flowfile/flow_graph_utils.py +31 -49
  203. flowfile_core/flowfile/flow_node/flow_node.py +330 -233
  204. flowfile_core/flowfile/flow_node/models.py +53 -41
  205. flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
  206. flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
  207. flowfile_core/flowfile/handler.py +80 -30
  208. flowfile_core/flowfile/manage/compatibility_enhancements.py +209 -126
  209. flowfile_core/flowfile/manage/io_flowfile.py +54 -57
  210. flowfile_core/flowfile/node_designer/__init__.py +15 -13
  211. flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
  212. flowfile_core/flowfile/node_designer/custom_node.py +162 -36
  213. flowfile_core/flowfile/node_designer/ui_components.py +135 -34
  214. flowfile_core/flowfile/schema_callbacks.py +71 -51
  215. flowfile_core/flowfile/setting_generator/__init__.py +0 -1
  216. flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
  217. flowfile_core/flowfile/setting_generator/settings.py +64 -53
  218. flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
  219. flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
  220. flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
  221. flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
  222. flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
  223. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
  224. flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
  225. flowfile_core/flowfile/util/calculate_layout.py +9 -13
  226. flowfile_core/flowfile/util/execution_orderer.py +25 -17
  227. flowfile_core/flowfile/util/node_skipper.py +4 -4
  228. flowfile_core/flowfile/utils.py +19 -21
  229. flowfile_core/main.py +26 -19
  230. flowfile_core/routes/auth.py +284 -11
  231. flowfile_core/routes/cloud_connections.py +25 -25
  232. flowfile_core/routes/logs.py +21 -29
  233. flowfile_core/routes/public.py +3 -3
  234. flowfile_core/routes/routes.py +70 -34
  235. flowfile_core/routes/secrets.py +25 -27
  236. flowfile_core/routes/user_defined_components.py +483 -4
  237. flowfile_core/run_lock.py +0 -1
  238. flowfile_core/schemas/__init__.py +4 -6
  239. flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
  240. flowfile_core/schemas/cloud_storage_schemas.py +59 -53
  241. flowfile_core/schemas/input_schema.py +231 -144
  242. flowfile_core/schemas/output_model.py +49 -34
  243. flowfile_core/schemas/schemas.py +116 -89
  244. flowfile_core/schemas/transform_schema.py +518 -263
  245. flowfile_core/schemas/yaml_types.py +21 -7
  246. flowfile_core/secret_manager/secret_manager.py +17 -13
  247. flowfile_core/types.py +29 -9
  248. flowfile_core/utils/arrow_reader.py +7 -6
  249. flowfile_core/utils/excel_file_manager.py +3 -3
  250. flowfile_core/utils/fileManager.py +7 -7
  251. flowfile_core/utils/fl_executor.py +8 -10
  252. flowfile_core/utils/utils.py +4 -4
  253. flowfile_core/utils/validate_setup.py +5 -4
  254. flowfile_frame/__init__.py +106 -51
  255. flowfile_frame/adapters.py +2 -9
  256. flowfile_frame/adding_expr.py +73 -32
  257. flowfile_frame/cloud_storage/frame_helpers.py +27 -23
  258. flowfile_frame/cloud_storage/secret_manager.py +12 -26
  259. flowfile_frame/config.py +2 -5
  260. flowfile_frame/expr.py +311 -218
  261. flowfile_frame/expr.pyi +160 -159
  262. flowfile_frame/expr_name.py +23 -23
  263. flowfile_frame/flow_frame.py +571 -476
  264. flowfile_frame/flow_frame.pyi +123 -104
  265. flowfile_frame/flow_frame_methods.py +227 -246
  266. flowfile_frame/group_frame.py +50 -20
  267. flowfile_frame/join.py +2 -2
  268. flowfile_frame/lazy.py +129 -87
  269. flowfile_frame/lazy_methods.py +83 -30
  270. flowfile_frame/list_name_space.py +55 -50
  271. flowfile_frame/selectors.py +148 -68
  272. flowfile_frame/series.py +9 -7
  273. flowfile_frame/utils.py +19 -21
  274. flowfile_worker/__init__.py +12 -7
  275. flowfile_worker/configs.py +11 -19
  276. flowfile_worker/create/__init__.py +14 -9
  277. flowfile_worker/create/funcs.py +114 -77
  278. flowfile_worker/create/models.py +46 -43
  279. flowfile_worker/create/pl_types.py +14 -15
  280. flowfile_worker/create/read_excel_tables.py +34 -41
  281. flowfile_worker/create/utils.py +22 -19
  282. flowfile_worker/external_sources/s3_source/main.py +18 -51
  283. flowfile_worker/external_sources/s3_source/models.py +34 -27
  284. flowfile_worker/external_sources/sql_source/main.py +8 -5
  285. flowfile_worker/external_sources/sql_source/models.py +13 -9
  286. flowfile_worker/flow_logger.py +10 -8
  287. flowfile_worker/funcs.py +214 -155
  288. flowfile_worker/main.py +11 -17
  289. flowfile_worker/models.py +35 -28
  290. flowfile_worker/process_manager.py +2 -3
  291. flowfile_worker/routes.py +121 -90
  292. flowfile_worker/secrets.py +9 -6
  293. flowfile_worker/spawner.py +80 -49
  294. flowfile_worker/utils.py +3 -2
  295. shared/__init__.py +2 -7
  296. shared/storage_config.py +25 -13
  297. test_utils/postgres/commands.py +3 -2
  298. test_utils/postgres/fixtures.py +9 -9
  299. test_utils/s3/commands.py +1 -1
  300. test_utils/s3/data_generator.py +3 -4
  301. test_utils/s3/demo_data_generator.py +4 -7
  302. test_utils/s3/fixtures.py +7 -5
  303. tools/migrate/__init__.py +1 -1
  304. tools/migrate/__main__.py +16 -29
  305. tools/migrate/legacy_schemas.py +251 -190
  306. tools/migrate/migrate.py +193 -181
  307. tools/migrate/tests/conftest.py +1 -3
  308. tools/migrate/tests/test_migrate.py +36 -41
  309. tools/migrate/tests/test_migration_e2e.py +28 -29
  310. tools/migrate/tests/test_node_migrations.py +50 -20
  311. flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
  312. flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
  313. flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
  314. flowfile/web/static/assets/Filter-9b6d08db.js +0 -164
  315. flowfile/web/static/assets/Filter-f62091b3.css +0 -20
  316. flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
  317. flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
  318. flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
  319. flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
  320. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
  321. flowfile/web/static/assets/nodeInput-5d0d6b79.js +0 -41
  322. flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
  323. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
  324. flowfile/web/static/assets/secretApi-68435402.js +0 -46
  325. flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
  326. flowfile-0.5.1.dist-info/RECORD +0 -388
  327. {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +0 -0
  328. {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +0 -0
  329. {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,15 +1,13 @@
1
- from typing import List, Dict, Optional, Set, Tuple
2
1
  import polars as pl
3
-
4
2
  from pl_fuzzy_frame_match.models import FuzzyMapping
5
3
 
6
- from flowfile_core.flowfile.flow_graph import FlowGraph
4
+ from flowfile_core.configs import logger
7
5
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, convert_pl_type_to_string
8
6
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import cast_str_to_polars_type
7
+ from flowfile_core.flowfile.flow_graph import FlowGraph
9
8
  from flowfile_core.flowfile.flow_node.flow_node import FlowNode
10
9
  from flowfile_core.flowfile.util.execution_orderer import determine_execution_order
11
10
  from flowfile_core.schemas import input_schema, transform_schema
12
- from flowfile_core.configs import logger
13
11
 
14
12
 
15
13
  class FlowGraphToPolarsConverter:
@@ -19,18 +17,19 @@ class FlowGraphToPolarsConverter:
19
17
  This class takes a FlowGraph instance and generates standalone Python code
20
18
  that uses only Polars, without any Flowfile dependencies.
21
19
  """
20
+
22
21
  flow_graph: FlowGraph
23
- node_var_mapping: Dict[int, str]
24
- imports: Set[str]
25
- code_lines: List[str]
26
- output_nodes: List[Tuple[int, str]] = []
27
- last_node_var: Optional[str] = None
22
+ node_var_mapping: dict[int, str]
23
+ imports: set[str]
24
+ code_lines: list[str]
25
+ output_nodes: list[tuple[int, str]] = []
26
+ last_node_var: str | None = None
28
27
 
29
28
  def __init__(self, flow_graph: FlowGraph):
30
29
  self.flow_graph = flow_graph
31
- self.node_var_mapping: Dict[int, str] = {} # Maps node_id to variable name
32
- self.imports: Set[str] = {"import polars as pl"}
33
- self.code_lines: List[str] = []
30
+ self.node_var_mapping: dict[int, str] = {} # Maps node_id to variable name
31
+ self.imports: set[str] = {"import polars as pl"}
32
+ self.code_lines: list[str] = []
34
33
  self.output_nodes = []
35
34
  self.last_node_var = None
36
35
 
@@ -44,7 +43,7 @@ class FlowGraphToPolarsConverter:
44
43
  # Get execution order
45
44
  execution_order = determine_execution_order(
46
45
  all_nodes=[node for node in self.flow_graph.nodes if node.is_correct],
47
- flow_starts=self.flow_graph._flow_starts + self.flow_graph.get_implicit_starter_nodes()
46
+ flow_starts=self.flow_graph._flow_starts + self.flow_graph.get_implicit_starter_nodes(),
48
47
  )
49
48
 
50
49
  # Generate code for each node in order
@@ -56,7 +55,7 @@ class FlowGraphToPolarsConverter:
56
55
 
57
56
  def handle_output_node(self, node: FlowNode, var_name: str) -> None:
58
57
  settings = node.setting_input
59
- if hasattr(settings, 'is_flow_output') and settings.is_flow_output:
58
+ if hasattr(settings, "is_flow_output") and settings.is_flow_output:
60
59
  self.output_nodes.append((node.node_id, var_name))
61
60
 
62
61
  def _generate_node_code(self, node: FlowNode) -> None:
@@ -82,67 +81,59 @@ class FlowGraphToPolarsConverter:
82
81
  self._add_comment(f"# TODO: Implement handler for node type: {node_type}")
83
82
  raise Exception(f"No handler implemented for node type: {node_type}")
84
83
 
85
- def _get_input_vars(self, node: FlowNode) -> Dict[str, str]:
84
+ def _get_input_vars(self, node: FlowNode) -> dict[str, str]:
86
85
  """Get input variable names for a node."""
87
86
  input_vars = {}
88
87
 
89
88
  if node.node_inputs.main_inputs:
90
89
  if len(node.node_inputs.main_inputs) == 1:
91
- input_vars['main'] = self.node_var_mapping.get(
92
- node.node_inputs.main_inputs[0].node_id, 'df'
93
- )
90
+ input_vars["main"] = self.node_var_mapping.get(node.node_inputs.main_inputs[0].node_id, "df")
94
91
  else:
95
92
  for i, input_node in enumerate(node.node_inputs.main_inputs):
96
- input_vars[f'main_{i}'] = self.node_var_mapping.get(
97
- input_node.node_id, f'df_{i}'
98
- )
93
+ input_vars[f"main_{i}"] = self.node_var_mapping.get(input_node.node_id, f"df_{i}")
99
94
 
100
95
  if node.node_inputs.left_input:
101
- input_vars['left'] = self.node_var_mapping.get(
102
- node.node_inputs.left_input.node_id, 'df_left'
103
- )
96
+ input_vars["left"] = self.node_var_mapping.get(node.node_inputs.left_input.node_id, "df_left")
104
97
 
105
98
  if node.node_inputs.right_input:
106
- input_vars['right'] = self.node_var_mapping.get(
107
- node.node_inputs.right_input.node_id, 'df_right'
108
- )
99
+ input_vars["right"] = self.node_var_mapping.get(node.node_inputs.right_input.node_id, "df_right")
109
100
 
110
101
  return input_vars
111
102
 
112
103
  def _handle_csv_read(self, file_settings: input_schema.ReceivedTable, var_name: str):
113
- if file_settings.table_settings.encoding.lower() in ('utf-8', 'utf8'):
104
+ if file_settings.table_settings.encoding.lower() in ("utf-8", "utf8"):
114
105
  encoding = "utf8-lossy"
115
106
  self._add_code(f"{var_name} = pl.scan_csv(")
116
107
  self._add_code(f' "{file_settings.abs_file_path}",')
117
108
  self._add_code(f' separator="{file_settings.table_settings.delimiter}",')
118
- self._add_code(f' has_header={file_settings.table_settings.has_headers},')
119
- self._add_code(f' ignore_errors={file_settings.table_settings.ignore_errors},')
109
+ self._add_code(f" has_header={file_settings.table_settings.has_headers},")
110
+ self._add_code(f" ignore_errors={file_settings.table_settings.ignore_errors},")
120
111
  self._add_code(f' encoding="{encoding}",')
121
- self._add_code(f' skip_rows={file_settings.table_settings.starting_from_line},')
112
+ self._add_code(f" skip_rows={file_settings.table_settings.starting_from_line},")
122
113
  self._add_code(")")
123
114
  else:
124
115
  self._add_code(f"{var_name} = pl.read_csv(")
125
116
  self._add_code(f' "{file_settings.abs_file_path}",')
126
117
  self._add_code(f' separator="{file_settings.table_settings.delimiter}",')
127
- self._add_code(f' has_header={file_settings.table_settings.has_headers},')
128
- self._add_code(f' ignore_errors={file_settings.table_settings.ignore_errors},')
118
+ self._add_code(f" has_header={file_settings.table_settings.has_headers},")
119
+ self._add_code(f" ignore_errors={file_settings.table_settings.ignore_errors},")
129
120
  if file_settings.table_settings.encoding:
130
121
  self._add_code(f' encoding="{file_settings.table_settings.encoding}",')
131
- self._add_code(f' skip_rows={file_settings.table_settings.starting_from_line},')
122
+ self._add_code(f" skip_rows={file_settings.table_settings.starting_from_line},")
132
123
  self._add_code(").lazy()")
133
124
 
134
- def _handle_cloud_storage_reader(self, settings: input_schema.NodeCloudStorageReader, var_name: str, input_vars: Dict[str, str]):
125
+ def _handle_cloud_storage_reader(
126
+ self, settings: input_schema.NodeCloudStorageReader, var_name: str, input_vars: dict[str, str]
127
+ ):
135
128
  cloud_read_settings = settings.cloud_storage_settings
136
- self.imports.add(
137
- "import flowfile as ff"
138
- )
129
+ self.imports.add("import flowfile as ff")
139
130
  if cloud_read_settings.file_format == "csv":
140
131
  self._add_code(f"{var_name} = ff.scan_csv_from_cloud_storage(")
141
132
  self._add_code(f' "{cloud_read_settings.resource_path}",')
142
133
  self._add_code(f' connection_name="{cloud_read_settings.connection_name}",')
143
134
  self._add_code(f' scan_mode="{cloud_read_settings.scan_mode}",')
144
135
  self._add_code(f' delimiter="{cloud_read_settings.csv_delimiter}",')
145
- self._add_code(f' has_header={cloud_read_settings.csv_has_header},')
136
+ self._add_code(f" has_header={cloud_read_settings.csv_has_header},")
146
137
  self._add_code(f' encoding="{cloud_read_settings.csv_encoding}",')
147
138
 
148
139
  elif cloud_read_settings.file_format == "parquet":
@@ -162,22 +153,22 @@ class FlowGraphToPolarsConverter:
162
153
  self._add_code(f' "{cloud_read_settings.resource_path}",')
163
154
  self._add_code(f' connection_name="{cloud_read_settings.connection_name}",')
164
155
  self._add_code(f' scan_mode="{cloud_read_settings.scan_mode}",')
165
- self._add_code(f' version_id={cloud_read_settings.delta_version},')
156
+ self._add_code(f" version_id={cloud_read_settings.delta_version},")
166
157
  else:
167
158
  return
168
159
  self._add_code(").data")
169
160
 
170
- def _handle_read(self, settings: input_schema.NodeRead, var_name: str, input_vars: Dict[str, str]) -> None:
161
+ def _handle_read(self, settings: input_schema.NodeRead, var_name: str, input_vars: dict[str, str]) -> None:
171
162
  """Handle file reading nodes."""
172
163
  file_settings = settings.received_file
173
164
 
174
- if file_settings.file_type == 'csv':
165
+ if file_settings.file_type == "csv":
175
166
  self._handle_csv_read(file_settings, var_name)
176
167
 
177
- elif file_settings.file_type == 'parquet':
168
+ elif file_settings.file_type == "parquet":
178
169
  self._add_code(f'{var_name} = pl.scan_parquet("{file_settings.abs_file_path}")')
179
170
 
180
- elif file_settings.file_type in ('xlsx', 'excel'):
171
+ elif file_settings.file_type in ("xlsx", "excel"):
181
172
  self._add_code(f"{var_name} = pl.read_excel(")
182
173
  self._add_code(f' "{file_settings.abs_file_path}",')
183
174
  if file_settings.table_settings.sheet_name:
@@ -187,12 +178,18 @@ class FlowGraphToPolarsConverter:
187
178
  self._add_code("")
188
179
 
189
180
  @staticmethod
190
- def _generate_pl_schema_with_typing(flowfile_schema: List[FlowfileColumn]) -> str:
191
- polars_schema_str = "pl.Schema([" + ", ".join(f'("{flowfile_column.column_name}", pl.{flowfile_column.data_type})'
192
- for flowfile_column in flowfile_schema) + "])"
181
+ def _generate_pl_schema_with_typing(flowfile_schema: list[FlowfileColumn]) -> str:
182
+ polars_schema_str = (
183
+ "pl.Schema(["
184
+ + ", ".join(
185
+ f'("{flowfile_column.column_name}", pl.{flowfile_column.data_type})'
186
+ for flowfile_column in flowfile_schema
187
+ )
188
+ + "])"
189
+ )
193
190
  return polars_schema_str
194
191
 
195
- def get_manual_schema_input(self, flowfile_schema: List[FlowfileColumn]) -> str:
192
+ def get_manual_schema_input(self, flowfile_schema: list[FlowfileColumn]) -> str:
196
193
  polars_schema_str = self._generate_pl_schema_with_typing(flowfile_schema)
197
194
  is_valid_pl_schema = self._validate_pl_schema(polars_schema_str)
198
195
  if is_valid_pl_schema:
@@ -210,19 +207,23 @@ class FlowGraphToPolarsConverter:
210
207
  logger.error(f"Invalid Polars schema: {e}")
211
208
  return False
212
209
 
213
- def _handle_manual_input(self, settings: input_schema.NodeManualInput, var_name: str, input_vars: Dict[str, str]) -> None:
210
+ def _handle_manual_input(
211
+ self, settings: input_schema.NodeManualInput, var_name: str, input_vars: dict[str, str]
212
+ ) -> None:
214
213
  """Handle manual data input nodes."""
215
214
  data = settings.raw_data_format.data
216
- flowfile_schema = list(FlowfileColumn.create_from_minimal_field_info(c) for c in settings.raw_data_format.columns)
215
+ flowfile_schema = list(
216
+ FlowfileColumn.create_from_minimal_field_info(c) for c in settings.raw_data_format.columns
217
+ )
217
218
  schema = self.get_manual_schema_input(flowfile_schema)
218
219
  self._add_code(f"{var_name} = pl.LazyFrame({data}, schema={schema}, strict=False)")
219
220
  self._add_code("")
220
221
 
221
- def _handle_filter(self, settings: input_schema.NodeFilter, var_name: str, input_vars: Dict[str, str]) -> None:
222
+ def _handle_filter(self, settings: input_schema.NodeFilter, var_name: str, input_vars: dict[str, str]) -> None:
222
223
  """Handle filter nodes."""
223
- input_df = input_vars.get('main', 'df')
224
+ input_df = input_vars.get("main", "df")
224
225
 
225
- if settings.filter_input.filter_type == 'advanced':
226
+ if settings.filter_input.is_advanced():
226
227
  # Parse the advanced filter expression
227
228
  self.imports.add(
228
229
  "from polars_expr_transformer.process.polars_expr_transformer import simple_function_to_expr"
@@ -233,28 +234,33 @@ class FlowGraphToPolarsConverter:
233
234
  else:
234
235
  # Handle basic filter
235
236
  basic = settings.filter_input.basic_filter
236
- filter_expr = self._create_basic_filter_expr(basic)
237
- self._add_code(f"{var_name} = {input_df}.filter({filter_expr})")
237
+ if basic is not None:
238
+ filter_expr = self._create_basic_filter_expr(basic)
239
+ self._add_code(f"{var_name} = {input_df}.filter({filter_expr})")
240
+ else:
241
+ self._add_code(f"{var_name} = {input_df} # No filter applied")
238
242
  self._add_code("")
239
243
 
240
- def _handle_record_count(self, settings: input_schema.NodeRecordCount, var_name: str, input_vars: Dict[str, str]):
241
- input_df = input_vars.get('main', 'df')
244
+ def _handle_record_count(self, settings: input_schema.NodeRecordCount, var_name: str, input_vars: dict[str, str]):
245
+ input_df = input_vars.get("main", "df")
242
246
  self._add_code(f"{var_name} = {input_df}.select(pl.len().alias('number_of_records'))")
243
247
 
244
- def _handle_graph_solver(self, settings: input_schema.NodeGraphSolver, var_name: str, input_vars: Dict[str, str]):
245
- input_df = input_vars.get('main', 'df')
248
+ def _handle_graph_solver(self, settings: input_schema.NodeGraphSolver, var_name: str, input_vars: dict[str, str]):
249
+ input_df = input_vars.get("main", "df")
246
250
  from_col_name = settings.graph_solver_input.col_from
247
251
  to_col_name = settings.graph_solver_input.col_to
248
252
  output_col_name = settings.graph_solver_input.output_column_name
249
- self._add_code(f'{var_name} = {input_df}.with_columns(graph_solver(pl.col("{from_col_name}"), '
250
- f'pl.col("{to_col_name}"))'
251
- f'.alias("{output_col_name}"))')
253
+ self._add_code(
254
+ f'{var_name} = {input_df}.with_columns(graph_solver(pl.col("{from_col_name}"), '
255
+ f'pl.col("{to_col_name}"))'
256
+ f'.alias("{output_col_name}"))'
257
+ )
252
258
  self._add_code("")
253
259
  self.imports.add("from polars_grouper import graph_solver")
254
260
 
255
- def _handle_select(self, settings: input_schema.NodeSelect, var_name: str, input_vars: Dict[str, str]) -> None:
261
+ def _handle_select(self, settings: input_schema.NodeSelect, var_name: str, input_vars: dict[str, str]) -> None:
256
262
  """Handle select/rename nodes."""
257
- input_df = input_vars.get('main', 'df')
263
+ input_df = input_vars.get("main", "df")
258
264
  # Get columns to keep and renames
259
265
  select_exprs = []
260
266
  for select_input in settings.select_input:
@@ -266,7 +272,7 @@ class FlowGraphToPolarsConverter:
266
272
 
267
273
  if (select_input.data_type_change or select_input.is_altered) and select_input.data_type:
268
274
  polars_dtype = self._get_polars_dtype(select_input.data_type)
269
- expr = f'{expr}.cast({polars_dtype})'
275
+ expr = f"{expr}.cast({polars_dtype})"
270
276
 
271
277
  select_exprs.append(expr)
272
278
 
@@ -279,7 +285,7 @@ class FlowGraphToPolarsConverter:
279
285
  self._add_code(f"{var_name} = {input_df}")
280
286
  self._add_code("")
281
287
 
282
- def _handle_join(self, settings: input_schema.NodeJoin, var_name: str, input_vars: Dict[str, str]) -> None:
288
+ def _handle_join(self, settings: input_schema.NodeJoin, var_name: str, input_vars: dict[str, str]) -> None:
283
289
  """Handle join nodes by routing to appropriate join type handler.
284
290
 
285
291
  This is the main entry point for processing join operations. It determines
@@ -293,8 +299,8 @@ class FlowGraphToPolarsConverter:
293
299
  Returns:
294
300
  None: Modifies internal state by adding generated code
295
301
  """
296
- left_df = input_vars.get('main', input_vars.get('main_0', 'df_left'))
297
- right_df = input_vars.get('right', input_vars.get('main_1', 'df_right'))
302
+ left_df = input_vars.get("main", input_vars.get("main_0", "df_left"))
303
+ right_df = input_vars.get("right", input_vars.get("main_1", "df_right"))
298
304
  # Ensure left and right DataFrames are distinct
299
305
  if left_df == right_df:
300
306
  right_df = "df_right"
@@ -305,8 +311,9 @@ class FlowGraphToPolarsConverter:
305
311
  else:
306
312
  self._handle_standard_join(settings, var_name, left_df, right_df)
307
313
 
308
- def _handle_semi_anti_join(self, settings: input_schema.NodeJoin, var_name: str, left_df: str,
309
- right_df: str) -> None:
314
+ def _handle_semi_anti_join(
315
+ self, settings: input_schema.NodeJoin, var_name: str, left_df: str, right_df: str
316
+ ) -> None:
310
317
  """Handle semi and anti joins which only return rows from the left DataFrame.
311
318
 
312
319
  Semi joins return rows from left DataFrame that have matches in right.
@@ -333,8 +340,9 @@ class FlowGraphToPolarsConverter:
333
340
  self._add_code(" )")
334
341
  self._add_code(")")
335
342
 
336
- def _handle_standard_join(self, settings: input_schema.NodeJoin, var_name: str, left_df: str,
337
- right_df: str) -> None:
343
+ def _handle_standard_join(
344
+ self, settings: input_schema.NodeJoin, var_name: str, left_df: str, right_df: str
345
+ ) -> None:
338
346
  """Handle standard joins (left, right, inner, outer) with full column management.
339
347
 
340
348
  Standard joins may include columns from both DataFrames and require careful
@@ -370,12 +378,11 @@ class FlowGraphToPolarsConverter:
370
378
  )
371
379
  # Execute the join
372
380
  self._execute_join_with_post_processing(
373
- settings, var_name, left_df, right_df, left_on, right_on,
374
- after_join_drop_cols, reverse_action
381
+ settings, var_name, left_df, right_df, left_on, right_on, after_join_drop_cols, reverse_action
375
382
  )
376
383
 
377
384
  @staticmethod
378
- def _get_join_keys(settings: transform_schema.JoinInputManager) -> Tuple[List[str], List[str]]:
385
+ def _get_join_keys(settings: transform_schema.JoinInputManager) -> tuple[list[str], list[str]]:
379
386
  """Extract join keys based on join type.
380
387
 
381
388
  Different join types require different handling of join keys:
@@ -397,8 +404,9 @@ class FlowGraphToPolarsConverter:
397
404
 
398
405
  return left_on, right_on
399
406
 
400
- def _apply_pre_join_transformations(self, settings: transform_schema.JoinInputManager, left_df: str, right_df: str) -> Tuple[
401
- str, str]:
407
+ def _apply_pre_join_transformations(
408
+ self, settings: transform_schema.JoinInputManager, left_df: str, right_df: str
409
+ ) -> tuple[str, str]:
402
410
  """Apply column renames and drops before the join operation.
403
411
 
404
412
  Pre-join transformations prepare DataFrames by:
@@ -419,8 +427,7 @@ class FlowGraphToPolarsConverter:
419
427
  right_renames = {
420
428
  column.old_name: column.new_name
421
429
  for column in settings.right_select.renames
422
- if
423
- column.old_name != column.new_name and not column.join_key or settings.how in ("outer", "right")
430
+ if column.old_name != column.new_name and not column.join_key or settings.how in ("outer", "right")
424
431
  }
425
432
 
426
433
  left_renames = {
@@ -430,13 +437,11 @@ class FlowGraphToPolarsConverter:
430
437
  }
431
438
 
432
439
  left_drop_columns = [
433
- column.old_name for column in settings.left_select.renames
434
- if not column.keep and not column.join_key
440
+ column.old_name for column in settings.left_select.renames if not column.keep and not column.join_key
435
441
  ]
436
442
 
437
443
  right_drop_columns = [
438
- column.old_name for column in settings.right_select.renames
439
- if not column.keep and not column.join_key
444
+ column.old_name for column in settings.right_select.renames if not column.keep and not column.join_key
440
445
  ]
441
446
 
442
447
  # Apply transformations
@@ -451,9 +456,14 @@ class FlowGraphToPolarsConverter:
451
456
 
452
457
  return left_df, right_df
453
458
 
454
- def _handle_join_key_transformations(self, settings: transform_schema.JoinInputManager, left_df: str, right_df: str,
455
- left_on: List[str], right_on: List[str]) \
456
- -> Tuple[List[str], List[str], Optional[Dict], List[str]]:
459
+ def _handle_join_key_transformations(
460
+ self,
461
+ settings: transform_schema.JoinInputManager,
462
+ left_df: str,
463
+ right_df: str,
464
+ left_on: list[str],
465
+ right_on: list[str],
466
+ ) -> tuple[list[str], list[str], dict | None, list[str]]:
457
467
  """Route to appropriate join-specific key transformation handler.
458
468
 
459
469
  Different join types require different strategies for handling join keys
@@ -484,9 +494,9 @@ class FlowGraphToPolarsConverter:
484
494
  else:
485
495
  return left_on, right_on, None, []
486
496
 
487
- def _handle_left_inner_join_keys(self, settings: transform_schema.JoinInputManager, right_df: str,
488
- left_on: List[str], right_on: List[str]) -> Tuple[
489
- List[str], List[str], Dict, List[str]]:
497
+ def _handle_left_inner_join_keys(
498
+ self, settings: transform_schema.JoinInputManager, right_df: str, left_on: list[str], right_on: list[str]
499
+ ) -> tuple[list[str], list[str], dict, list[str]]:
490
500
  """Handle key transformations for left and inner joins.
491
501
 
492
502
  For left/inner joins:
@@ -510,27 +520,26 @@ class FlowGraphToPolarsConverter:
510
520
  left_join_keys_to_keep = [jk.new_name for jk in settings.left_select.join_key_selects if jk.keep]
511
521
  join_key_duplication_command = [
512
522
  f'pl.col("{rjk.old_name}").alias("__DROP__{rjk.new_name}__DROP__")'
513
- for rjk in settings.right_select.join_key_selects if rjk.keep
523
+ for rjk in settings.right_select.join_key_selects
524
+ if rjk.keep
514
525
  ]
515
526
 
516
527
  reverse_action = {
517
528
  f"__DROP__{rjk.new_name}__DROP__": rjk.new_name
518
- for rjk in settings.right_select.join_key_selects if rjk.keep
529
+ for rjk in settings.right_select.join_key_selects
530
+ if rjk.keep
519
531
  }
520
532
 
521
533
  if join_key_duplication_command:
522
534
  self._add_code(f"{right_df} = {right_df}.with_columns([{', '.join(join_key_duplication_command)}])")
523
535
 
524
- after_join_drop_cols = [
525
- k.new_name for k in settings.left_select.join_key_selects
526
- if not k.keep
527
- ]
536
+ after_join_drop_cols = [k.new_name for k in settings.left_select.join_key_selects if not k.keep]
528
537
 
529
538
  return left_on, right_on, reverse_action, after_join_drop_cols
530
539
 
531
- def _handle_right_join_keys(self, settings: transform_schema.JoinInputManager, left_df: str,
532
- left_on: List[str], right_on: List[str]) -> Tuple[
533
- List[str], List[str], None, List[str]]:
540
+ def _handle_right_join_keys(
541
+ self, settings: transform_schema.JoinInputManager, left_df: str, left_on: list[str], right_on: list[str]
542
+ ) -> tuple[list[str], list[str], None, list[str]]:
534
543
  """Handle key transformations for right joins.
535
544
 
536
545
  For right joins:
@@ -553,7 +562,8 @@ class FlowGraphToPolarsConverter:
553
562
  """
554
563
  join_key_duplication_command = [
555
564
  f'pl.col("{ljk.new_name}").alias("__jk_{ljk.new_name}")'
556
- for ljk in settings.left_select.join_key_selects if ljk.keep
565
+ for ljk in settings.left_select.join_key_selects
566
+ if ljk.keep
557
567
  ]
558
568
 
559
569
  # Update left_on keys
@@ -569,14 +579,15 @@ class FlowGraphToPolarsConverter:
569
579
  left_join_keys_keep = {jk.new_name for jk in settings.left_select.join_key_selects if jk.keep}
570
580
  after_join_drop_cols_right = [
571
581
  jk.new_name if jk.new_name not in left_join_keys_keep else jk.new_name + "_right"
572
- for jk in settings.right_select.join_key_selects if not jk.keep
582
+ for jk in settings.right_select.join_key_selects
583
+ if not jk.keep
573
584
  ]
574
585
  after_join_drop_cols = list(set(after_join_drop_cols_right))
575
586
  return left_on, right_on, None, after_join_drop_cols
576
587
 
577
- def _handle_outer_join_keys(self, settings: transform_schema.JoinInputManager, right_df: str,
578
- left_on: List[str],
579
- right_on: List[str]) -> Tuple[List[str], List[str], Dict, List[str]]:
588
+ def _handle_outer_join_keys(
589
+ self, settings: transform_schema.JoinInputManager, right_df: str, left_on: list[str], right_on: list[str]
590
+ ) -> tuple[list[str], list[str], dict, list[str]]:
580
591
  """Handle key transformations for outer joins.
581
592
 
582
593
  For outer joins:
@@ -600,14 +611,10 @@ class FlowGraphToPolarsConverter:
600
611
  left_join_keys = {jk.new_name for jk in settings.left_select.join_key_selects}
601
612
 
602
613
  join_keys_to_keep_and_rename = [
603
- rjk for rjk in settings.right_select.join_key_selects
604
- if rjk.keep and rjk.new_name in left_join_keys
614
+ rjk for rjk in settings.right_select.join_key_selects if rjk.keep and rjk.new_name in left_join_keys
605
615
  ]
606
616
 
607
- join_key_rename_command = {
608
- rjk.new_name: f"__jk_{rjk.new_name}"
609
- for rjk in join_keys_to_keep_and_rename
610
- }
617
+ join_key_rename_command = {rjk.new_name: f"__jk_{rjk.new_name}" for rjk in join_keys_to_keep_and_rename}
611
618
 
612
619
  # Update right_on keys
613
620
  for position, right_on_key in enumerate(right_on):
@@ -621,20 +628,27 @@ class FlowGraphToPolarsConverter:
621
628
  reverse_action = {f"__jk_{rjk.new_name}": rjk.new_name for rjk in join_keys_to_keep_and_rename}
622
629
 
623
630
  # Calculate columns to drop after join
624
- after_join_drop_cols_left = [
625
- jk.new_name for jk in settings.left_select.join_key_selects if not jk.keep
626
- ]
631
+ after_join_drop_cols_left = [jk.new_name for jk in settings.left_select.join_key_selects if not jk.keep]
627
632
  after_join_drop_cols_right = [
628
633
  jk.new_name if jk.new_name not in left_join_keys else jk.new_name + "_right"
629
- for jk in settings.right_select.join_key_selects if not jk.keep
634
+ for jk in settings.right_select.join_key_selects
635
+ if not jk.keep
630
636
  ]
631
637
  after_join_drop_cols = after_join_drop_cols_left + after_join_drop_cols_right
632
638
 
633
639
  return left_on, right_on, reverse_action, after_join_drop_cols
634
640
 
635
- def _execute_join_with_post_processing(self, settings: input_schema.NodeJoin, var_name: str,
636
- left_df: str, right_df: str, left_on: List[str], right_on: List[str],
637
- after_join_drop_cols: List[str], reverse_action: Optional[Dict]) -> None:
641
+ def _execute_join_with_post_processing(
642
+ self,
643
+ settings: input_schema.NodeJoin,
644
+ var_name: str,
645
+ left_df: str,
646
+ right_df: str,
647
+ left_on: list[str],
648
+ right_on: list[str],
649
+ after_join_drop_cols: list[str],
650
+ reverse_action: dict | None,
651
+ ) -> None:
638
652
  """Execute the join operation and apply post-processing steps.
639
653
 
640
654
  Generates the actual join code with any necessary post-processing:
@@ -665,7 +679,7 @@ class FlowGraphToPolarsConverter:
665
679
  self._add_code(" )")
666
680
 
667
681
  # Handle right join special case
668
- if settings.join_input.how == 'right':
682
+ if settings.join_input.how == "right":
669
683
  self._add_code(".collect()") # Right join needs to be collected first cause of issue with rename
670
684
 
671
685
  # Apply post-join transformations
@@ -676,21 +690,21 @@ class FlowGraphToPolarsConverter:
676
690
  self._add_code(f".rename({reverse_action})")
677
691
 
678
692
  # Convert back to lazy for right joins
679
- if settings.join_input.how == 'right':
680
- self._add_code(f".lazy()")
693
+ if settings.join_input.how == "right":
694
+ self._add_code(".lazy()")
681
695
 
682
696
  self._add_code(")")
683
697
 
684
- def _handle_group_by(self, settings: input_schema.NodeGroupBy, var_name: str, input_vars: Dict[str, str]) -> None:
698
+ def _handle_group_by(self, settings: input_schema.NodeGroupBy, var_name: str, input_vars: dict[str, str]) -> None:
685
699
  """Handle group by nodes."""
686
- input_df = input_vars.get('main', 'df')
700
+ input_df = input_vars.get("main", "df")
687
701
 
688
702
  # Separate groupby columns from aggregation columns
689
703
  group_cols = []
690
704
  agg_exprs = []
691
705
 
692
706
  for agg_col in settings.groupby_input.agg_cols:
693
- if agg_col.agg == 'groupby':
707
+ if agg_col.agg == "groupby":
694
708
  group_cols.append(agg_col.old_name)
695
709
  else:
696
710
  agg_func = self._get_agg_function(agg_col.agg)
@@ -703,9 +717,9 @@ class FlowGraphToPolarsConverter:
703
717
  self._add_code("])")
704
718
  self._add_code("")
705
719
 
706
- def _handle_formula(self, settings: input_schema.NodeFormula, var_name: str, input_vars: Dict[str, str]) -> None:
720
+ def _handle_formula(self, settings: input_schema.NodeFormula, var_name: str, input_vars: dict[str, str]) -> None:
707
721
  """Handle formula/expression nodes."""
708
- input_df = input_vars.get('main', 'df')
722
+ input_df = input_vars.get("main", "df")
709
723
  self.imports.add("from polars_expr_transformer.process.polars_expr_transformer import simple_function_to_expr")
710
724
 
711
725
  # Convert SQL-like formula to Polars expression
@@ -717,7 +731,7 @@ class FlowGraphToPolarsConverter:
717
731
  output_type = convert_pl_type_to_string(cast_str_to_polars_type(settings.function.field.data_type))
718
732
  if output_type[:3] != "pl.":
719
733
  output_type = "pl." + output_type
720
- self._add_code(f' .cast({output_type})')
734
+ self._add_code(f" .cast({output_type})")
721
735
 
722
736
  self._add_code("])")
723
737
  self._add_code("")
@@ -725,11 +739,11 @@ class FlowGraphToPolarsConverter:
725
739
  def _handle_pivot_no_index(self, settings: input_schema.NodePivot, var_name: str, input_df: str, agg_func: str):
726
740
  pivot_input = settings.pivot_input
727
741
 
728
- self._add_code(f'{var_name} = ({input_df}.collect()')
742
+ self._add_code(f"{var_name} = ({input_df}.collect()")
729
743
  self._add_code(' .with_columns(pl.lit(1).alias("__temp_index__"))')
730
- self._add_code(' .pivot(')
744
+ self._add_code(" .pivot(")
731
745
  self._add_code(f' values="{pivot_input.value_col}",')
732
- self._add_code(f' index=["__temp_index__"],')
746
+ self._add_code(' index=["__temp_index__"],')
733
747
  self._add_code(f' columns="{pivot_input.pivot_column}",')
734
748
  self._add_code(f' aggregate_function="{agg_func}"')
735
749
  self._add_code(" )")
@@ -737,17 +751,16 @@ class FlowGraphToPolarsConverter:
737
751
  self._add_code(").lazy()")
738
752
  self._add_code("")
739
753
 
740
- def _handle_pivot(self, settings: input_schema.NodePivot, var_name: str, input_vars: Dict[str, str]) -> None:
754
+ def _handle_pivot(self, settings: input_schema.NodePivot, var_name: str, input_vars: dict[str, str]) -> None:
741
755
  """Handle pivot nodes."""
742
- input_df = input_vars.get('main', 'df')
756
+ input_df = input_vars.get("main", "df")
743
757
  pivot_input = settings.pivot_input
744
758
  if len(pivot_input.aggregations) > 1:
745
- logger.error("Multiple aggregations are not convertable to polars code. "
746
- "Taking the first value")
759
+ logger.error("Multiple aggregations are not convertable to polars code. " "Taking the first value")
747
760
  if len(pivot_input.aggregations) > 0:
748
761
  agg_func = pivot_input.aggregations[0]
749
762
  else:
750
- agg_func = 'first'
763
+ agg_func = "first"
751
764
  if len(settings.pivot_input.index_columns) == 0:
752
765
  self._handle_pivot_no_index(settings, var_name, input_df, agg_func)
753
766
  else:
@@ -761,9 +774,9 @@ class FlowGraphToPolarsConverter:
761
774
  self._add_code(").lazy()")
762
775
  self._add_code("")
763
776
 
764
- def _handle_unpivot(self, settings: input_schema.NodeUnpivot, var_name: str, input_vars: Dict[str, str]) -> None:
777
+ def _handle_unpivot(self, settings: input_schema.NodeUnpivot, var_name: str, input_vars: dict[str, str]) -> None:
765
778
  """Handle unpivot nodes."""
766
- input_df = input_vars.get('main', 'df')
779
+ input_df = input_vars.get("main", "df")
767
780
  unpivot_input = settings.unpivot_input
768
781
 
769
782
  self._add_code(f"{var_name} = {input_df}.unpivot(")
@@ -779,22 +792,22 @@ class FlowGraphToPolarsConverter:
779
792
  self._add_code(")")
780
793
  self._add_code("")
781
794
 
782
- def _handle_union(self, settings: input_schema.NodeUnion, var_name: str, input_vars: Dict[str, str]) -> None:
795
+ def _handle_union(self, settings: input_schema.NodeUnion, var_name: str, input_vars: dict[str, str]) -> None:
783
796
  """Handle union nodes."""
784
797
  # Get all input LazyFrame
785
798
  dfs = []
786
- if 'main' in input_vars:
787
- dfs.append(input_vars['main'])
799
+ if "main" in input_vars:
800
+ dfs.append(input_vars["main"])
788
801
  else:
789
802
  # Multiple main inputs
790
803
  for key, df_var in input_vars.items():
791
- if key.startswith('main'):
804
+ if key.startswith("main"):
792
805
  dfs.append(df_var)
793
806
 
794
- if settings.union_input.mode == 'relaxed':
795
- how = 'diagonal_relaxed'
807
+ if settings.union_input.mode == "relaxed":
808
+ how = "diagonal_relaxed"
796
809
  else:
797
- how = 'diagonal'
810
+ how = "diagonal"
798
811
 
799
812
  self._add_code(f"{var_name} = pl.concat([")
800
813
  for df in dfs:
@@ -802,76 +815,88 @@ class FlowGraphToPolarsConverter:
802
815
  self._add_code(f"], how='{how}')")
803
816
  self._add_code("")
804
817
 
805
- def _handle_sort(self, settings: input_schema.NodeSort, var_name: str, input_vars: Dict[str, str]) -> None:
818
+ def _handle_sort(self, settings: input_schema.NodeSort, var_name: str, input_vars: dict[str, str]) -> None:
806
819
  """Handle sort nodes."""
807
- input_df = input_vars.get('main', 'df')
820
+ input_df = input_vars.get("main", "df")
808
821
 
809
822
  sort_cols = []
810
823
  descending = []
811
824
 
812
825
  for sort_input in settings.sort_input:
813
826
  sort_cols.append(f'"{sort_input.column}"')
814
- descending.append(sort_input.how == 'desc')
827
+ descending.append(sort_input.how == "desc")
815
828
 
816
829
  self._add_code(f"{var_name} = {input_df}.sort([{', '.join(sort_cols)}], descending={descending})")
817
830
  self._add_code("")
818
831
 
819
- def _handle_sample(self, settings: input_schema.NodeSample, var_name: str, input_vars: Dict[str, str]) -> None:
832
+ def _handle_sample(self, settings: input_schema.NodeSample, var_name: str, input_vars: dict[str, str]) -> None:
820
833
  """Handle sample nodes."""
821
- input_df = input_vars.get('main', 'df')
834
+ input_df = input_vars.get("main", "df")
822
835
  self._add_code(f"{var_name} = {input_df}.head(n={settings.sample_size})")
823
836
  self._add_code("")
824
837
 
825
838
  @staticmethod
826
- def _transform_fuzzy_mappings_to_string(fuzzy_mappings: List[FuzzyMapping]) -> str:
827
-
839
+ def _transform_fuzzy_mappings_to_string(fuzzy_mappings: list[FuzzyMapping]) -> str:
828
840
  output_str = "["
829
841
  for i, fuzzy_mapping in enumerate(fuzzy_mappings):
830
-
831
- output_str += (f"FuzzyMapping(left_col='{fuzzy_mapping.left_col}',"
832
- f" right_col='{fuzzy_mapping.right_col}', "
833
- f"threshold_score={fuzzy_mapping.threshold_score}, "
834
- f"fuzzy_type='{fuzzy_mapping.fuzzy_type}')")
842
+ output_str += (
843
+ f"FuzzyMapping(left_col='{fuzzy_mapping.left_col}',"
844
+ f" right_col='{fuzzy_mapping.right_col}', "
845
+ f"threshold_score={fuzzy_mapping.threshold_score}, "
846
+ f"fuzzy_type='{fuzzy_mapping.fuzzy_type}')"
847
+ )
835
848
  if i < len(fuzzy_mappings) - 1:
836
849
  output_str += ",\n"
837
850
  output_str += "]"
838
851
  return output_str
839
852
 
840
- def _handle_fuzzy_match(self, settings: input_schema.NodeFuzzyMatch, var_name: str, input_vars: Dict[str, str]) -> None:
853
+ def _handle_fuzzy_match(
854
+ self, settings: input_schema.NodeFuzzyMatch, var_name: str, input_vars: dict[str, str]
855
+ ) -> None:
841
856
  """Handle fuzzy match nodes."""
842
857
  self.imports.add("from pl_fuzzy_frame_match import FuzzyMapping, fuzzy_match_dfs")
843
858
  fuzzy_match_handler = transform_schema.FuzzyMatchInputManager(settings.join_input)
844
- left_df = input_vars.get('main', input_vars.get('main_0', 'df_left'))
845
- right_df = input_vars.get('right', input_vars.get('main_1', 'df_right'))
859
+ left_df = input_vars.get("main", input_vars.get("main_0", "df_left"))
860
+ right_df = input_vars.get("right", input_vars.get("main_1", "df_right"))
846
861
 
847
862
  if left_df == right_df:
848
863
  right_df = "df_right"
849
864
  self._add_code(f"{right_df} = {left_df}")
850
865
 
851
866
  if fuzzy_match_handler.left_select.has_drop_cols():
852
- self._add_code(f"{left_df} = {left_df}.drop({[c.old_name for c in fuzzy_match_handler.left_select.non_jk_drop_columns]})")
867
+ self._add_code(
868
+ f"{left_df} = {left_df}.drop({[c.old_name for c in fuzzy_match_handler.left_select.non_jk_drop_columns]})"
869
+ )
853
870
  if fuzzy_match_handler.right_select.has_drop_cols():
854
- self._add_code(f"{right_df} = {right_df}.drop({[c.old_name for c in fuzzy_match_handler.right_select.non_jk_drop_columns]})")
871
+ self._add_code(
872
+ f"{right_df} = {right_df}.drop({[c.old_name for c in fuzzy_match_handler.right_select.non_jk_drop_columns]})"
873
+ )
855
874
 
856
875
  fuzzy_join_mapping_settings = self._transform_fuzzy_mappings_to_string(fuzzy_match_handler.join_mapping)
857
- self._add_code(f"{var_name} = fuzzy_match_dfs(\n"
858
- f" left_df={left_df}, right_df={right_df},\n"
859
- f" fuzzy_maps={fuzzy_join_mapping_settings}\n"
860
- f" ).lazy()")
876
+ self._add_code(
877
+ f"{var_name} = fuzzy_match_dfs(\n"
878
+ f" left_df={left_df}, right_df={right_df},\n"
879
+ f" fuzzy_maps={fuzzy_join_mapping_settings}\n"
880
+ f" ).lazy()"
881
+ )
861
882
 
862
- def _handle_unique(self, settings: input_schema.NodeUnique, var_name: str, input_vars: Dict[str, str]) -> None:
883
+ def _handle_unique(self, settings: input_schema.NodeUnique, var_name: str, input_vars: dict[str, str]) -> None:
863
884
  """Handle unique/distinct nodes."""
864
- input_df = input_vars.get('main', 'df')
885
+ input_df = input_vars.get("main", "df")
865
886
 
866
887
  if settings.unique_input.columns:
867
- self._add_code(f"{var_name} = {input_df}.unique(subset={settings.unique_input.columns}, keep='{settings.unique_input.strategy}')")
888
+ self._add_code(
889
+ f"{var_name} = {input_df}.unique(subset={settings.unique_input.columns}, keep='{settings.unique_input.strategy}')"
890
+ )
868
891
  else:
869
892
  self._add_code(f"{var_name} = {input_df}.unique(keep='{settings.unique_input.strategy}')")
870
893
  self._add_code("")
871
894
 
872
- def _handle_text_to_rows(self, settings: input_schema.NodeTextToRows, var_name: str, input_vars: Dict[str, str]) -> None:
895
+ def _handle_text_to_rows(
896
+ self, settings: input_schema.NodeTextToRows, var_name: str, input_vars: dict[str, str]
897
+ ) -> None:
873
898
  """Handle text to rows (explode) nodes."""
874
- input_df = input_vars.get('main', 'df')
899
+ input_df = input_vars.get("main", "df")
875
900
  text_input = settings.text_to_rows_input
876
901
 
877
902
  # First split the column
@@ -884,96 +909,108 @@ class FlowGraphToPolarsConverter:
884
909
 
885
910
  self._add_code(f"{var_name} = {input_df}.with_columns({split_expr}).explode('{explode_col}')")
886
911
  self._add_code("")
912
+
887
913
  # .with_columns(
888
914
  # (pl.cum_count(record_id_settings.output_column_name)
889
915
  # .over(record_id_settings.group_by_columns) + record_id_settings.offset - 1)
890
916
  # .alias(record_id_settings.output_column_name)
891
917
  # )
892
- def _handle_record_id(self, settings: input_schema.NodeRecordId, var_name: str, input_vars: Dict[str, str]) -> None:
918
+ def _handle_record_id(self, settings: input_schema.NodeRecordId, var_name: str, input_vars: dict[str, str]) -> None:
893
919
  """Handle record ID nodes."""
894
- input_df = input_vars.get('main', 'df')
920
+ input_df = input_vars.get("main", "df")
895
921
  record_input = settings.record_id_input
896
922
  if record_input.group_by and record_input.group_by_columns:
897
-
898
923
  # Row number within groups
899
924
  self._add_code(f"{var_name} = ({input_df}")
900
925
  self._add_code(f" .with_columns(pl.lit(1).alias('{record_input.output_column_name}'))")
901
- self._add_code(f" .with_columns([")
902
- self._add_code(f" (pl.cum_count('{record_input.output_column_name}').over({record_input.group_by_columns}) + {record_input.offset} - 1)")
926
+ self._add_code(" .with_columns([")
927
+ self._add_code(
928
+ f" (pl.cum_count('{record_input.output_column_name}').over({record_input.group_by_columns}) + {record_input.offset} - 1)"
929
+ )
903
930
  self._add_code(f" .alias('{record_input.output_column_name}')")
904
931
  self._add_code("])")
905
- self._add_code(f".select(['{record_input.output_column_name}'] + [col for col in {input_df}.columns if col != '{record_input.output_column_name}'])")
932
+ self._add_code(
933
+ f".select(['{record_input.output_column_name}'] + [col for col in {input_df}.columns if col != '{record_input.output_column_name}'])"
934
+ )
906
935
  self._add_code(")")
907
936
  else:
908
937
  # Simple row number
909
- self._add_code(f"{var_name} = {input_df}.with_row_count(name='{record_input.output_column_name}', offset={record_input.offset})")
938
+ self._add_code(
939
+ f"{var_name} = {input_df}.with_row_count(name='{record_input.output_column_name}', offset={record_input.offset})"
940
+ )
910
941
  self._add_code("")
911
942
 
912
- def _handle_cross_join(self, settings: input_schema.NodeCrossJoin, var_name: str, input_vars: Dict[str, str]) -> None:
943
+ def _handle_cross_join(
944
+ self, settings: input_schema.NodeCrossJoin, var_name: str, input_vars: dict[str, str]
945
+ ) -> None:
913
946
  """Handle cross join nodes."""
914
- left_df = input_vars.get('main', input_vars.get('main_0', 'df_left'))
915
- right_df = input_vars.get('right', input_vars.get('main_1', 'df_right'))
947
+ left_df = input_vars.get("main", input_vars.get("main_0", "df_left"))
948
+ right_df = input_vars.get("right", input_vars.get("main_1", "df_right"))
916
949
 
917
950
  self._add_code(f"{var_name} = {left_df}.join({right_df}, how='cross')")
918
951
  self._add_code("")
919
952
 
920
- def _handle_cloud_storage_writer(self, settings: input_schema.NodeCloudStorageWriter, var_name: str, input_vars: Dict[str, str]) -> None:
953
+ def _handle_cloud_storage_writer(
954
+ self, settings: input_schema.NodeCloudStorageWriter, var_name: str, input_vars: dict[str, str]
955
+ ) -> None:
921
956
  """Handle cloud storage writer nodes."""
922
- input_df = input_vars.get('main', 'df')
957
+ input_df = input_vars.get("main", "df")
923
958
  # def write_csv_to_cloud_storage(self, path: str, connection_name: typing.Optional[str] = None, delimiter: str = ';', encoding: typing.Literal['utf8', 'utf8-lossy'] = 'utf8', description: Optional[str] = None) -> 'FlowFrame': ...
924
959
 
925
960
  output_settings = settings.cloud_storage_settings
926
961
  self.imports.add("import flowfile as ff")
927
962
  self._add_code(f"(ff.FlowFrame({input_df})")
928
963
  if output_settings.file_format == "csv":
929
- self._add_code(f' .write_csv_to_cloud_storage(')
964
+ self._add_code(" .write_csv_to_cloud_storage(")
930
965
  self._add_code(f' path="{output_settings.resource_path}",')
931
966
  self._add_code(f' connection_name="{output_settings.connection_name}",')
932
967
  self._add_code(f' delimiter="{output_settings.csv_delimiter}",')
933
968
  self._add_code(f' encoding="{output_settings.csv_encoding}",')
934
969
  self._add_code(f' description="{settings.description}"')
935
970
  elif output_settings.file_format == "parquet":
936
- self._add_code(f' .write_parquet_to_cloud_storage(')
971
+ self._add_code(" .write_parquet_to_cloud_storage(")
937
972
  self._add_code(f' path="{output_settings.resource_path}",')
938
973
  self._add_code(f' connection_name="{output_settings.connection_name}",')
939
974
  self._add_code(f' description="{settings.description}"')
940
975
  elif output_settings.file_format == "json":
941
- self._add_code(f' .write_json_to_cloud_storage(')
976
+ self._add_code(" .write_json_to_cloud_storage(")
942
977
  self._add_code(f' path="{output_settings.resource_path}",')
943
978
  self._add_code(f' connection_name="{output_settings.connection_name}",')
944
979
  self._add_code(f' description="{settings.description}"')
945
980
  elif output_settings.file_format == "delta":
946
- self._add_code(f' .write_delta(')
981
+ self._add_code(" .write_delta(")
947
982
  self._add_code(f' path="{output_settings.resource_path}",')
948
983
  self._add_code(f' write_mode="{output_settings.write_mode}",')
949
984
  self._add_code(f' connection_name="{output_settings.connection_name}",')
950
985
  self._add_code(f' description="{settings.description}"')
951
- self._add_code(' )')
952
- self._add_code(')')
986
+ self._add_code(" )")
987
+ self._add_code(")")
953
988
 
954
- def _handle_output(self, settings: input_schema.NodeOutput, var_name: str, input_vars: Dict[str, str]) -> None:
989
+ def _handle_output(self, settings: input_schema.NodeOutput, var_name: str, input_vars: dict[str, str]) -> None:
955
990
  """Handle output nodes."""
956
- input_df = input_vars.get('main', 'df')
991
+ input_df = input_vars.get("main", "df")
957
992
  output_settings = settings.output_settings
958
993
 
959
- if output_settings.file_type == 'csv':
960
- self._add_code(f'{input_df}.sink_csv(')
994
+ if output_settings.file_type == "csv":
995
+ self._add_code(f"{input_df}.sink_csv(")
961
996
  self._add_code(f' "{output_settings.abs_file_path}",')
962
997
  self._add_code(f' separator="{output_settings.table_settings.delimiter}"')
963
- self._add_code(')')
998
+ self._add_code(")")
964
999
 
965
- elif output_settings.file_type == 'parquet':
1000
+ elif output_settings.file_type == "parquet":
966
1001
  self._add_code(f'{input_df}.sink_parquet("{output_settings.abs_file_path}")')
967
1002
 
968
- elif output_settings.file_type == 'excel':
969
- self._add_code(f'{input_df}.collect().write_excel(')
1003
+ elif output_settings.file_type == "excel":
1004
+ self._add_code(f"{input_df}.collect().write_excel(")
970
1005
  self._add_code(f' "{output_settings.abs_file_path}",')
971
1006
  self._add_code(f' worksheet="{output_settings.table_settings.sheet_name}"')
972
- self._add_code(')')
1007
+ self._add_code(")")
973
1008
 
974
1009
  self._add_code("")
975
1010
 
976
- def _handle_polars_code(self, settings: input_schema.NodePolarsCode, var_name: str, input_vars: Dict[str, str]) -> None:
1011
+ def _handle_polars_code(
1012
+ self, settings: input_schema.NodePolarsCode, var_name: str, input_vars: dict[str, str]
1013
+ ) -> None:
977
1014
  """Handle custom Polars code nodes."""
978
1015
  code = settings.polars_code_input.polars_code.strip()
979
1016
  # Determine function parameters based on number of inputs
@@ -990,7 +1027,7 @@ class FlowGraphToPolarsConverter:
990
1027
  arg_list = []
991
1028
  i = 1
992
1029
  for key in sorted(input_vars.keys()):
993
- if key.startswith('main'):
1030
+ if key.startswith("main"):
994
1031
  param_list.append(f"input_df_{i}: pl.LazyFrame")
995
1032
  arg_list.append(input_vars[key])
996
1033
  i += 1
@@ -1001,7 +1038,7 @@ class FlowGraphToPolarsConverter:
1001
1038
  is_expression = "output_df" not in code
1002
1039
 
1003
1040
  # Wrap the code in a function
1004
- self._add_code(f"# Custom Polars code")
1041
+ self._add_code("# Custom Polars code")
1005
1042
  self._add_code(f"def _polars_code_{var_name.replace('df_', '')}({params}):")
1006
1043
 
1007
1044
  # Handle the code based on its structure
@@ -1010,18 +1047,18 @@ class FlowGraphToPolarsConverter:
1010
1047
  self._add_code(f" return {code}")
1011
1048
  else:
1012
1049
  # It contains assignments
1013
- for line in code.split('\n'):
1050
+ for line in code.split("\n"):
1014
1051
  if line.strip():
1015
1052
  self._add_code(f" {line}")
1016
1053
 
1017
1054
  # If no explicit return, try to detect what to return
1018
- if 'return' not in code:
1055
+ if "return" not in code:
1019
1056
  # Try to find the last assignment
1020
- lines = [l.strip() for l in code.split('\n') if l.strip() and '=' in l]
1057
+ lines = [l.strip() for l in code.split("\n") if l.strip() and "=" in l]
1021
1058
  if lines:
1022
1059
  last_assignment = lines[-1]
1023
- if '=' in last_assignment:
1024
- output_var = last_assignment.split('=')[0].strip()
1060
+ if "=" in last_assignment:
1061
+ output_var = last_assignment.split("=")[0].strip()
1025
1062
  self._add_code(f" return {output_var}")
1026
1063
 
1027
1064
  self._add_code("")
@@ -1054,14 +1091,7 @@ class FlowGraphToPolarsConverter:
1054
1091
  col, op, val = match.groups()
1055
1092
 
1056
1093
  # Map operators
1057
- op_map = {
1058
- '=': '==',
1059
- '!=': '!=',
1060
- '>': '>',
1061
- '<': '<',
1062
- '>=': '>=',
1063
- '<=': '<='
1064
- }
1094
+ op_map = {"=": "==", "!=": "!=", ">": ">", "<": "<", ">=": ">=", "<=": "<="}
1065
1095
 
1066
1096
  polars_op = op_map.get(op, op)
1067
1097
 
@@ -1075,45 +1105,129 @@ class FlowGraphToPolarsConverter:
1075
1105
  return re.sub(pattern, replace_expr, expr)
1076
1106
 
1077
1107
  def _create_basic_filter_expr(self, basic: transform_schema.BasicFilter) -> str:
1078
- """Create Polars expression from basic filter."""
1108
+ """Create Polars expression from basic filter.
1109
+
1110
+ Generates proper Polars code for all supported filter operators.
1111
+
1112
+ Args:
1113
+ basic: The BasicFilter configuration.
1114
+
1115
+ Returns:
1116
+ A string containing valid Polars filter expression code.
1117
+ """
1118
+ from flowfile_core.schemas.transform_schema import FilterOperator
1119
+
1079
1120
  col = f'pl.col("{basic.field}")'
1121
+ value = basic.value
1122
+ value2 = basic.value2
1123
+
1124
+ # Determine if value is numeric (for proper quoting)
1125
+ is_numeric = value.replace(".", "", 1).replace("-", "", 1).isnumeric() if value else False
1126
+
1127
+ # Get the operator
1128
+ try:
1129
+ operator = basic.get_operator()
1130
+ except (ValueError, AttributeError):
1131
+ operator = FilterOperator.from_symbol(str(basic.operator))
1132
+
1133
+ # Generate expression based on operator
1134
+ if operator == FilterOperator.EQUALS:
1135
+ if is_numeric:
1136
+ return f"{col} == {value}"
1137
+ return f'{col} == "{value}"'
1138
+
1139
+ elif operator == FilterOperator.NOT_EQUALS:
1140
+ if is_numeric:
1141
+ return f"{col} != {value}"
1142
+ return f'{col} != "{value}"'
1143
+
1144
+ elif operator == FilterOperator.GREATER_THAN:
1145
+ if is_numeric:
1146
+ return f"{col} > {value}"
1147
+ return f'{col} > "{value}"'
1148
+
1149
+ elif operator == FilterOperator.GREATER_THAN_OR_EQUALS:
1150
+ if is_numeric:
1151
+ return f"{col} >= {value}"
1152
+ return f'{col} >= "{value}"'
1153
+
1154
+ elif operator == FilterOperator.LESS_THAN:
1155
+ if is_numeric:
1156
+ return f"{col} < {value}"
1157
+ return f'{col} < "{value}"'
1158
+
1159
+ elif operator == FilterOperator.LESS_THAN_OR_EQUALS:
1160
+ if is_numeric:
1161
+ return f"{col} <= {value}"
1162
+ return f'{col} <= "{value}"'
1163
+
1164
+ elif operator == FilterOperator.CONTAINS:
1165
+ return f'{col}.str.contains("{value}")'
1166
+
1167
+ elif operator == FilterOperator.NOT_CONTAINS:
1168
+ return f'{col}.str.contains("{value}").not_()'
1169
+
1170
+ elif operator == FilterOperator.STARTS_WITH:
1171
+ return f'{col}.str.starts_with("{value}")'
1172
+
1173
+ elif operator == FilterOperator.ENDS_WITH:
1174
+ return f'{col}.str.ends_with("{value}")'
1175
+
1176
+ elif operator == FilterOperator.IS_NULL:
1177
+ return f"{col}.is_null()"
1178
+
1179
+ elif operator == FilterOperator.IS_NOT_NULL:
1180
+ return f"{col}.is_not_null()"
1181
+
1182
+ elif operator == FilterOperator.IN:
1183
+ values = [v.strip() for v in value.split(",")]
1184
+ if all(v.replace(".", "", 1).replace("-", "", 1).isnumeric() for v in values):
1185
+ values_str = ", ".join(values)
1186
+ else:
1187
+ values_str = ", ".join(f'"{v}"' for v in values)
1188
+ return f"{col}.is_in([{values_str}])"
1189
+
1190
+ elif operator == FilterOperator.NOT_IN:
1191
+ values = [v.strip() for v in value.split(",")]
1192
+ if all(v.replace(".", "", 1).replace("-", "", 1).isnumeric() for v in values):
1193
+ values_str = ", ".join(values)
1194
+ else:
1195
+ values_str = ", ".join(f'"{v}"' for v in values)
1196
+ return f"{col}.is_in([{values_str}]).not_()"
1197
+
1198
+ elif operator == FilterOperator.BETWEEN:
1199
+ if value2 is None:
1200
+ return f"{col} # BETWEEN requires two values"
1201
+ if is_numeric and value2.replace(".", "", 1).replace("-", "", 1).isnumeric():
1202
+ return f"({col} >= {value}) & ({col} <= {value2})"
1203
+ return f'({col} >= "{value}") & ({col} <= "{value2}")'
1080
1204
 
1081
- if basic.filter_type == 'equals':
1082
- return f'{col} == "{basic.filter_value}"'
1083
- elif basic.filter_type == 'not_equals':
1084
- return f'{col} != "{basic.filter_value}"'
1085
- elif basic.filter_type == 'greater':
1086
- return f'{col} > {basic.filter_value}'
1087
- elif basic.filter_type == 'less':
1088
- return f'{col} < {basic.filter_value}'
1089
- elif basic.filter_type == 'in':
1090
- values = basic.filter_value.split(',')
1091
- return f"pl.col('{col}').is_in({values})"
1205
+ # Fallback
1092
1206
  return col
1093
1207
 
1094
1208
  def _get_polars_dtype(self, dtype_str: str) -> str:
1095
1209
  """Convert Flowfile dtype string to Polars dtype."""
1096
1210
  dtype_map = {
1097
- 'String': 'pl.Utf8',
1098
- 'Integer': 'pl.Int64',
1099
- 'Double': 'pl.Float64',
1100
- 'Boolean': 'pl.Boolean',
1101
- 'Date': 'pl.Date',
1102
- 'Datetime': 'pl.Datetime',
1103
- 'Float32': 'pl.Float32',
1104
- 'Float64': 'pl.Float64',
1105
- 'Int32': 'pl.Int32',
1106
- 'Int64': 'pl.Int64',
1107
- 'Utf8': 'pl.Utf8',
1211
+ "String": "pl.Utf8",
1212
+ "Integer": "pl.Int64",
1213
+ "Double": "pl.Float64",
1214
+ "Boolean": "pl.Boolean",
1215
+ "Date": "pl.Date",
1216
+ "Datetime": "pl.Datetime",
1217
+ "Float32": "pl.Float32",
1218
+ "Float64": "pl.Float64",
1219
+ "Int32": "pl.Int32",
1220
+ "Int64": "pl.Int64",
1221
+ "Utf8": "pl.Utf8",
1108
1222
  }
1109
- return dtype_map.get(dtype_str, 'pl.Utf8')
1223
+ return dtype_map.get(dtype_str, "pl.Utf8")
1110
1224
 
1111
1225
  def _get_agg_function(self, agg: str) -> str:
1112
1226
  """Get Polars aggregation function name."""
1113
1227
  agg_map = {
1114
- 'avg': 'mean',
1115
- 'average': 'mean',
1116
- 'concat': 'str.concat',
1228
+ "avg": "mean",
1229
+ "average": "mean",
1230
+ "concat": "str.concat",
1117
1231
  }
1118
1232
  return agg_map.get(agg, agg)
1119
1233
 
@@ -1126,12 +1240,12 @@ class FlowGraphToPolarsConverter:
1126
1240
  import re
1127
1241
 
1128
1242
  # Pattern for column names (simplified)
1129
- col_pattern = r'\b([a-zA-Z_][a-zA-Z0-9_]*)\b'
1243
+ col_pattern = r"\b([a-zA-Z_][a-zA-Z0-9_]*)\b"
1130
1244
 
1131
1245
  def replace_col(match):
1132
1246
  col_name = match.group(1)
1133
1247
  # Skip SQL keywords
1134
- keywords = {'CASE', 'WHEN', 'THEN', 'ELSE', 'END', 'AND', 'OR', 'NOT', 'IN', 'AS'}
1248
+ keywords = {"CASE", "WHEN", "THEN", "ELSE", "END", "AND", "OR", "NOT", "IN", "AS"}
1135
1249
  if col_name.upper() in keywords:
1136
1250
  return col_name
1137
1251
  return f'pl.col("{col_name}")'
@@ -1139,13 +1253,13 @@ class FlowGraphToPolarsConverter:
1139
1253
  result = re.sub(col_pattern, replace_col, sql_expr)
1140
1254
 
1141
1255
  # Handle CASE WHEN
1142
- if 'CASE' in result:
1256
+ if "CASE" in result:
1143
1257
  # This would need proper parsing
1144
1258
  result = "pl.when(...).then(...).otherwise(...)"
1145
1259
 
1146
1260
  return result
1147
1261
 
1148
- def add_return_code(self, lines: List[str]) -> None:
1262
+ def add_return_code(self, lines: list[str]) -> None:
1149
1263
  if self.output_nodes:
1150
1264
  # Return marked output nodes
1151
1265
  if len(self.output_nodes) == 1:
@@ -1175,8 +1289,8 @@ class FlowGraphToPolarsConverter:
1175
1289
  # Add main function
1176
1290
  lines.append("def run_etl_pipeline():")
1177
1291
  lines.append(' """')
1178
- lines.append(f' ETL Pipeline: {self.flow_graph.__name__}')
1179
- lines.append(' Generated from Flowfile')
1292
+ lines.append(f" ETL Pipeline: {self.flow_graph.__name__}")
1293
+ lines.append(" Generated from Flowfile")
1180
1294
  lines.append(' """')
1181
1295
  lines.append(" ")
1182
1296