Flowfile 0.4.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (332)
  1. build_backends/main.py +25 -22
  2. build_backends/main_prd.py +10 -19
  3. flowfile/__init__.py +179 -73
  4. flowfile/__main__.py +10 -7
  5. flowfile/api.py +52 -59
  6. flowfile/web/__init__.py +14 -9
  7. flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
  8. flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
  9. flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
  10. flowfile/web/static/assets/{CloudConnectionManager-d3248f8d.js → CloudConnectionView-f13f202b.js} +11 -11
  11. flowfile/web/static/assets/{CloudStorageReader-d65bf041.js → CloudStorageReader-0023d4a5.js} +10 -8
  12. flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
  13. flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
  14. flowfile/web/static/assets/{CloudStorageWriter-e83be3ed.js → CloudStorageWriter-8e781e11.js} +10 -8
  15. flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
  16. flowfile/web/static/assets/{ColumnSelector-cce661cf.js → ColumnSelector-8ad68ea9.js} +3 -5
  17. flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
  18. flowfile/web/static/assets/{ContextMenu-11a4652a.js → ContextMenu-31ee57f0.js} +3 -3
  19. flowfile/web/static/assets/{ContextMenu-160afb08.js → ContextMenu-69a74055.js} +3 -3
  20. flowfile/web/static/assets/{ContextMenu-cf18d2cc.js → ContextMenu-8e2051c6.js} +3 -3
  21. flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
  22. flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
  23. flowfile/web/static/assets/{CrossJoin-d395d38c.js → CrossJoin-03df6938.js} +12 -10
  24. flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
  25. flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
  26. flowfile/web/static/assets/{CustomNode-b812dc0b.js → CustomNode-8479239b.js} +36 -24
  27. flowfile/web/static/assets/{DatabaseConnectionSettings-7000bf2c.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
  28. flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
  29. flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
  30. flowfile/web/static/assets/{DatabaseReader-4f035d0c.js → DatabaseReader-c58b9552.js} +25 -15
  31. flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
  32. flowfile/web/static/assets/{DatabaseManager-9662ec5b.js → DatabaseView-d26a9140.js} +11 -11
  33. flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
  34. flowfile/web/static/assets/{DatabaseWriter-f65dcd54.js → DatabaseWriter-4d05ddc7.js} +17 -10
  35. flowfile/web/static/assets/{designer-e3c150ec.css → DesignerView-a6d0ee84.css} +629 -538
  36. flowfile/web/static/assets/{designer-f3656d8c.js → DesignerView-e6f5c0e8.js} +1214 -3209
  37. flowfile/web/static/assets/{documentation-52b241e7.js → DocumentationView-2e78ef1b.js} +5 -5
  38. flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
  39. flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
  40. flowfile/web/static/assets/{ExploreData-94c43dfc.js → ExploreData-7b54caca.js} +18 -9
  41. flowfile/web/static/assets/{ExternalSource-ac04b3cc.js → ExternalSource-3fa399b2.js} +9 -7
  42. flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
  43. flowfile/web/static/assets/Filter-7494ea97.css +48 -0
  44. flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
  45. flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
  46. flowfile/web/static/assets/{Formula-71472193.js → Formula-aac42b1e.js} +13 -11
  47. flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
  48. flowfile/web/static/assets/{FuzzyMatch-b317f631.js → FuzzyMatch-cd9bbfca.js} +12 -10
  49. flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
  50. flowfile/web/static/assets/{GraphSolver-754a234f.js → GraphSolver-c7e6780e.js} +13 -11
  51. flowfile/web/static/assets/{GroupBy-6c6f9802.js → GroupBy-93c5d22b.js} +9 -7
  52. flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
  53. flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
  54. flowfile/web/static/assets/{Join-a1b800be.js → Join-a19b2de2.js} +13 -11
  55. flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
  56. flowfile/web/static/assets/LoginView-d325d632.css +172 -0
  57. flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
  58. flowfile/web/static/assets/{ManualInput-a9640276.js → ManualInput-8d3374b2.js} +170 -116
  59. flowfile/web/static/assets/{MultiSelect-97213888.js → MultiSelect-ad1b6243.js} +2 -2
  60. flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-6ffe088a.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
  61. flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
  62. flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
  63. flowfile/web/static/assets/{NumericInput-e638088a.js → NumericInput-7100234c.js} +2 -2
  64. flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-90eb2cba.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
  65. flowfile/web/static/assets/{Output-ddc9079f.css → Output-35e97000.css} +6 -6
  66. flowfile/web/static/assets/{Output-76750610.js → Output-f5efd2aa.js} +60 -38
  67. flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
  68. flowfile/web/static/assets/{Pivot-7814803f.js → Pivot-d981d23c.js} +11 -9
  69. flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
  70. flowfile/web/static/assets/{PivotValidation-f92137d2.js → PivotValidation-39386e95.js} +3 -3
  71. flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
  72. flowfile/web/static/assets/{PivotValidation-76dd431a.js → PivotValidation-63de1f73.js} +3 -3
  73. flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
  74. flowfile/web/static/assets/{PolarsCode-889c3008.js → PolarsCode-f9d69217.js} +18 -9
  75. flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
  76. flowfile/web/static/assets/PopOver-d96599db.css +33 -0
  77. flowfile/web/static/assets/{Read-6b17491f.css → Read-36e7bd51.css} +12 -12
  78. flowfile/web/static/assets/{Read-637b72a7.js → Read-aec2e377.js} +83 -105
  79. flowfile/web/static/assets/{RecordCount-2b050c41.js → RecordCount-78ed6845.js} +6 -4
  80. flowfile/web/static/assets/{RecordId-81df7784.js → RecordId-2156e890.js} +8 -6
  81. flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
  82. flowfile/web/static/assets/{SQLQueryComponent-88dcfe53.js → SQLQueryComponent-48c72f5b.js} +3 -3
  83. flowfile/web/static/assets/{Sample-258ad2a9.js → Sample-1352ca74.js} +6 -4
  84. flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
  85. flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
  86. flowfile/web/static/assets/{SecretManager-2a2cb7e2.js → SecretsView-17df66ee.js} +35 -36
  87. flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
  88. flowfile/web/static/assets/{Select-850215fd.js → Select-0aee4c54.js} +9 -7
  89. flowfile/web/static/assets/{SettingsSection-55bae608.js → SettingsSection-0784e157.js} +3 -3
  90. flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
  91. flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
  92. flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
  93. flowfile/web/static/assets/{SettingsSection-0e8d9123.js → SettingsSection-cd341bb6.js} +3 -3
  94. flowfile/web/static/assets/{SettingsSection-29b4fa6b.js → SettingsSection-f2002a6d.js} +3 -3
  95. flowfile/web/static/assets/{SingleSelect-bebd408b.js → SingleSelect-460cc0ea.js} +2 -2
  96. flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-6093741c.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
  97. flowfile/web/static/assets/{SliderInput-6a05ab61.js → SliderInput-5d926864.js} +7 -4
  98. flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
  99. flowfile/web/static/assets/{Sort-10ab48ed.js → Sort-3cdc971b.js} +9 -7
  100. flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
  101. flowfile/web/static/assets/{TextInput-df9d6259.js → TextInput-a2d0bfbd.js} +2 -2
  102. flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-000e1178.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
  103. flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
  104. flowfile/web/static/assets/{TextToRows-6c2d93d8.js → TextToRows-918945f7.js} +11 -10
  105. flowfile/web/static/assets/{ToggleSwitch-0ff7ac52.js → ToggleSwitch-f0ef5196.js} +2 -2
  106. flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-c6dc3029.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
  107. flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
  108. flowfile/web/static/assets/{UnavailableFields-1bab97cb.js → UnavailableFields-bdad6144.js} +4 -4
  109. flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
  110. flowfile/web/static/assets/{Union-b563478a.js → Union-e8ab8c86.js} +8 -6
  111. flowfile/web/static/assets/{Unique-f90db5db.js → Unique-8cd4f976.js} +13 -22
  112. flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
  113. flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
  114. flowfile/web/static/assets/{Unpivot-bcb0025f.js → Unpivot-8da14095.js} +10 -8
  115. flowfile/web/static/assets/{UnpivotValidation-c4e73b04.js → UnpivotValidation-6f7d89ff.js} +3 -3
  116. flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
  117. flowfile/web/static/assets/{VueGraphicWalker-bb8535e2.js → VueGraphicWalker-3fb312e1.js} +4 -4
  118. flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
  119. flowfile/web/static/assets/{api-4c8e3822.js → api-24483f0d.js} +1 -1
  120. flowfile/web/static/assets/{api-2d6adc4f.js → api-8b81fa73.js} +1 -1
  121. flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
  122. flowfile/web/static/assets/{dropDown-1bca8a74.js → dropDown-ac0fda9d.js} +3 -3
  123. flowfile/web/static/assets/{fullEditor-2985687e.js → fullEditor-5497a84a.js} +11 -10
  124. flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
  125. flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
  126. flowfile/web/static/assets/{genericNodeSettings-0476ba4e.js → genericNodeSettings-99014e1d.js} +5 -5
  127. flowfile/web/static/assets/index-07dda503.js +38 -0
  128. flowfile/web/static/assets/index-3ba44389.js +2696 -0
  129. flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
  130. flowfile/web/static/assets/{index-246f201c.js → index-fb6493ae.js} +41626 -40869
  131. flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
  132. flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
  133. flowfile/web/static/assets/{outputCsv-d686eeaf.js → outputCsv-8f8ba42d.js} +3 -3
  134. flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
  135. flowfile/web/static/assets/{outputExcel-8809ea2f.js → outputExcel-393f4fef.js} +3 -3
  136. flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
  137. flowfile/web/static/assets/{outputParquet-53ba645a.js → outputParquet-07c81f65.js} +4 -4
  138. flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
  139. flowfile/web/static/assets/{readCsv-053bf97b.js → readCsv-07f6d9ad.js} +21 -20
  140. flowfile/web/static/assets/{readCsv-bca3ed53.css → readCsv-3bfac4c3.css} +15 -15
  141. flowfile/web/static/assets/{readExcel-e1b381ea.css → readExcel-3db6b763.css} +13 -13
  142. flowfile/web/static/assets/{readExcel-ad531eab.js → readExcel-ed69bc8f.js} +10 -12
  143. flowfile/web/static/assets/{readParquet-cee068e2.css → readParquet-c5244ad5.css} +4 -4
  144. flowfile/web/static/assets/{readParquet-58e899a1.js → readParquet-e3ed4528.js} +4 -7
  145. flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
  146. flowfile/web/static/assets/{selectDynamic-b38de2ba.js → selectDynamic-80b92899.js} +5 -5
  147. flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
  148. flowfile/web/static/assets/{vue-codemirror.esm-db9b8936.js → vue-codemirror.esm-0965f39f.js} +31 -637
  149. flowfile/web/static/assets/{vue-content-loader.es-b5f3ac30.js → vue-content-loader.es-c506ad97.js} +1 -1
  150. flowfile/web/static/index.html +2 -2
  151. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +4 -4
  152. flowfile-0.5.3.dist-info/RECORD +402 -0
  153. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +1 -1
  154. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +1 -0
  155. flowfile_core/__init__.py +13 -3
  156. flowfile_core/auth/jwt.py +51 -16
  157. flowfile_core/auth/models.py +32 -7
  158. flowfile_core/auth/password.py +89 -0
  159. flowfile_core/auth/secrets.py +8 -6
  160. flowfile_core/configs/__init__.py +9 -7
  161. flowfile_core/configs/flow_logger.py +15 -14
  162. flowfile_core/configs/node_store/__init__.py +72 -4
  163. flowfile_core/configs/node_store/nodes.py +155 -172
  164. flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
  165. flowfile_core/configs/settings.py +28 -15
  166. flowfile_core/database/connection.py +7 -6
  167. flowfile_core/database/init_db.py +96 -2
  168. flowfile_core/database/models.py +3 -1
  169. flowfile_core/fileExplorer/__init__.py +17 -0
  170. flowfile_core/fileExplorer/funcs.py +123 -57
  171. flowfile_core/fileExplorer/utils.py +10 -11
  172. flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
  173. flowfile_core/flowfile/analytics/analytics_processor.py +27 -24
  174. flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
  175. flowfile_core/flowfile/analytics/utils.py +1 -1
  176. flowfile_core/flowfile/code_generator/code_generator.py +391 -279
  177. flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
  178. flowfile_core/flowfile/connection_manager/models.py +1 -1
  179. flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
  180. flowfile_core/flowfile/database_connection_manager/models.py +1 -1
  181. flowfile_core/flowfile/extensions.py +17 -12
  182. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
  183. flowfile_core/flowfile/flow_data_engine/create/funcs.py +152 -103
  184. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +526 -477
  185. flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
  186. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
  187. flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
  188. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
  189. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
  190. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +43 -32
  191. flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
  192. flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
  193. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +15 -11
  194. flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
  195. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
  196. flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
  197. flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
  198. flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
  199. flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
  200. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +360 -191
  201. flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
  202. flowfile_core/flowfile/flow_data_engine/utils.py +101 -67
  203. flowfile_core/flowfile/flow_graph.py +1011 -561
  204. flowfile_core/flowfile/flow_graph_utils.py +31 -49
  205. flowfile_core/flowfile/flow_node/flow_node.py +332 -232
  206. flowfile_core/flowfile/flow_node/models.py +54 -41
  207. flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
  208. flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
  209. flowfile_core/flowfile/handler.py +82 -32
  210. flowfile_core/flowfile/manage/compatibility_enhancements.py +493 -47
  211. flowfile_core/flowfile/manage/io_flowfile.py +391 -0
  212. flowfile_core/flowfile/node_designer/__init__.py +15 -13
  213. flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
  214. flowfile_core/flowfile/node_designer/custom_node.py +162 -36
  215. flowfile_core/flowfile/node_designer/ui_components.py +136 -35
  216. flowfile_core/flowfile/schema_callbacks.py +77 -54
  217. flowfile_core/flowfile/setting_generator/__init__.py +0 -1
  218. flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
  219. flowfile_core/flowfile/setting_generator/settings.py +72 -55
  220. flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
  221. flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
  222. flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
  223. flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
  224. flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
  225. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
  226. flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
  227. flowfile_core/flowfile/util/calculate_layout.py +9 -13
  228. flowfile_core/flowfile/util/execution_orderer.py +25 -17
  229. flowfile_core/flowfile/util/node_skipper.py +4 -4
  230. flowfile_core/flowfile/utils.py +19 -21
  231. flowfile_core/main.py +26 -19
  232. flowfile_core/routes/auth.py +284 -11
  233. flowfile_core/routes/cloud_connections.py +25 -25
  234. flowfile_core/routes/logs.py +21 -29
  235. flowfile_core/routes/public.py +3 -3
  236. flowfile_core/routes/routes.py +77 -43
  237. flowfile_core/routes/secrets.py +25 -27
  238. flowfile_core/routes/user_defined_components.py +483 -4
  239. flowfile_core/run_lock.py +0 -1
  240. flowfile_core/schemas/__init__.py +4 -6
  241. flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
  242. flowfile_core/schemas/cloud_storage_schemas.py +59 -55
  243. flowfile_core/schemas/input_schema.py +398 -154
  244. flowfile_core/schemas/output_model.py +50 -35
  245. flowfile_core/schemas/schemas.py +207 -67
  246. flowfile_core/schemas/transform_schema.py +1360 -435
  247. flowfile_core/schemas/yaml_types.py +117 -0
  248. flowfile_core/secret_manager/secret_manager.py +17 -13
  249. flowfile_core/{flowfile/node_designer/data_types.py → types.py} +33 -3
  250. flowfile_core/utils/arrow_reader.py +7 -6
  251. flowfile_core/utils/excel_file_manager.py +3 -3
  252. flowfile_core/utils/fileManager.py +7 -7
  253. flowfile_core/utils/fl_executor.py +8 -10
  254. flowfile_core/utils/utils.py +4 -4
  255. flowfile_core/utils/validate_setup.py +5 -4
  256. flowfile_frame/__init__.py +107 -50
  257. flowfile_frame/adapters.py +2 -9
  258. flowfile_frame/adding_expr.py +73 -32
  259. flowfile_frame/cloud_storage/frame_helpers.py +27 -23
  260. flowfile_frame/cloud_storage/secret_manager.py +12 -26
  261. flowfile_frame/config.py +2 -5
  262. flowfile_frame/expr.py +311 -218
  263. flowfile_frame/expr.pyi +160 -159
  264. flowfile_frame/expr_name.py +23 -23
  265. flowfile_frame/flow_frame.py +581 -489
  266. flowfile_frame/flow_frame.pyi +123 -104
  267. flowfile_frame/flow_frame_methods.py +236 -252
  268. flowfile_frame/group_frame.py +50 -20
  269. flowfile_frame/join.py +2 -2
  270. flowfile_frame/lazy.py +129 -87
  271. flowfile_frame/lazy_methods.py +83 -30
  272. flowfile_frame/list_name_space.py +55 -50
  273. flowfile_frame/selectors.py +148 -68
  274. flowfile_frame/series.py +9 -7
  275. flowfile_frame/utils.py +19 -21
  276. flowfile_worker/__init__.py +12 -4
  277. flowfile_worker/configs.py +11 -19
  278. flowfile_worker/create/__init__.py +14 -27
  279. flowfile_worker/create/funcs.py +143 -94
  280. flowfile_worker/create/models.py +139 -68
  281. flowfile_worker/create/pl_types.py +14 -15
  282. flowfile_worker/create/read_excel_tables.py +34 -41
  283. flowfile_worker/create/utils.py +22 -19
  284. flowfile_worker/external_sources/s3_source/main.py +18 -51
  285. flowfile_worker/external_sources/s3_source/models.py +34 -27
  286. flowfile_worker/external_sources/sql_source/main.py +8 -5
  287. flowfile_worker/external_sources/sql_source/models.py +13 -9
  288. flowfile_worker/flow_logger.py +10 -8
  289. flowfile_worker/funcs.py +214 -155
  290. flowfile_worker/main.py +11 -17
  291. flowfile_worker/models.py +35 -28
  292. flowfile_worker/process_manager.py +2 -3
  293. flowfile_worker/routes.py +121 -93
  294. flowfile_worker/secrets.py +9 -6
  295. flowfile_worker/spawner.py +80 -49
  296. flowfile_worker/utils.py +3 -2
  297. shared/__init__.py +2 -7
  298. shared/storage_config.py +25 -13
  299. test_utils/postgres/commands.py +3 -2
  300. test_utils/postgres/fixtures.py +9 -9
  301. test_utils/s3/commands.py +1 -1
  302. test_utils/s3/data_generator.py +3 -4
  303. test_utils/s3/demo_data_generator.py +4 -7
  304. test_utils/s3/fixtures.py +7 -5
  305. tools/migrate/README.md +56 -0
  306. tools/migrate/__init__.py +12 -0
  307. tools/migrate/__main__.py +118 -0
  308. tools/migrate/legacy_schemas.py +682 -0
  309. tools/migrate/migrate.py +610 -0
  310. tools/migrate/tests/__init__.py +0 -0
  311. tools/migrate/tests/conftest.py +21 -0
  312. tools/migrate/tests/test_migrate.py +622 -0
  313. tools/migrate/tests/test_migration_e2e.py +1009 -0
  314. tools/migrate/tests/test_node_migrations.py +843 -0
  315. flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
  316. flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
  317. flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
  318. flowfile/web/static/assets/Filter-812dcbca.js +0 -164
  319. flowfile/web/static/assets/Filter-f62091b3.css +0 -20
  320. flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
  321. flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
  322. flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
  323. flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
  324. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
  325. flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
  326. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
  327. flowfile/web/static/assets/secretApi-538058f3.js +0 -46
  328. flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
  329. flowfile-0.4.1.dist-info/RECORD +0 -376
  330. flowfile_core/flowfile/manage/open_flowfile.py +0 -143
  331. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
  332. /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
@@ -1,15 +1,13 @@
1
- from typing import List, Dict, Optional, Set, Tuple
2
1
  import polars as pl
3
-
4
2
  from pl_fuzzy_frame_match.models import FuzzyMapping
5
3
 
6
- from flowfile_core.flowfile.flow_graph import FlowGraph
4
+ from flowfile_core.configs import logger
7
5
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, convert_pl_type_to_string
8
6
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import cast_str_to_polars_type
7
+ from flowfile_core.flowfile.flow_graph import FlowGraph
9
8
  from flowfile_core.flowfile.flow_node.flow_node import FlowNode
10
9
  from flowfile_core.flowfile.util.execution_orderer import determine_execution_order
11
10
  from flowfile_core.schemas import input_schema, transform_schema
12
- from flowfile_core.configs import logger
13
11
 
14
12
 
15
13
  class FlowGraphToPolarsConverter:
@@ -19,18 +17,19 @@ class FlowGraphToPolarsConverter:
19
17
  This class takes a FlowGraph instance and generates standalone Python code
20
18
  that uses only Polars, without any Flowfile dependencies.
21
19
  """
20
+
22
21
  flow_graph: FlowGraph
23
- node_var_mapping: Dict[int, str]
24
- imports: Set[str]
25
- code_lines: List[str]
26
- output_nodes: List[Tuple[int, str]] = []
27
- last_node_var: Optional[str] = None
22
+ node_var_mapping: dict[int, str]
23
+ imports: set[str]
24
+ code_lines: list[str]
25
+ output_nodes: list[tuple[int, str]] = []
26
+ last_node_var: str | None = None
28
27
 
29
28
  def __init__(self, flow_graph: FlowGraph):
30
29
  self.flow_graph = flow_graph
31
- self.node_var_mapping: Dict[int, str] = {} # Maps node_id to variable name
32
- self.imports: Set[str] = {"import polars as pl"}
33
- self.code_lines: List[str] = []
30
+ self.node_var_mapping: dict[int, str] = {} # Maps node_id to variable name
31
+ self.imports: set[str] = {"import polars as pl"}
32
+ self.code_lines: list[str] = []
34
33
  self.output_nodes = []
35
34
  self.last_node_var = None
36
35
 
@@ -44,7 +43,7 @@ class FlowGraphToPolarsConverter:
44
43
  # Get execution order
45
44
  execution_order = determine_execution_order(
46
45
  all_nodes=[node for node in self.flow_graph.nodes if node.is_correct],
47
- flow_starts=self.flow_graph._flow_starts + self.flow_graph.get_implicit_starter_nodes()
46
+ flow_starts=self.flow_graph._flow_starts + self.flow_graph.get_implicit_starter_nodes(),
48
47
  )
49
48
 
50
49
  # Generate code for each node in order
@@ -56,14 +55,13 @@ class FlowGraphToPolarsConverter:
56
55
 
57
56
  def handle_output_node(self, node: FlowNode, var_name: str) -> None:
58
57
  settings = node.setting_input
59
- if hasattr(settings, 'is_flow_output') and settings.is_flow_output:
58
+ if hasattr(settings, "is_flow_output") and settings.is_flow_output:
60
59
  self.output_nodes.append((node.node_id, var_name))
61
60
 
62
61
  def _generate_node_code(self, node: FlowNode) -> None:
63
62
  """Generate Polars code for a specific node."""
64
63
  node_type = node.node_type
65
64
  settings = node.setting_input
66
- # Skip placeholder nodes
67
65
  if isinstance(settings, input_schema.NodePromise):
68
66
  self._add_comment(f"# Skipping uninitialized node: {node.node_id}")
69
67
  return
@@ -71,7 +69,7 @@ class FlowGraphToPolarsConverter:
71
69
  var_name = f"df_{node.node_id}"
72
70
  self.node_var_mapping[node.node_id] = var_name
73
71
  self.handle_output_node(node, var_name)
74
- if node.node_template.output>0:
72
+ if node.node_template.output > 0:
75
73
  self.last_node_var = var_name
76
74
  # Get input variable names
77
75
  input_vars = self._get_input_vars(node)
@@ -83,67 +81,59 @@ class FlowGraphToPolarsConverter:
83
81
  self._add_comment(f"# TODO: Implement handler for node type: {node_type}")
84
82
  raise Exception(f"No handler implemented for node type: {node_type}")
85
83
 
86
- def _get_input_vars(self, node: FlowNode) -> Dict[str, str]:
84
+ def _get_input_vars(self, node: FlowNode) -> dict[str, str]:
87
85
  """Get input variable names for a node."""
88
86
  input_vars = {}
89
87
 
90
88
  if node.node_inputs.main_inputs:
91
89
  if len(node.node_inputs.main_inputs) == 1:
92
- input_vars['main'] = self.node_var_mapping.get(
93
- node.node_inputs.main_inputs[0].node_id, 'df'
94
- )
90
+ input_vars["main"] = self.node_var_mapping.get(node.node_inputs.main_inputs[0].node_id, "df")
95
91
  else:
96
92
  for i, input_node in enumerate(node.node_inputs.main_inputs):
97
- input_vars[f'main_{i}'] = self.node_var_mapping.get(
98
- input_node.node_id, f'df_{i}'
99
- )
93
+ input_vars[f"main_{i}"] = self.node_var_mapping.get(input_node.node_id, f"df_{i}")
100
94
 
101
95
  if node.node_inputs.left_input:
102
- input_vars['left'] = self.node_var_mapping.get(
103
- node.node_inputs.left_input.node_id, 'df_left'
104
- )
96
+ input_vars["left"] = self.node_var_mapping.get(node.node_inputs.left_input.node_id, "df_left")
105
97
 
106
98
  if node.node_inputs.right_input:
107
- input_vars['right'] = self.node_var_mapping.get(
108
- node.node_inputs.right_input.node_id, 'df_right'
109
- )
99
+ input_vars["right"] = self.node_var_mapping.get(node.node_inputs.right_input.node_id, "df_right")
110
100
 
111
101
  return input_vars
112
102
 
113
103
  def _handle_csv_read(self, file_settings: input_schema.ReceivedTable, var_name: str):
114
- if file_settings.encoding.lower() in ('utf-8', 'utf8'):
104
+ if file_settings.table_settings.encoding.lower() in ("utf-8", "utf8"):
115
105
  encoding = "utf8-lossy"
116
106
  self._add_code(f"{var_name} = pl.scan_csv(")
117
107
  self._add_code(f' "{file_settings.abs_file_path}",')
118
- self._add_code(f' separator="{file_settings.delimiter}",')
119
- self._add_code(f' has_header={file_settings.has_headers},')
120
- self._add_code(f' ignore_errors={file_settings.ignore_errors},')
108
+ self._add_code(f' separator="{file_settings.table_settings.delimiter}",')
109
+ self._add_code(f" has_header={file_settings.table_settings.has_headers},")
110
+ self._add_code(f" ignore_errors={file_settings.table_settings.ignore_errors},")
121
111
  self._add_code(f' encoding="{encoding}",')
122
- self._add_code(f' skip_rows={file_settings.starting_from_line},')
112
+ self._add_code(f" skip_rows={file_settings.table_settings.starting_from_line},")
123
113
  self._add_code(")")
124
114
  else:
125
115
  self._add_code(f"{var_name} = pl.read_csv(")
126
116
  self._add_code(f' "{file_settings.abs_file_path}",')
127
- self._add_code(f' separator="{file_settings.delimiter}",')
128
- self._add_code(f' has_header={file_settings.has_headers},')
129
- self._add_code(f' ignore_errors={file_settings.ignore_errors},')
130
- if file_settings.encoding:
131
- self._add_code(f' encoding="{file_settings.encoding}",')
132
- self._add_code(f' skip_rows={file_settings.starting_from_line},')
117
+ self._add_code(f' separator="{file_settings.table_settings.delimiter}",')
118
+ self._add_code(f" has_header={file_settings.table_settings.has_headers},")
119
+ self._add_code(f" ignore_errors={file_settings.table_settings.ignore_errors},")
120
+ if file_settings.table_settings.encoding:
121
+ self._add_code(f' encoding="{file_settings.table_settings.encoding}",')
122
+ self._add_code(f" skip_rows={file_settings.table_settings.starting_from_line},")
133
123
  self._add_code(").lazy()")
134
124
 
135
- def _handle_cloud_storage_reader(self, settings: input_schema.NodeCloudStorageReader, var_name: str, input_vars: Dict[str, str]):
125
+ def _handle_cloud_storage_reader(
126
+ self, settings: input_schema.NodeCloudStorageReader, var_name: str, input_vars: dict[str, str]
127
+ ):
136
128
  cloud_read_settings = settings.cloud_storage_settings
137
- self.imports.add(
138
- "import flowfile as ff"
139
- )
129
+ self.imports.add("import flowfile as ff")
140
130
  if cloud_read_settings.file_format == "csv":
141
131
  self._add_code(f"{var_name} = ff.scan_csv_from_cloud_storage(")
142
132
  self._add_code(f' "{cloud_read_settings.resource_path}",')
143
133
  self._add_code(f' connection_name="{cloud_read_settings.connection_name}",')
144
134
  self._add_code(f' scan_mode="{cloud_read_settings.scan_mode}",')
145
135
  self._add_code(f' delimiter="{cloud_read_settings.csv_delimiter}",')
146
- self._add_code(f' has_header={cloud_read_settings.csv_has_header},')
136
+ self._add_code(f" has_header={cloud_read_settings.csv_has_header},")
147
137
  self._add_code(f' encoding="{cloud_read_settings.csv_encoding}",')
148
138
 
149
139
  elif cloud_read_settings.file_format == "parquet":
@@ -163,37 +153,43 @@ class FlowGraphToPolarsConverter:
163
153
  self._add_code(f' "{cloud_read_settings.resource_path}",')
164
154
  self._add_code(f' connection_name="{cloud_read_settings.connection_name}",')
165
155
  self._add_code(f' scan_mode="{cloud_read_settings.scan_mode}",')
166
- self._add_code(f' version_id={cloud_read_settings.delta_version},')
156
+ self._add_code(f" version_id={cloud_read_settings.delta_version},")
167
157
  else:
168
158
  return
169
159
  self._add_code(").data")
170
160
 
171
- def _handle_read(self, settings: input_schema.NodeRead, var_name: str, input_vars: Dict[str, str]) -> None:
161
+ def _handle_read(self, settings: input_schema.NodeRead, var_name: str, input_vars: dict[str, str]) -> None:
172
162
  """Handle file reading nodes."""
173
163
  file_settings = settings.received_file
174
164
 
175
- if file_settings.file_type == 'csv':
165
+ if file_settings.file_type == "csv":
176
166
  self._handle_csv_read(file_settings, var_name)
177
167
 
178
- elif file_settings.file_type == 'parquet':
168
+ elif file_settings.file_type == "parquet":
179
169
  self._add_code(f'{var_name} = pl.scan_parquet("{file_settings.abs_file_path}")')
180
170
 
181
- elif file_settings.file_type in ('xlsx', 'excel'):
171
+ elif file_settings.file_type in ("xlsx", "excel"):
182
172
  self._add_code(f"{var_name} = pl.read_excel(")
183
173
  self._add_code(f' "{file_settings.abs_file_path}",')
184
- if file_settings.sheet_name:
185
- self._add_code(f' sheet_name="{file_settings.sheet_name}",')
174
+ if file_settings.table_settings.sheet_name:
175
+ self._add_code(f' sheet_name="{file_settings.table_settings.sheet_name}",')
186
176
  self._add_code(").lazy()")
187
177
 
188
178
  self._add_code("")
189
179
 
190
180
  @staticmethod
191
- def _generate_pl_schema_with_typing(flowfile_schema: List[FlowfileColumn]) -> str:
192
- polars_schema_str = "pl.Schema([" + ", ".join(f'("{flowfile_column.column_name}", pl.{flowfile_column.data_type})'
193
- for flowfile_column in flowfile_schema) + "])"
181
+ def _generate_pl_schema_with_typing(flowfile_schema: list[FlowfileColumn]) -> str:
182
+ polars_schema_str = (
183
+ "pl.Schema(["
184
+ + ", ".join(
185
+ f'("{flowfile_column.column_name}", pl.{flowfile_column.data_type})'
186
+ for flowfile_column in flowfile_schema
187
+ )
188
+ + "])"
189
+ )
194
190
  return polars_schema_str
195
191
 
196
- def get_manual_schema_input(self, flowfile_schema: List[FlowfileColumn]) -> str:
192
+ def get_manual_schema_input(self, flowfile_schema: list[FlowfileColumn]) -> str:
197
193
  polars_schema_str = self._generate_pl_schema_with_typing(flowfile_schema)
198
194
  is_valid_pl_schema = self._validate_pl_schema(polars_schema_str)
199
195
  if is_valid_pl_schema:
@@ -211,19 +207,23 @@ class FlowGraphToPolarsConverter:
211
207
  logger.error(f"Invalid Polars schema: {e}")
212
208
  return False
213
209
 
214
- def _handle_manual_input(self, settings: input_schema.NodeManualInput, var_name: str, input_vars: Dict[str, str]) -> None:
210
+ def _handle_manual_input(
211
+ self, settings: input_schema.NodeManualInput, var_name: str, input_vars: dict[str, str]
212
+ ) -> None:
215
213
  """Handle manual data input nodes."""
216
214
  data = settings.raw_data_format.data
217
- flowfile_schema = list(FlowfileColumn.create_from_minimal_field_info(c) for c in settings.raw_data_format.columns)
215
+ flowfile_schema = list(
216
+ FlowfileColumn.create_from_minimal_field_info(c) for c in settings.raw_data_format.columns
217
+ )
218
218
  schema = self.get_manual_schema_input(flowfile_schema)
219
219
  self._add_code(f"{var_name} = pl.LazyFrame({data}, schema={schema}, strict=False)")
220
220
  self._add_code("")
221
221
 
222
- def _handle_filter(self, settings: input_schema.NodeFilter, var_name: str, input_vars: Dict[str, str]) -> None:
222
+ def _handle_filter(self, settings: input_schema.NodeFilter, var_name: str, input_vars: dict[str, str]) -> None:
223
223
  """Handle filter nodes."""
224
- input_df = input_vars.get('main', 'df')
224
+ input_df = input_vars.get("main", "df")
225
225
 
226
- if settings.filter_input.filter_type == 'advanced':
226
+ if settings.filter_input.is_advanced():
227
227
  # Parse the advanced filter expression
228
228
  self.imports.add(
229
229
  "from polars_expr_transformer.process.polars_expr_transformer import simple_function_to_expr"
@@ -234,28 +234,33 @@ class FlowGraphToPolarsConverter:
234
234
  else:
235
235
  # Handle basic filter
236
236
  basic = settings.filter_input.basic_filter
237
- filter_expr = self._create_basic_filter_expr(basic)
238
- self._add_code(f"{var_name} = {input_df}.filter({filter_expr})")
237
+ if basic is not None:
238
+ filter_expr = self._create_basic_filter_expr(basic)
239
+ self._add_code(f"{var_name} = {input_df}.filter({filter_expr})")
240
+ else:
241
+ self._add_code(f"{var_name} = {input_df} # No filter applied")
239
242
  self._add_code("")
240
243
 
241
- def _handle_record_count(self, settings: input_schema.NodeRecordCount, var_name: str, input_vars: Dict[str, str]):
242
- input_df = input_vars.get('main', 'df')
244
+ def _handle_record_count(self, settings: input_schema.NodeRecordCount, var_name: str, input_vars: dict[str, str]):
245
+ input_df = input_vars.get("main", "df")
243
246
  self._add_code(f"{var_name} = {input_df}.select(pl.len().alias('number_of_records'))")
244
247
 
245
- def _handle_graph_solver(self, settings: input_schema.NodeGraphSolver, var_name: str, input_vars: Dict[str, str]):
246
- input_df = input_vars.get('main', 'df')
248
+ def _handle_graph_solver(self, settings: input_schema.NodeGraphSolver, var_name: str, input_vars: dict[str, str]):
249
+ input_df = input_vars.get("main", "df")
247
250
  from_col_name = settings.graph_solver_input.col_from
248
251
  to_col_name = settings.graph_solver_input.col_to
249
252
  output_col_name = settings.graph_solver_input.output_column_name
250
- self._add_code(f'{var_name} = {input_df}.with_columns(graph_solver(pl.col("{from_col_name}"), '
251
- f'pl.col("{to_col_name}"))'
252
- f'.alias("{output_col_name}"))')
253
+ self._add_code(
254
+ f'{var_name} = {input_df}.with_columns(graph_solver(pl.col("{from_col_name}"), '
255
+ f'pl.col("{to_col_name}"))'
256
+ f'.alias("{output_col_name}"))'
257
+ )
253
258
  self._add_code("")
254
259
  self.imports.add("from polars_grouper import graph_solver")
255
260
 
256
- def _handle_select(self, settings: input_schema.NodeSelect, var_name: str, input_vars: Dict[str, str]) -> None:
261
+ def _handle_select(self, settings: input_schema.NodeSelect, var_name: str, input_vars: dict[str, str]) -> None:
257
262
  """Handle select/rename nodes."""
258
- input_df = input_vars.get('main', 'df')
263
+ input_df = input_vars.get("main", "df")
259
264
  # Get columns to keep and renames
260
265
  select_exprs = []
261
266
  for select_input in settings.select_input:
@@ -267,7 +272,7 @@ class FlowGraphToPolarsConverter:
267
272
 
268
273
  if (select_input.data_type_change or select_input.is_altered) and select_input.data_type:
269
274
  polars_dtype = self._get_polars_dtype(select_input.data_type)
270
- expr = f'{expr}.cast({polars_dtype})'
275
+ expr = f"{expr}.cast({polars_dtype})"
271
276
 
272
277
  select_exprs.append(expr)
273
278
 
@@ -280,7 +285,7 @@ class FlowGraphToPolarsConverter:
280
285
  self._add_code(f"{var_name} = {input_df}")
281
286
  self._add_code("")
282
287
 
283
- def _handle_join(self, settings: input_schema.NodeJoin, var_name: str, input_vars: Dict[str, str]) -> None:
288
+ def _handle_join(self, settings: input_schema.NodeJoin, var_name: str, input_vars: dict[str, str]) -> None:
284
289
  """Handle join nodes by routing to appropriate join type handler.
285
290
 
286
291
  This is the main entry point for processing join operations. It determines
@@ -294,9 +299,8 @@ class FlowGraphToPolarsConverter:
294
299
  Returns:
295
300
  None: Modifies internal state by adding generated code
296
301
  """
297
- left_df = input_vars.get('main', input_vars.get('main_0', 'df_left'))
298
- right_df = input_vars.get('right', input_vars.get('main_1', 'df_right'))
299
-
302
+ left_df = input_vars.get("main", input_vars.get("main_0", "df_left"))
303
+ right_df = input_vars.get("right", input_vars.get("main_1", "df_right"))
300
304
  # Ensure left and right DataFrames are distinct
301
305
  if left_df == right_df:
302
306
  right_df = "df_right"
@@ -307,8 +311,9 @@ class FlowGraphToPolarsConverter:
307
311
  else:
308
312
  self._handle_standard_join(settings, var_name, left_df, right_df)
309
313
 
310
- def _handle_semi_anti_join(self, settings: input_schema.NodeJoin, var_name: str, left_df: str,
311
- right_df: str) -> None:
314
+ def _handle_semi_anti_join(
315
+ self, settings: input_schema.NodeJoin, var_name: str, left_df: str, right_df: str
316
+ ) -> None:
312
317
  """Handle semi and anti joins which only return rows from the left DataFrame.
313
318
 
314
319
  Semi joins return rows from left DataFrame that have matches in right.
@@ -335,8 +340,9 @@ class FlowGraphToPolarsConverter:
335
340
  self._add_code(" )")
336
341
  self._add_code(")")
337
342
 
338
- def _handle_standard_join(self, settings: input_schema.NodeJoin, var_name: str, left_df: str,
339
- right_df: str) -> None:
343
+ def _handle_standard_join(
344
+ self, settings: input_schema.NodeJoin, var_name: str, left_df: str, right_df: str
345
+ ) -> None:
340
346
  """Handle standard joins (left, right, inner, outer) with full column management.
341
347
 
342
348
  Standard joins may include columns from both DataFrames and require careful
@@ -359,26 +365,24 @@ class FlowGraphToPolarsConverter:
359
365
  Returns:
360
366
  None: Modifies internal state by adding generated code
361
367
  """
362
- settings.join_input.auto_rename()
363
-
368
+ join_input_manager = transform_schema.JoinInputManager(settings.join_input)
369
+ join_input_manager.auto_rename()
364
370
  # Get join keys
365
- left_on, right_on = self._get_join_keys(settings)
371
+ left_on, right_on = self._get_join_keys(join_input_manager)
366
372
 
367
373
  # Apply pre-join transformations
368
- left_df, right_df = self._apply_pre_join_transformations(settings, left_df, right_df)
369
-
374
+ left_df, right_df = self._apply_pre_join_transformations(join_input_manager, left_df, right_df)
370
375
  # Handle join-specific key transformations
371
376
  left_on, right_on, reverse_action, after_join_drop_cols = self._handle_join_key_transformations(
372
- settings, left_df, right_df, left_on, right_on
377
+ join_input_manager, left_df, right_df, left_on, right_on
373
378
  )
374
-
375
379
  # Execute the join
376
380
  self._execute_join_with_post_processing(
377
- settings, var_name, left_df, right_df, left_on, right_on,
378
- after_join_drop_cols, reverse_action
381
+ settings, var_name, left_df, right_df, left_on, right_on, after_join_drop_cols, reverse_action
379
382
  )
380
383
 
381
- def _get_join_keys(self, settings: input_schema.NodeJoin) -> Tuple[List[str], List[str]]:
384
+ @staticmethod
385
+ def _get_join_keys(settings: transform_schema.JoinInputManager) -> tuple[list[str], list[str]]:
382
386
  """Extract join keys based on join type.
383
387
 
384
388
  Different join types require different handling of join keys:
@@ -391,17 +395,18 @@ class FlowGraphToPolarsConverter:
391
395
  Returns:
392
396
  Tuple[List[str], List[str]]: Lists of (left_on, right_on) column names
393
397
  """
394
- left_on = [jm.left_col for jm in settings.join_input.get_names_for_table_rename()]
398
+ left_on = [jm.left_col for jm in settings.get_names_for_table_rename()]
395
399
 
396
- if settings.join_input.how in ("outer", "right"):
397
- right_on = [jm.right_col for jm in settings.join_input.get_names_for_table_rename()]
400
+ if settings.how in ("outer", "right"):
401
+ right_on = [jm.right_col for jm in settings.get_names_for_table_rename()]
398
402
  else:
399
- right_on = [jm.right_col for jm in settings.join_input.join_mapping]
403
+ right_on = [jm.right_col for jm in settings.join_mapping]
400
404
 
401
405
  return left_on, right_on
402
406
 
403
- def _apply_pre_join_transformations(self, settings: input_schema.NodeJoin, left_df: str, right_df: str) -> Tuple[
404
- str, str]:
407
+ def _apply_pre_join_transformations(
408
+ self, settings: transform_schema.JoinInputManager, left_df: str, right_df: str
409
+ ) -> tuple[str, str]:
405
410
  """Apply column renames and drops before the join operation.
406
411
 
407
412
  Pre-join transformations prepare DataFrames by:
@@ -421,25 +426,22 @@ class FlowGraphToPolarsConverter:
421
426
  # Calculate renames and drops
422
427
  right_renames = {
423
428
  column.old_name: column.new_name
424
- for column in settings.join_input.right_select.renames
425
- if
426
- column.old_name != column.new_name and not column.join_key or settings.join_input.how in ("outer", "right")
429
+ for column in settings.right_select.renames
430
+ if column.old_name != column.new_name and not column.join_key or settings.how in ("outer", "right")
427
431
  }
428
432
 
429
433
  left_renames = {
430
434
  column.old_name: column.new_name
431
- for column in settings.join_input.left_select.renames
435
+ for column in settings.left_select.renames
432
436
  if column.old_name != column.new_name
433
437
  }
434
438
 
435
439
  left_drop_columns = [
436
- column.old_name for column in settings.join_input.left_select.renames
437
- if not column.keep and not column.join_key
440
+ column.old_name for column in settings.left_select.renames if not column.keep and not column.join_key
438
441
  ]
439
442
 
440
443
  right_drop_columns = [
441
- column.old_name for column in settings.join_input.right_select.renames
442
- if not column.keep and not column.join_key
444
+ column.old_name for column in settings.right_select.renames if not column.keep and not column.join_key
443
445
  ]
444
446
 
445
447
  # Apply transformations
@@ -454,9 +456,14 @@ class FlowGraphToPolarsConverter:
454
456
 
455
457
  return left_df, right_df
456
458
 
457
- def _handle_join_key_transformations(self, settings: input_schema.NodeJoin, left_df: str, right_df: str,
458
- left_on: List[str], right_on: List[str]) \
459
- -> Tuple[List[str], List[str], Optional[Dict], List[str]]:
459
+ def _handle_join_key_transformations(
460
+ self,
461
+ settings: transform_schema.JoinInputManager,
462
+ left_df: str,
463
+ right_df: str,
464
+ left_on: list[str],
465
+ right_on: list[str],
466
+ ) -> tuple[list[str], list[str], dict | None, list[str]]:
460
467
  """Route to appropriate join-specific key transformation handler.
461
468
 
462
469
  Different join types require different strategies for handling join keys
@@ -476,7 +483,7 @@ class FlowGraphToPolarsConverter:
476
483
  - reverse_action: Dictionary for renaming columns after join (or None)
477
484
  - after_join_drop_cols: List of columns to drop after join
478
485
  """
479
- join_type = settings.join_input.how
486
+ join_type = settings.how
480
487
 
481
488
  if join_type in ("left", "inner"):
482
489
  return self._handle_left_inner_join_keys(settings, right_df, left_on, right_on)
@@ -487,9 +494,9 @@ class FlowGraphToPolarsConverter:
487
494
  else:
488
495
  return left_on, right_on, None, []
489
496
 
490
- def _handle_left_inner_join_keys(self, settings: input_schema.NodeJoin, right_df: str,
491
- left_on: List[str], right_on: List[str]) -> Tuple[
492
- List[str], List[str], Dict, List[str]]:
497
+ def _handle_left_inner_join_keys(
498
+ self, settings: transform_schema.JoinInputManager, right_df: str, left_on: list[str], right_on: list[str]
499
+ ) -> tuple[list[str], list[str], dict, list[str]]:
493
500
  """Handle key transformations for left and inner joins.
494
501
 
495
502
  For left/inner joins:
@@ -510,31 +517,29 @@ class FlowGraphToPolarsConverter:
510
517
  - reverse_action: Mapping to rename __DROP__ columns after join
511
518
  - after_join_drop_cols: Left join keys marked for dropping
512
519
  """
513
- left_join_keys_to_keep = [jk.new_name for jk in settings.join_input.left_select.join_key_selects if jk.keep]
514
-
520
+ left_join_keys_to_keep = [jk.new_name for jk in settings.left_select.join_key_selects if jk.keep]
515
521
  join_key_duplication_command = [
516
522
  f'pl.col("{rjk.old_name}").alias("__DROP__{rjk.new_name}__DROP__")'
517
- for rjk in settings.join_input.right_select.join_key_selects if rjk.keep
523
+ for rjk in settings.right_select.join_key_selects
524
+ if rjk.keep
518
525
  ]
519
526
 
520
527
  reverse_action = {
521
528
  f"__DROP__{rjk.new_name}__DROP__": rjk.new_name
522
- for rjk in settings.join_input.right_select.join_key_selects if rjk.keep
529
+ for rjk in settings.right_select.join_key_selects
530
+ if rjk.keep
523
531
  }
524
532
 
525
533
  if join_key_duplication_command:
526
534
  self._add_code(f"{right_df} = {right_df}.with_columns([{', '.join(join_key_duplication_command)}])")
527
535
 
528
- after_join_drop_cols = [
529
- k.new_name for k in settings.join_input.left_select.join_key_selects
530
- if not k.keep
531
- ]
536
+ after_join_drop_cols = [k.new_name for k in settings.left_select.join_key_selects if not k.keep]
532
537
 
533
538
  return left_on, right_on, reverse_action, after_join_drop_cols
534
539
 
535
- def _handle_right_join_keys(self, settings: input_schema.NodeJoin, left_df: str,
536
- left_on: List[str], right_on: List[str]) -> Tuple[
537
- List[str], List[str], None, List[str]]:
540
+ def _handle_right_join_keys(
541
+ self, settings: transform_schema.JoinInputManager, left_df: str, left_on: list[str], right_on: list[str]
542
+ ) -> tuple[list[str], list[str], None, list[str]]:
538
543
  """Handle key transformations for right joins.
539
544
 
540
545
  For right joins:
@@ -557,12 +562,13 @@ class FlowGraphToPolarsConverter:
557
562
  """
558
563
  join_key_duplication_command = [
559
564
  f'pl.col("{ljk.new_name}").alias("__jk_{ljk.new_name}")'
560
- for ljk in settings.join_input.left_select.join_key_selects if ljk.keep
565
+ for ljk in settings.left_select.join_key_selects
566
+ if ljk.keep
561
567
  ]
562
568
 
563
569
  # Update left_on keys
564
570
  for position, left_on_key in enumerate(left_on):
565
- left_on_select = settings.join_input.left_select.get_select_input_on_new_name(left_on_key)
571
+ left_on_select = settings.left_select.get_select_input_on_new_name(left_on_key)
566
572
  if left_on_select and left_on_select.keep:
567
573
  left_on[position] = f"__jk_{left_on_select.new_name}"
568
574
 
@@ -570,18 +576,18 @@ class FlowGraphToPolarsConverter:
570
576
  self._add_code(f"{left_df} = {left_df}.with_columns([{', '.join(join_key_duplication_command)}])")
571
577
 
572
578
  # Calculate columns to drop after join
573
- left_join_keys_keep = {jk.new_name for jk in settings.join_input.left_select.join_key_selects if jk.keep}
579
+ left_join_keys_keep = {jk.new_name for jk in settings.left_select.join_key_selects if jk.keep}
574
580
  after_join_drop_cols_right = [
575
581
  jk.new_name if jk.new_name not in left_join_keys_keep else jk.new_name + "_right"
576
- for jk in settings.join_input.right_select.join_key_selects if not jk.keep
582
+ for jk in settings.right_select.join_key_selects
583
+ if not jk.keep
577
584
  ]
578
585
  after_join_drop_cols = list(set(after_join_drop_cols_right))
579
-
580
586
  return left_on, right_on, None, after_join_drop_cols
581
587
 
582
- def _handle_outer_join_keys(self, settings: input_schema.NodeJoin, right_df: str,
583
- left_on: List[str], right_on: List[str]) -> Tuple[
584
- List[str], List[str], Dict, List[str]]:
588
+ def _handle_outer_join_keys(
589
+ self, settings: transform_schema.JoinInputManager, right_df: str, left_on: list[str], right_on: list[str]
590
+ ) -> tuple[list[str], list[str], dict, list[str]]:
585
591
  """Handle key transformations for outer joins.
586
592
 
587
593
  For outer joins:
@@ -602,21 +608,17 @@ class FlowGraphToPolarsConverter:
602
608
  - reverse_action: Mapping to remove __jk_ prefix after join
603
609
  - after_join_drop_cols: Combined list of columns to drop from both sides
604
610
  """
605
- left_join_keys = {jk.new_name for jk in settings.join_input.left_select.join_key_selects}
611
+ left_join_keys = {jk.new_name for jk in settings.left_select.join_key_selects}
606
612
 
607
613
  join_keys_to_keep_and_rename = [
608
- rjk for rjk in settings.join_input.right_select.join_key_selects
609
- if rjk.keep and rjk.new_name in left_join_keys
614
+ rjk for rjk in settings.right_select.join_key_selects if rjk.keep and rjk.new_name in left_join_keys
610
615
  ]
611
616
 
612
- join_key_rename_command = {
613
- rjk.new_name: f"__jk_{rjk.new_name}"
614
- for rjk in join_keys_to_keep_and_rename
615
- }
617
+ join_key_rename_command = {rjk.new_name: f"__jk_{rjk.new_name}" for rjk in join_keys_to_keep_and_rename}
616
618
 
617
619
  # Update right_on keys
618
620
  for position, right_on_key in enumerate(right_on):
619
- right_on_select = settings.join_input.right_select.get_select_input_on_new_name(right_on_key)
621
+ right_on_select = settings.right_select.get_select_input_on_new_name(right_on_key)
620
622
  if right_on_select and right_on_select.keep and right_on_select.new_name in left_join_keys:
621
623
  right_on[position] = f"__jk_{right_on_select.new_name}"
622
624
 
@@ -626,20 +628,27 @@ class FlowGraphToPolarsConverter:
626
628
  reverse_action = {f"__jk_{rjk.new_name}": rjk.new_name for rjk in join_keys_to_keep_and_rename}
627
629
 
628
630
  # Calculate columns to drop after join
629
- after_join_drop_cols_left = [
630
- jk.new_name for jk in settings.join_input.left_select.join_key_selects if not jk.keep
631
- ]
631
+ after_join_drop_cols_left = [jk.new_name for jk in settings.left_select.join_key_selects if not jk.keep]
632
632
  after_join_drop_cols_right = [
633
633
  jk.new_name if jk.new_name not in left_join_keys else jk.new_name + "_right"
634
- for jk in settings.join_input.right_select.join_key_selects if not jk.keep
634
+ for jk in settings.right_select.join_key_selects
635
+ if not jk.keep
635
636
  ]
636
637
  after_join_drop_cols = after_join_drop_cols_left + after_join_drop_cols_right
637
638
 
638
639
  return left_on, right_on, reverse_action, after_join_drop_cols
639
640
 
640
- def _execute_join_with_post_processing(self, settings: input_schema.NodeJoin, var_name: str,
641
- left_df: str, right_df: str, left_on: List[str], right_on: List[str],
642
- after_join_drop_cols: List[str], reverse_action: Optional[Dict]) -> None:
641
+ def _execute_join_with_post_processing(
642
+ self,
643
+ settings: input_schema.NodeJoin,
644
+ var_name: str,
645
+ left_df: str,
646
+ right_df: str,
647
+ left_on: list[str],
648
+ right_on: list[str],
649
+ after_join_drop_cols: list[str],
650
+ reverse_action: dict | None,
651
+ ) -> None:
643
652
  """Execute the join operation and apply post-processing steps.
644
653
 
645
654
  Generates the actual join code with any necessary post-processing:
@@ -670,7 +679,7 @@ class FlowGraphToPolarsConverter:
670
679
  self._add_code(" )")
671
680
 
672
681
  # Handle right join special case
673
- if settings.join_input.how == 'right':
682
+ if settings.join_input.how == "right":
674
683
  self._add_code(".collect()") # Right join needs to be collected first cause of issue with rename
675
684
 
676
685
  # Apply post-join transformations
@@ -681,21 +690,21 @@ class FlowGraphToPolarsConverter:
681
690
  self._add_code(f".rename({reverse_action})")
682
691
 
683
692
  # Convert back to lazy for right joins
684
- if settings.join_input.how == 'right':
685
- self._add_code(f".lazy()")
693
+ if settings.join_input.how == "right":
694
+ self._add_code(".lazy()")
686
695
 
687
696
  self._add_code(")")
688
697
 
689
- def _handle_group_by(self, settings: input_schema.NodeGroupBy, var_name: str, input_vars: Dict[str, str]) -> None:
698
+ def _handle_group_by(self, settings: input_schema.NodeGroupBy, var_name: str, input_vars: dict[str, str]) -> None:
690
699
  """Handle group by nodes."""
691
- input_df = input_vars.get('main', 'df')
700
+ input_df = input_vars.get("main", "df")
692
701
 
693
702
  # Separate groupby columns from aggregation columns
694
703
  group_cols = []
695
704
  agg_exprs = []
696
705
 
697
706
  for agg_col in settings.groupby_input.agg_cols:
698
- if agg_col.agg == 'groupby':
707
+ if agg_col.agg == "groupby":
699
708
  group_cols.append(agg_col.old_name)
700
709
  else:
701
710
  agg_func = self._get_agg_function(agg_col.agg)
@@ -708,9 +717,9 @@ class FlowGraphToPolarsConverter:
708
717
  self._add_code("])")
709
718
  self._add_code("")
710
719
 
711
- def _handle_formula(self, settings: input_schema.NodeFormula, var_name: str, input_vars: Dict[str, str]) -> None:
720
+ def _handle_formula(self, settings: input_schema.NodeFormula, var_name: str, input_vars: dict[str, str]) -> None:
712
721
  """Handle formula/expression nodes."""
713
- input_df = input_vars.get('main', 'df')
722
+ input_df = input_vars.get("main", "df")
714
723
  self.imports.add("from polars_expr_transformer.process.polars_expr_transformer import simple_function_to_expr")
715
724
 
716
725
  # Convert SQL-like formula to Polars expression
@@ -718,11 +727,11 @@ class FlowGraphToPolarsConverter:
718
727
  col_name = settings.function.field.name
719
728
  self._add_code(f"{var_name} = {input_df}.with_columns([")
720
729
  self._add_code(f'simple_function_to_expr({repr(formula)}).alias("{col_name}")')
721
- if settings.function.field.data_type not in (None, "Auto"):
730
+ if settings.function.field.data_type not in (None, transform_schema.AUTO_DATA_TYPE):
722
731
  output_type = convert_pl_type_to_string(cast_str_to_polars_type(settings.function.field.data_type))
723
732
  if output_type[:3] != "pl.":
724
733
  output_type = "pl." + output_type
725
- self._add_code(f' .cast({output_type})')
734
+ self._add_code(f" .cast({output_type})")
726
735
 
727
736
  self._add_code("])")
728
737
  self._add_code("")
@@ -730,11 +739,11 @@ class FlowGraphToPolarsConverter:
730
739
  def _handle_pivot_no_index(self, settings: input_schema.NodePivot, var_name: str, input_df: str, agg_func: str):
731
740
  pivot_input = settings.pivot_input
732
741
 
733
- self._add_code(f'{var_name} = ({input_df}.collect()')
742
+ self._add_code(f"{var_name} = ({input_df}.collect()")
734
743
  self._add_code(' .with_columns(pl.lit(1).alias("__temp_index__"))')
735
- self._add_code(' .pivot(')
744
+ self._add_code(" .pivot(")
736
745
  self._add_code(f' values="{pivot_input.value_col}",')
737
- self._add_code(f' index=["__temp_index__"],')
746
+ self._add_code(' index=["__temp_index__"],')
738
747
  self._add_code(f' columns="{pivot_input.pivot_column}",')
739
748
  self._add_code(f' aggregate_function="{agg_func}"')
740
749
  self._add_code(" )")
@@ -742,17 +751,16 @@ class FlowGraphToPolarsConverter:
742
751
  self._add_code(").lazy()")
743
752
  self._add_code("")
744
753
 
745
- def _handle_pivot(self, settings: input_schema.NodePivot, var_name: str, input_vars: Dict[str, str]) -> None:
754
+ def _handle_pivot(self, settings: input_schema.NodePivot, var_name: str, input_vars: dict[str, str]) -> None:
746
755
  """Handle pivot nodes."""
747
- input_df = input_vars.get('main', 'df')
756
+ input_df = input_vars.get("main", "df")
748
757
  pivot_input = settings.pivot_input
749
758
  if len(pivot_input.aggregations) > 1:
750
- logger.error("Multiple aggregations are not convertable to polars code. "
751
- "Taking the first value")
759
+ logger.error("Multiple aggregations are not convertable to polars code. " "Taking the first value")
752
760
  if len(pivot_input.aggregations) > 0:
753
761
  agg_func = pivot_input.aggregations[0]
754
762
  else:
755
- agg_func = 'first'
763
+ agg_func = "first"
756
764
  if len(settings.pivot_input.index_columns) == 0:
757
765
  self._handle_pivot_no_index(settings, var_name, input_df, agg_func)
758
766
  else:
@@ -766,9 +774,9 @@ class FlowGraphToPolarsConverter:
766
774
  self._add_code(").lazy()")
767
775
  self._add_code("")
768
776
 
769
- def _handle_unpivot(self, settings: input_schema.NodeUnpivot, var_name: str, input_vars: Dict[str, str]) -> None:
777
+ def _handle_unpivot(self, settings: input_schema.NodeUnpivot, var_name: str, input_vars: dict[str, str]) -> None:
770
778
  """Handle unpivot nodes."""
771
- input_df = input_vars.get('main', 'df')
779
+ input_df = input_vars.get("main", "df")
772
780
  unpivot_input = settings.unpivot_input
773
781
 
774
782
  self._add_code(f"{var_name} = {input_df}.unpivot(")
@@ -784,22 +792,22 @@ class FlowGraphToPolarsConverter:
784
792
  self._add_code(")")
785
793
  self._add_code("")
786
794
 
787
- def _handle_union(self, settings: input_schema.NodeUnion, var_name: str, input_vars: Dict[str, str]) -> None:
795
+ def _handle_union(self, settings: input_schema.NodeUnion, var_name: str, input_vars: dict[str, str]) -> None:
788
796
  """Handle union nodes."""
789
797
  # Get all input LazyFrame
790
798
  dfs = []
791
- if 'main' in input_vars:
792
- dfs.append(input_vars['main'])
799
+ if "main" in input_vars:
800
+ dfs.append(input_vars["main"])
793
801
  else:
794
802
  # Multiple main inputs
795
803
  for key, df_var in input_vars.items():
796
- if key.startswith('main'):
804
+ if key.startswith("main"):
797
805
  dfs.append(df_var)
798
806
 
799
- if settings.union_input.mode == 'relaxed':
800
- how = 'diagonal_relaxed'
807
+ if settings.union_input.mode == "relaxed":
808
+ how = "diagonal_relaxed"
801
809
  else:
802
- how = 'diagonal'
810
+ how = "diagonal"
803
811
 
804
812
  self._add_code(f"{var_name} = pl.concat([")
805
813
  for df in dfs:
@@ -807,73 +815,88 @@ class FlowGraphToPolarsConverter:
807
815
  self._add_code(f"], how='{how}')")
808
816
  self._add_code("")
809
817
 
810
- def _handle_sort(self, settings: input_schema.NodeSort, var_name: str, input_vars: Dict[str, str]) -> None:
818
+ def _handle_sort(self, settings: input_schema.NodeSort, var_name: str, input_vars: dict[str, str]) -> None:
811
819
  """Handle sort nodes."""
812
- input_df = input_vars.get('main', 'df')
820
+ input_df = input_vars.get("main", "df")
813
821
 
814
822
  sort_cols = []
815
823
  descending = []
816
824
 
817
825
  for sort_input in settings.sort_input:
818
826
  sort_cols.append(f'"{sort_input.column}"')
819
- descending.append(sort_input.how == 'desc')
827
+ descending.append(sort_input.how == "desc")
820
828
 
821
829
  self._add_code(f"{var_name} = {input_df}.sort([{', '.join(sort_cols)}], descending={descending})")
822
830
  self._add_code("")
823
831
 
824
- def _handle_sample(self, settings: input_schema.NodeSample, var_name: str, input_vars: Dict[str, str]) -> None:
832
+ def _handle_sample(self, settings: input_schema.NodeSample, var_name: str, input_vars: dict[str, str]) -> None:
825
833
  """Handle sample nodes."""
826
- input_df = input_vars.get('main', 'df')
834
+ input_df = input_vars.get("main", "df")
827
835
  self._add_code(f"{var_name} = {input_df}.head(n={settings.sample_size})")
828
836
  self._add_code("")
829
837
 
830
838
  @staticmethod
831
- def _transform_fuzzy_mappings_to_string(fuzzy_mappings: List[FuzzyMapping]) -> str:
839
+ def _transform_fuzzy_mappings_to_string(fuzzy_mappings: list[FuzzyMapping]) -> str:
832
840
  output_str = "["
833
841
  for i, fuzzy_mapping in enumerate(fuzzy_mappings):
834
-
835
- output_str += (f"FuzzyMapping(left_col='{fuzzy_mapping.left_col}',"
836
- f" right_col='{fuzzy_mapping.right_col}', "
837
- f"threshold_score={fuzzy_mapping.threshold_score}, "
838
- f"fuzzy_type='{fuzzy_mapping.fuzzy_type}')")
842
+ output_str += (
843
+ f"FuzzyMapping(left_col='{fuzzy_mapping.left_col}',"
844
+ f" right_col='{fuzzy_mapping.right_col}', "
845
+ f"threshold_score={fuzzy_mapping.threshold_score}, "
846
+ f"fuzzy_type='{fuzzy_mapping.fuzzy_type}')"
847
+ )
839
848
  if i < len(fuzzy_mappings) - 1:
840
849
  output_str += ",\n"
841
850
  output_str += "]"
842
851
  return output_str
843
852
 
844
- def _handle_fuzzy_match(self, settings: input_schema.NodeFuzzyMatch, var_name: str, input_vars: Dict[str, str]) -> None:
853
+ def _handle_fuzzy_match(
854
+ self, settings: input_schema.NodeFuzzyMatch, var_name: str, input_vars: dict[str, str]
855
+ ) -> None:
845
856
  """Handle fuzzy match nodes."""
846
857
  self.imports.add("from pl_fuzzy_frame_match import FuzzyMapping, fuzzy_match_dfs")
847
- left_df = input_vars.get('main', input_vars.get('main_0', 'df_left'))
848
- right_df = input_vars.get('right', input_vars.get('main_1', 'df_right'))
858
+ fuzzy_match_handler = transform_schema.FuzzyMatchInputManager(settings.join_input)
859
+ left_df = input_vars.get("main", input_vars.get("main_0", "df_left"))
860
+ right_df = input_vars.get("right", input_vars.get("main_1", "df_right"))
861
+
849
862
  if left_df == right_df:
850
863
  right_df = "df_right"
851
864
  self._add_code(f"{right_df} = {left_df}")
852
865
 
853
- if settings.join_input.left_select.has_drop_cols():
854
- self._add_code(f"{left_df} = {left_df}.drop({[c.old_name for c in settings.join_input.left_select.non_jk_drop_columns]})")
855
- if settings.join_input.right_select.has_drop_cols():
856
- self._add_code(f"{right_df} = {right_df}.drop({[c.old_name for c in settings.join_input.right_select.non_jk_drop_columns]})")
866
+ if fuzzy_match_handler.left_select.has_drop_cols():
867
+ self._add_code(
868
+ f"{left_df} = {left_df}.drop({[c.old_name for c in fuzzy_match_handler.left_select.non_jk_drop_columns]})"
869
+ )
870
+ if fuzzy_match_handler.right_select.has_drop_cols():
871
+ self._add_code(
872
+ f"{right_df} = {right_df}.drop({[c.old_name for c in fuzzy_match_handler.right_select.non_jk_drop_columns]})"
873
+ )
857
874
 
858
- fuzzy_join_mapping_settings = self._transform_fuzzy_mappings_to_string(settings.join_input.join_mapping)
859
- self._add_code(f"{var_name} = fuzzy_match_dfs(\n"
860
- f" left_df={left_df}, right_df={right_df},\n"
861
- f" fuzzy_maps={fuzzy_join_mapping_settings}\n"
862
- f" ).lazy()")
875
+ fuzzy_join_mapping_settings = self._transform_fuzzy_mappings_to_string(fuzzy_match_handler.join_mapping)
876
+ self._add_code(
877
+ f"{var_name} = fuzzy_match_dfs(\n"
878
+ f" left_df={left_df}, right_df={right_df},\n"
879
+ f" fuzzy_maps={fuzzy_join_mapping_settings}\n"
880
+ f" ).lazy()"
881
+ )
863
882
 
864
- def _handle_unique(self, settings: input_schema.NodeUnique, var_name: str, input_vars: Dict[str, str]) -> None:
883
+ def _handle_unique(self, settings: input_schema.NodeUnique, var_name: str, input_vars: dict[str, str]) -> None:
865
884
  """Handle unique/distinct nodes."""
866
- input_df = input_vars.get('main', 'df')
885
+ input_df = input_vars.get("main", "df")
867
886
 
868
887
  if settings.unique_input.columns:
869
- self._add_code(f"{var_name} = {input_df}.unique(subset={settings.unique_input.columns}, keep='{settings.unique_input.strategy}')")
888
+ self._add_code(
889
+ f"{var_name} = {input_df}.unique(subset={settings.unique_input.columns}, keep='{settings.unique_input.strategy}')"
890
+ )
870
891
  else:
871
892
  self._add_code(f"{var_name} = {input_df}.unique(keep='{settings.unique_input.strategy}')")
872
893
  self._add_code("")
873
894
 
874
- def _handle_text_to_rows(self, settings: input_schema.NodeTextToRows, var_name: str, input_vars: Dict[str, str]) -> None:
895
+ def _handle_text_to_rows(
896
+ self, settings: input_schema.NodeTextToRows, var_name: str, input_vars: dict[str, str]
897
+ ) -> None:
875
898
  """Handle text to rows (explode) nodes."""
876
- input_df = input_vars.get('main', 'df')
899
+ input_df = input_vars.get("main", "df")
877
900
  text_input = settings.text_to_rows_input
878
901
 
879
902
  # First split the column
@@ -886,96 +909,108 @@ class FlowGraphToPolarsConverter:
886
909
 
887
910
  self._add_code(f"{var_name} = {input_df}.with_columns({split_expr}).explode('{explode_col}')")
888
911
  self._add_code("")
912
+
889
913
  # .with_columns(
890
914
  # (pl.cum_count(record_id_settings.output_column_name)
891
915
  # .over(record_id_settings.group_by_columns) + record_id_settings.offset - 1)
892
916
  # .alias(record_id_settings.output_column_name)
893
917
  # )
894
- def _handle_record_id(self, settings: input_schema.NodeRecordId, var_name: str, input_vars: Dict[str, str]) -> None:
918
+ def _handle_record_id(self, settings: input_schema.NodeRecordId, var_name: str, input_vars: dict[str, str]) -> None:
895
919
  """Handle record ID nodes."""
896
- input_df = input_vars.get('main', 'df')
920
+ input_df = input_vars.get("main", "df")
897
921
  record_input = settings.record_id_input
898
922
  if record_input.group_by and record_input.group_by_columns:
899
-
900
923
  # Row number within groups
901
924
  self._add_code(f"{var_name} = ({input_df}")
902
925
  self._add_code(f" .with_columns(pl.lit(1).alias('{record_input.output_column_name}'))")
903
- self._add_code(f" .with_columns([")
904
- self._add_code(f" (pl.cum_count('{record_input.output_column_name}').over({record_input.group_by_columns}) + {record_input.offset} - 1)")
926
+ self._add_code(" .with_columns([")
927
+ self._add_code(
928
+ f" (pl.cum_count('{record_input.output_column_name}').over({record_input.group_by_columns}) + {record_input.offset} - 1)"
929
+ )
905
930
  self._add_code(f" .alias('{record_input.output_column_name}')")
906
931
  self._add_code("])")
907
- self._add_code(f".select(['{record_input.output_column_name}'] + [col for col in {input_df}.columns if col != '{record_input.output_column_name}'])")
932
+ self._add_code(
933
+ f".select(['{record_input.output_column_name}'] + [col for col in {input_df}.columns if col != '{record_input.output_column_name}'])"
934
+ )
908
935
  self._add_code(")")
909
936
  else:
910
937
  # Simple row number
911
- self._add_code(f"{var_name} = {input_df}.with_row_count(name='{record_input.output_column_name}', offset={record_input.offset})")
938
+ self._add_code(
939
+ f"{var_name} = {input_df}.with_row_count(name='{record_input.output_column_name}', offset={record_input.offset})"
940
+ )
912
941
  self._add_code("")
913
942
 
914
- def _handle_cross_join(self, settings: input_schema.NodeCrossJoin, var_name: str, input_vars: Dict[str, str]) -> None:
943
+ def _handle_cross_join(
944
+ self, settings: input_schema.NodeCrossJoin, var_name: str, input_vars: dict[str, str]
945
+ ) -> None:
915
946
  """Handle cross join nodes."""
916
- left_df = input_vars.get('main', input_vars.get('main_0', 'df_left'))
917
- right_df = input_vars.get('right', input_vars.get('main_1', 'df_right'))
947
+ left_df = input_vars.get("main", input_vars.get("main_0", "df_left"))
948
+ right_df = input_vars.get("right", input_vars.get("main_1", "df_right"))
918
949
 
919
950
  self._add_code(f"{var_name} = {left_df}.join({right_df}, how='cross')")
920
951
  self._add_code("")
921
952
 
922
- def _handle_cloud_storage_writer(self, settings: input_schema.NodeCloudStorageWriter, var_name: str, input_vars: Dict[str, str]) -> None:
953
+ def _handle_cloud_storage_writer(
954
+ self, settings: input_schema.NodeCloudStorageWriter, var_name: str, input_vars: dict[str, str]
955
+ ) -> None:
923
956
  """Handle cloud storage writer nodes."""
924
- input_df = input_vars.get('main', 'df')
957
+ input_df = input_vars.get("main", "df")
925
958
  # def write_csv_to_cloud_storage(self, path: str, connection_name: typing.Optional[str] = None, delimiter: str = ';', encoding: typing.Literal['utf8', 'utf8-lossy'] = 'utf8', description: Optional[str] = None) -> 'FlowFrame': ...
926
959
 
927
960
  output_settings = settings.cloud_storage_settings
928
961
  self.imports.add("import flowfile as ff")
929
962
  self._add_code(f"(ff.FlowFrame({input_df})")
930
963
  if output_settings.file_format == "csv":
931
- self._add_code(f' .write_csv_to_cloud_storage(')
964
+ self._add_code(" .write_csv_to_cloud_storage(")
932
965
  self._add_code(f' path="{output_settings.resource_path}",')
933
966
  self._add_code(f' connection_name="{output_settings.connection_name}",')
934
967
  self._add_code(f' delimiter="{output_settings.csv_delimiter}",')
935
968
  self._add_code(f' encoding="{output_settings.csv_encoding}",')
936
969
  self._add_code(f' description="{settings.description}"')
937
970
  elif output_settings.file_format == "parquet":
938
- self._add_code(f' .write_parquet_to_cloud_storage(')
971
+ self._add_code(" .write_parquet_to_cloud_storage(")
939
972
  self._add_code(f' path="{output_settings.resource_path}",')
940
973
  self._add_code(f' connection_name="{output_settings.connection_name}",')
941
974
  self._add_code(f' description="{settings.description}"')
942
975
  elif output_settings.file_format == "json":
943
- self._add_code(f' .write_json_to_cloud_storage(')
976
+ self._add_code(" .write_json_to_cloud_storage(")
944
977
  self._add_code(f' path="{output_settings.resource_path}",')
945
978
  self._add_code(f' connection_name="{output_settings.connection_name}",')
946
979
  self._add_code(f' description="{settings.description}"')
947
980
  elif output_settings.file_format == "delta":
948
- self._add_code(f' .write_delta(')
981
+ self._add_code(" .write_delta(")
949
982
  self._add_code(f' path="{output_settings.resource_path}",')
950
983
  self._add_code(f' write_mode="{output_settings.write_mode}",')
951
984
  self._add_code(f' connection_name="{output_settings.connection_name}",')
952
985
  self._add_code(f' description="{settings.description}"')
953
- self._add_code(' )')
954
- self._add_code(')')
986
+ self._add_code(" )")
987
+ self._add_code(")")
955
988
 
956
- def _handle_output(self, settings: input_schema.NodeOutput, var_name: str, input_vars: Dict[str, str]) -> None:
989
+ def _handle_output(self, settings: input_schema.NodeOutput, var_name: str, input_vars: dict[str, str]) -> None:
957
990
  """Handle output nodes."""
958
- input_df = input_vars.get('main', 'df')
991
+ input_df = input_vars.get("main", "df")
959
992
  output_settings = settings.output_settings
960
993
 
961
- if output_settings.file_type == 'csv':
962
- self._add_code(f'{input_df}.sink_csv(')
994
+ if output_settings.file_type == "csv":
995
+ self._add_code(f"{input_df}.sink_csv(")
963
996
  self._add_code(f' "{output_settings.abs_file_path}",')
964
- self._add_code(f' separator="{output_settings.output_csv_table.delimiter}"')
965
- self._add_code(')')
997
+ self._add_code(f' separator="{output_settings.table_settings.delimiter}"')
998
+ self._add_code(")")
966
999
 
967
- elif output_settings.file_type == 'parquet':
1000
+ elif output_settings.file_type == "parquet":
968
1001
  self._add_code(f'{input_df}.sink_parquet("{output_settings.abs_file_path}")')
969
1002
 
970
- elif output_settings.file_type == 'excel':
971
- self._add_code(f'{input_df}.collect().write_excel(')
1003
+ elif output_settings.file_type == "excel":
1004
+ self._add_code(f"{input_df}.collect().write_excel(")
972
1005
  self._add_code(f' "{output_settings.abs_file_path}",')
973
- self._add_code(f' worksheet="{output_settings.output_excel_table.sheet_name}"')
974
- self._add_code(')')
1006
+ self._add_code(f' worksheet="{output_settings.table_settings.sheet_name}"')
1007
+ self._add_code(")")
975
1008
 
976
1009
  self._add_code("")
977
1010
 
978
- def _handle_polars_code(self, settings: input_schema.NodePolarsCode, var_name: str, input_vars: Dict[str, str]) -> None:
1011
+ def _handle_polars_code(
1012
+ self, settings: input_schema.NodePolarsCode, var_name: str, input_vars: dict[str, str]
1013
+ ) -> None:
979
1014
  """Handle custom Polars code nodes."""
980
1015
  code = settings.polars_code_input.polars_code.strip()
981
1016
  # Determine function parameters based on number of inputs
@@ -992,7 +1027,7 @@ class FlowGraphToPolarsConverter:
992
1027
  arg_list = []
993
1028
  i = 1
994
1029
  for key in sorted(input_vars.keys()):
995
- if key.startswith('main'):
1030
+ if key.startswith("main"):
996
1031
  param_list.append(f"input_df_{i}: pl.LazyFrame")
997
1032
  arg_list.append(input_vars[key])
998
1033
  i += 1
@@ -1003,7 +1038,7 @@ class FlowGraphToPolarsConverter:
1003
1038
  is_expression = "output_df" not in code
1004
1039
 
1005
1040
  # Wrap the code in a function
1006
- self._add_code(f"# Custom Polars code")
1041
+ self._add_code("# Custom Polars code")
1007
1042
  self._add_code(f"def _polars_code_{var_name.replace('df_', '')}({params}):")
1008
1043
 
1009
1044
  # Handle the code based on its structure
@@ -1012,18 +1047,18 @@ class FlowGraphToPolarsConverter:
1012
1047
  self._add_code(f" return {code}")
1013
1048
  else:
1014
1049
  # It contains assignments
1015
- for line in code.split('\n'):
1050
+ for line in code.split("\n"):
1016
1051
  if line.strip():
1017
1052
  self._add_code(f" {line}")
1018
1053
 
1019
1054
  # If no explicit return, try to detect what to return
1020
- if 'return' not in code:
1055
+ if "return" not in code:
1021
1056
  # Try to find the last assignment
1022
- lines = [l.strip() for l in code.split('\n') if l.strip() and '=' in l]
1057
+ lines = [l.strip() for l in code.split("\n") if l.strip() and "=" in l]
1023
1058
  if lines:
1024
1059
  last_assignment = lines[-1]
1025
- if '=' in last_assignment:
1026
- output_var = last_assignment.split('=')[0].strip()
1060
+ if "=" in last_assignment:
1061
+ output_var = last_assignment.split("=")[0].strip()
1027
1062
  self._add_code(f" return {output_var}")
1028
1063
 
1029
1064
  self._add_code("")
@@ -1056,14 +1091,7 @@ class FlowGraphToPolarsConverter:
1056
1091
  col, op, val = match.groups()
1057
1092
 
1058
1093
  # Map operators
1059
- op_map = {
1060
- '=': '==',
1061
- '!=': '!=',
1062
- '>': '>',
1063
- '<': '<',
1064
- '>=': '>=',
1065
- '<=': '<='
1066
- }
1094
+ op_map = {"=": "==", "!=": "!=", ">": ">", "<": "<", ">=": ">=", "<=": "<="}
1067
1095
 
1068
1096
  polars_op = op_map.get(op, op)
1069
1097
 
@@ -1077,45 +1105,129 @@ class FlowGraphToPolarsConverter:
1077
1105
  return re.sub(pattern, replace_expr, expr)
1078
1106
 
1079
1107
  def _create_basic_filter_expr(self, basic: transform_schema.BasicFilter) -> str:
1080
- """Create Polars expression from basic filter."""
1108
+ """Create Polars expression from basic filter.
1109
+
1110
+ Generates proper Polars code for all supported filter operators.
1111
+
1112
+ Args:
1113
+ basic: The BasicFilter configuration.
1114
+
1115
+ Returns:
1116
+ A string containing valid Polars filter expression code.
1117
+ """
1118
+ from flowfile_core.schemas.transform_schema import FilterOperator
1119
+
1081
1120
  col = f'pl.col("{basic.field}")'
1121
+ value = basic.value
1122
+ value2 = basic.value2
1123
+
1124
+ # Determine if value is numeric (for proper quoting)
1125
+ is_numeric = value.replace(".", "", 1).replace("-", "", 1).isnumeric() if value else False
1126
+
1127
+ # Get the operator
1128
+ try:
1129
+ operator = basic.get_operator()
1130
+ except (ValueError, AttributeError):
1131
+ operator = FilterOperator.from_symbol(str(basic.operator))
1132
+
1133
+ # Generate expression based on operator
1134
+ if operator == FilterOperator.EQUALS:
1135
+ if is_numeric:
1136
+ return f"{col} == {value}"
1137
+ return f'{col} == "{value}"'
1138
+
1139
+ elif operator == FilterOperator.NOT_EQUALS:
1140
+ if is_numeric:
1141
+ return f"{col} != {value}"
1142
+ return f'{col} != "{value}"'
1143
+
1144
+ elif operator == FilterOperator.GREATER_THAN:
1145
+ if is_numeric:
1146
+ return f"{col} > {value}"
1147
+ return f'{col} > "{value}"'
1148
+
1149
+ elif operator == FilterOperator.GREATER_THAN_OR_EQUALS:
1150
+ if is_numeric:
1151
+ return f"{col} >= {value}"
1152
+ return f'{col} >= "{value}"'
1153
+
1154
+ elif operator == FilterOperator.LESS_THAN:
1155
+ if is_numeric:
1156
+ return f"{col} < {value}"
1157
+ return f'{col} < "{value}"'
1158
+
1159
+ elif operator == FilterOperator.LESS_THAN_OR_EQUALS:
1160
+ if is_numeric:
1161
+ return f"{col} <= {value}"
1162
+ return f'{col} <= "{value}"'
1163
+
1164
+ elif operator == FilterOperator.CONTAINS:
1165
+ return f'{col}.str.contains("{value}")'
1166
+
1167
+ elif operator == FilterOperator.NOT_CONTAINS:
1168
+ return f'{col}.str.contains("{value}").not_()'
1169
+
1170
+ elif operator == FilterOperator.STARTS_WITH:
1171
+ return f'{col}.str.starts_with("{value}")'
1172
+
1173
+ elif operator == FilterOperator.ENDS_WITH:
1174
+ return f'{col}.str.ends_with("{value}")'
1175
+
1176
+ elif operator == FilterOperator.IS_NULL:
1177
+ return f"{col}.is_null()"
1178
+
1179
+ elif operator == FilterOperator.IS_NOT_NULL:
1180
+ return f"{col}.is_not_null()"
1181
+
1182
+ elif operator == FilterOperator.IN:
1183
+ values = [v.strip() for v in value.split(",")]
1184
+ if all(v.replace(".", "", 1).replace("-", "", 1).isnumeric() for v in values):
1185
+ values_str = ", ".join(values)
1186
+ else:
1187
+ values_str = ", ".join(f'"{v}"' for v in values)
1188
+ return f"{col}.is_in([{values_str}])"
1189
+
1190
+ elif operator == FilterOperator.NOT_IN:
1191
+ values = [v.strip() for v in value.split(",")]
1192
+ if all(v.replace(".", "", 1).replace("-", "", 1).isnumeric() for v in values):
1193
+ values_str = ", ".join(values)
1194
+ else:
1195
+ values_str = ", ".join(f'"{v}"' for v in values)
1196
+ return f"{col}.is_in([{values_str}]).not_()"
1197
+
1198
+ elif operator == FilterOperator.BETWEEN:
1199
+ if value2 is None:
1200
+ return f"{col} # BETWEEN requires two values"
1201
+ if is_numeric and value2.replace(".", "", 1).replace("-", "", 1).isnumeric():
1202
+ return f"({col} >= {value}) & ({col} <= {value2})"
1203
+ return f'({col} >= "{value}") & ({col} <= "{value2}")'
1082
1204
 
1083
- if basic.filter_type == 'equals':
1084
- return f'{col} == "{basic.filter_value}"'
1085
- elif basic.filter_type == 'not_equals':
1086
- return f'{col} != "{basic.filter_value}"'
1087
- elif basic.filter_type == 'greater':
1088
- return f'{col} > {basic.filter_value}'
1089
- elif basic.filter_type == 'less':
1090
- return f'{col} < {basic.filter_value}'
1091
- elif basic.filter_type == 'in':
1092
- values = basic.filter_value.split(',')
1093
- return f"pl.col('{col}').is_in({values})"
1205
+ # Fallback
1094
1206
  return col
1095
1207
 
1096
1208
  def _get_polars_dtype(self, dtype_str: str) -> str:
1097
1209
  """Convert Flowfile dtype string to Polars dtype."""
1098
1210
  dtype_map = {
1099
- 'String': 'pl.Utf8',
1100
- 'Integer': 'pl.Int64',
1101
- 'Double': 'pl.Float64',
1102
- 'Boolean': 'pl.Boolean',
1103
- 'Date': 'pl.Date',
1104
- 'Datetime': 'pl.Datetime',
1105
- 'Float32': 'pl.Float32',
1106
- 'Float64': 'pl.Float64',
1107
- 'Int32': 'pl.Int32',
1108
- 'Int64': 'pl.Int64',
1109
- 'Utf8': 'pl.Utf8',
1211
+ "String": "pl.Utf8",
1212
+ "Integer": "pl.Int64",
1213
+ "Double": "pl.Float64",
1214
+ "Boolean": "pl.Boolean",
1215
+ "Date": "pl.Date",
1216
+ "Datetime": "pl.Datetime",
1217
+ "Float32": "pl.Float32",
1218
+ "Float64": "pl.Float64",
1219
+ "Int32": "pl.Int32",
1220
+ "Int64": "pl.Int64",
1221
+ "Utf8": "pl.Utf8",
1110
1222
  }
1111
- return dtype_map.get(dtype_str, 'pl.Utf8')
1223
+ return dtype_map.get(dtype_str, "pl.Utf8")
1112
1224
 
1113
1225
  def _get_agg_function(self, agg: str) -> str:
1114
1226
  """Get Polars aggregation function name."""
1115
1227
  agg_map = {
1116
- 'avg': 'mean',
1117
- 'average': 'mean',
1118
- 'concat': 'str.concat',
1228
+ "avg": "mean",
1229
+ "average": "mean",
1230
+ "concat": "str.concat",
1119
1231
  }
1120
1232
  return agg_map.get(agg, agg)
1121
1233
 
@@ -1128,12 +1240,12 @@ class FlowGraphToPolarsConverter:
1128
1240
  import re
1129
1241
 
1130
1242
  # Pattern for column names (simplified)
1131
- col_pattern = r'\b([a-zA-Z_][a-zA-Z0-9_]*)\b'
1243
+ col_pattern = r"\b([a-zA-Z_][a-zA-Z0-9_]*)\b"
1132
1244
 
1133
1245
  def replace_col(match):
1134
1246
  col_name = match.group(1)
1135
1247
  # Skip SQL keywords
1136
- keywords = {'CASE', 'WHEN', 'THEN', 'ELSE', 'END', 'AND', 'OR', 'NOT', 'IN', 'AS'}
1248
+ keywords = {"CASE", "WHEN", "THEN", "ELSE", "END", "AND", "OR", "NOT", "IN", "AS"}
1137
1249
  if col_name.upper() in keywords:
1138
1250
  return col_name
1139
1251
  return f'pl.col("{col_name}")'
@@ -1141,13 +1253,13 @@ class FlowGraphToPolarsConverter:
1141
1253
  result = re.sub(col_pattern, replace_col, sql_expr)
1142
1254
 
1143
1255
  # Handle CASE WHEN
1144
- if 'CASE' in result:
1256
+ if "CASE" in result:
1145
1257
  # This would need proper parsing
1146
1258
  result = "pl.when(...).then(...).otherwise(...)"
1147
1259
 
1148
1260
  return result
1149
1261
 
1150
- def add_return_code(self, lines: List[str]) -> None:
1262
+ def add_return_code(self, lines: list[str]) -> None:
1151
1263
  if self.output_nodes:
1152
1264
  # Return marked output nodes
1153
1265
  if len(self.output_nodes) == 1:
@@ -1177,8 +1289,8 @@ class FlowGraphToPolarsConverter:
1177
1289
  # Add main function
1178
1290
  lines.append("def run_etl_pipeline():")
1179
1291
  lines.append(' """')
1180
- lines.append(f' ETL Pipeline: {self.flow_graph.__name__}')
1181
- lines.append(' Generated from Flowfile')
1292
+ lines.append(f" ETL Pipeline: {self.flow_graph.__name__}")
1293
+ lines.append(" Generated from Flowfile")
1182
1294
  lines.append(' """')
1183
1295
  lines.append(" ")
1184
1296