Flowfile 0.5.1__py3-none-any.whl → 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (346)
  1. build_backends/main.py +25 -22
  2. build_backends/main_prd.py +10 -19
  3. flowfile/__init__.py +194 -74
  4. flowfile/__main__.py +10 -7
  5. flowfile/api.py +51 -57
  6. flowfile/web/__init__.py +14 -9
  7. flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
  8. flowfile/web/static/assets/AdminView-f9847d67.js +713 -0
  9. flowfile/web/static/assets/CloudConnectionView-cf85f943.css +72 -0
  10. flowfile/web/static/assets/{CloudConnectionManager-0dfba9f2.js → CloudConnectionView-faace55b.js} +11 -11
  11. flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
  12. flowfile/web/static/assets/{CloudStorageReader-d5b1b6c9.js → CloudStorageReader-d86ecaa7.js} +10 -8
  13. flowfile/web/static/assets/{CloudStorageWriter-00d87aad.js → CloudStorageWriter-0f4d9a44.js} +10 -8
  14. flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
  15. flowfile/web/static/assets/ColumnActionInput-c44b7aee.css +159 -0
  16. flowfile/web/static/assets/ColumnActionInput-f4189ae0.js +330 -0
  17. flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
  18. flowfile/web/static/assets/{ColumnSelector-4685e75d.js → ColumnSelector-e66b33da.js} +3 -5
  19. flowfile/web/static/assets/ContextMenu-49463352.js +9 -0
  20. flowfile/web/static/assets/ContextMenu-dd5f3f25.js +9 -0
  21. flowfile/web/static/assets/ContextMenu-f709b884.js +9 -0
  22. flowfile/web/static/assets/ContextMenu.vue_vue_type_script_setup_true_lang-a1bd6314.js +59 -0
  23. flowfile/web/static/assets/{CrossJoin-702a3edd.js → CrossJoin-24694b8f.js} +12 -10
  24. flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
  25. flowfile/web/static/assets/{CustomNode-b1519993.js → CustomNode-569d45ff.js} +43 -24
  26. flowfile/web/static/assets/CustomNode-edb9b939.css +42 -0
  27. flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-c20a1e16.css} +23 -21
  28. flowfile/web/static/assets/{DatabaseConnectionSettings-6f3e4ea5.js → DatabaseConnectionSettings-cfc08938.js} +5 -4
  29. flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-5bf8c75b.css} +41 -46
  30. flowfile/web/static/assets/{DatabaseReader-d38c7295.js → DatabaseReader-701feabb.js} +25 -15
  31. flowfile/web/static/assets/{DatabaseManager-cf5ef661.js → DatabaseView-0482e5b5.js} +11 -11
  32. flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
  33. flowfile/web/static/assets/{DatabaseWriter-b04ef46a.js → DatabaseWriter-16721989.js} +17 -10
  34. flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-bdcf2c8b.css} +29 -27
  35. flowfile/web/static/assets/{designer-8da3ba3a.css → DesignerView-49abb835.css} +783 -663
  36. flowfile/web/static/assets/{designer-9633482a.js → DesignerView-f64749fb.js} +1292 -3253
  37. flowfile/web/static/assets/{documentation-ca400224.js → DocumentationView-61bd2990.js} +5 -5
  38. flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-9ea6e871.css} +9 -9
  39. flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
  40. flowfile/web/static/assets/{ExploreData-5fa10ed8.js → ExploreData-e2735b13.js} +18 -9
  41. flowfile/web/static/assets/{ExternalSource-d39af878.js → ExternalSource-2535c3b2.js} +9 -7
  42. flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-7ac7373f.css} +20 -20
  43. flowfile/web/static/assets/Filter-2cdbc93c.js +287 -0
  44. flowfile/web/static/assets/Filter-7494ea97.css +48 -0
  45. flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
  46. flowfile/web/static/assets/{Formula-6b04fb1d.js → Formula-fcda3c2c.js} +13 -11
  47. flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
  48. flowfile/web/static/assets/{FuzzyMatch-999521f4.js → FuzzyMatch-f8d3b7d3.js} +12 -10
  49. flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-4b4d7db9.css} +5 -5
  50. flowfile/web/static/assets/{GraphSolver-17dd2198.js → GraphSolver-72eaa695.js} +14 -12
  51. flowfile/web/static/assets/GroupBy-5792782d.css +9 -0
  52. flowfile/web/static/assets/{GroupBy-6b039e18.js → GroupBy-8aa0598b.js} +9 -7
  53. flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
  54. flowfile/web/static/assets/{Join-24d0f113.js → Join-e40f0ffa.js} +13 -11
  55. flowfile/web/static/assets/LoginView-5111c9ae.js +134 -0
  56. flowfile/web/static/assets/LoginView-d325d632.css +172 -0
  57. flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
  58. flowfile/web/static/assets/{ManualInput-34639209.js → ManualInput-9b6f3224.js} +170 -116
  59. flowfile/web/static/assets/{MultiSelect-0e8724a3.js → MultiSelect-ef28e19e.js} +2 -2
  60. flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js → MultiSelect.vue_vue_type_script_setup_true_lang-83b3bbfd.js} +1 -1
  61. flowfile/web/static/assets/NodeDesigner-94cd4dd3.css +1429 -0
  62. flowfile/web/static/assets/NodeDesigner-d2b7ee2b.js +2712 -0
  63. flowfile/web/static/assets/{NumericInput-3d63a470.js → NumericInput-1d789794.js} +2 -2
  64. flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js → NumericInput.vue_vue_type_script_setup_true_lang-7775f83e.js} +5 -2
  65. flowfile/web/static/assets/Output-692dd25d.css +37 -0
  66. flowfile/web/static/assets/{Output-edea9802.js → Output-cefef801.js} +14 -10
  67. flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
  68. flowfile/web/static/assets/{Pivot-61d19301.js → Pivot-bab1b75b.js} +12 -10
  69. flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
  70. flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
  71. flowfile/web/static/assets/{PivotValidation-f97fec5b.js → PivotValidation-e7941f91.js} +3 -3
  72. flowfile/web/static/assets/{PivotValidation-de9f43fe.js → PivotValidation-fba09336.js} +3 -3
  73. flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
  74. flowfile/web/static/assets/{PolarsCode-bc3c9984.js → PolarsCode-740e40fa.js} +18 -9
  75. flowfile/web/static/assets/PopOver-862d7e28.js +939 -0
  76. flowfile/web/static/assets/PopOver-d96599db.css +33 -0
  77. flowfile/web/static/assets/{Read-64a3f259.js → Read-225cc63f.js} +16 -12
  78. flowfile/web/static/assets/{Read-e808b239.css → Read-90f366bc.css} +15 -15
  79. flowfile/web/static/assets/{RecordCount-3d5039be.js → RecordCount-ffc71eca.js} +6 -4
  80. flowfile/web/static/assets/{RecordId-597510e0.js → RecordId-a70bb8df.js} +9 -7
  81. flowfile/web/static/assets/{SQLQueryComponent-df51adbe.js → SQLQueryComponent-15a421f5.js} +3 -3
  82. flowfile/web/static/assets/SQLQueryComponent-edb90b98.css +29 -0
  83. flowfile/web/static/assets/{Sample-4be0a507.js → Sample-6c26afc7.js} +6 -4
  84. flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
  85. flowfile/web/static/assets/SecretSelector-ceed9496.js +113 -0
  86. flowfile/web/static/assets/{SecretManager-4839be57.js → SecretsView-214d255a.js} +35 -36
  87. flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
  88. flowfile/web/static/assets/{Select-9b72f201.js → Select-8fc29999.js} +9 -7
  89. flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
  90. flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
  91. flowfile/web/static/assets/{SettingsSection-7ded385d.js → SettingsSection-3f70e4c3.js} +3 -3
  92. flowfile/web/static/assets/{SettingsSection-f0f75a42.js → SettingsSection-83090218.js} +3 -3
  93. flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
  94. flowfile/web/static/assets/{SettingsSection-e1e9c953.js → SettingsSection-9f0d1725.js} +3 -3
  95. flowfile/web/static/assets/SetupView-3fa0aa03.js +160 -0
  96. flowfile/web/static/assets/SetupView-e2da3442.css +230 -0
  97. flowfile/web/static/assets/{SingleSelect-6c777aac.js → SingleSelect-a4a568cb.js} +2 -2
  98. flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js → SingleSelect.vue_vue_type_script_setup_true_lang-c8ebdd33.js} +1 -1
  99. flowfile/web/static/assets/{SliderInput-7cb93e62.js → SliderInput-be533e71.js} +7 -4
  100. flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
  101. flowfile/web/static/assets/{Sort-6cbde21a.js → Sort-154dad81.js} +9 -7
  102. flowfile/web/static/assets/Sort-4abb7fae.css +9 -0
  103. flowfile/web/static/assets/{TextInput-d9a40c11.js → TextInput-454e2bda.js} +2 -2
  104. flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-5896c375.js → TextInput.vue_vue_type_script_setup_true_lang-e86510d0.js} +5 -2
  105. flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
  106. flowfile/web/static/assets/{TextToRows-c4fcbf4d.js → TextToRows-ea73433d.js} +11 -10
  107. flowfile/web/static/assets/{ToggleSwitch-4ef91d19.js → ToggleSwitch-9d7b30f1.js} +2 -2
  108. flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-00f2580e.js} +1 -1
  109. flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-394a1f78.css} +14 -14
  110. flowfile/web/static/assets/{UnavailableFields-a03f512c.js → UnavailableFields-b72a2c72.js} +4 -4
  111. flowfile/web/static/assets/{Union-bfe9b996.js → Union-1e44f263.js} +8 -6
  112. flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
  113. flowfile/web/static/assets/Unique-2b705521.css +3 -0
  114. flowfile/web/static/assets/{Unique-5d023a27.js → Unique-a3bc6d0a.js} +13 -10
  115. flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-b6ad6427.css} +7 -7
  116. flowfile/web/static/assets/{Unpivot-91cc5354.js → Unpivot-e27935fc.js} +11 -9
  117. flowfile/web/static/assets/{UnpivotValidation-7ee2de44.js → UnpivotValidation-72497680.js} +3 -3
  118. flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
  119. flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
  120. flowfile/web/static/assets/{VueGraphicWalker-e51b9924.js → VueGraphicWalker-d9ab70a3.js} +4 -4
  121. flowfile/web/static/assets/{api-cf1221f0.js → api-a2102880.js} +1 -1
  122. flowfile/web/static/assets/{api-c1bad5ca.js → api-f75042b0.js} +1 -1
  123. flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-1d6acbd9.css} +41 -41
  124. flowfile/web/static/assets/{dropDown-614b998d.js → dropDown-2798a109.js} +3 -3
  125. flowfile/web/static/assets/{fullEditor-f7971590.js → fullEditor-cf7d7d93.js} +11 -10
  126. flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-fe9f7e18.css} +77 -65
  127. flowfile/web/static/assets/{genericNodeSettings-4fe5f36b.js → genericNodeSettings-14eac1c3.js} +5 -5
  128. flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
  129. flowfile/web/static/assets/{index-5429bbf8.js → index-387a6f18.js} +41806 -40958
  130. flowfile/web/static/assets/index-6b367bb5.js +38 -0
  131. flowfile/web/static/assets/{index-50508d4d.css → index-e96ab018.css} +2184 -569
  132. flowfile/web/static/assets/index-f0a6e5a5.js +2696 -0
  133. flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
  134. flowfile/web/static/assets/nodeInput-ed2ae8d7.js +2 -0
  135. flowfile/web/static/assets/{outputCsv-076b85ab.js → outputCsv-3c1757e8.js} +3 -3
  136. flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
  137. flowfile/web/static/assets/{outputExcel-0fd17dbe.js → outputExcel-686e1f48.js} +3 -3
  138. flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
  139. flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
  140. flowfile/web/static/assets/{outputParquet-b61e0847.js → outputParquet-df28faa7.js} +4 -4
  141. flowfile/web/static/assets/{readCsv-c767cb37.css → readCsv-3bfac4c3.css} +15 -15
  142. flowfile/web/static/assets/{readCsv-a8bb8b61.js → readCsv-e37eee21.js} +3 -3
  143. flowfile/web/static/assets/{readExcel-806d2826.css → readExcel-3db6b763.css} +13 -13
  144. flowfile/web/static/assets/{readExcel-67b4aee0.js → readExcel-a13f14bb.js} +5 -5
  145. flowfile/web/static/assets/{readParquet-92ce1dbc.js → readParquet-344cf746.js} +3 -3
  146. flowfile/web/static/assets/{readParquet-48c81530.css → readParquet-c5244ad5.css} +4 -4
  147. flowfile/web/static/assets/secrets.api-ae198c5c.js +65 -0
  148. flowfile/web/static/assets/{selectDynamic-92e25ee3.js → selectDynamic-6b4b0767.js} +5 -5
  149. flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
  150. flowfile/web/static/assets/{vue-codemirror.esm-41b0e0d7.js → vue-codemirror.esm-31ba0e0b.js} +31 -640
  151. flowfile/web/static/assets/{vue-content-loader.es-2c8e608f.js → vue-content-loader.es-4469c8ff.js} +1 -1
  152. flowfile/web/static/index.html +2 -2
  153. {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/METADATA +3 -4
  154. flowfile-0.5.4.dist-info/RECORD +407 -0
  155. flowfile_core/__init__.py +13 -6
  156. flowfile_core/auth/jwt.py +51 -16
  157. flowfile_core/auth/models.py +32 -7
  158. flowfile_core/auth/password.py +89 -0
  159. flowfile_core/auth/secrets.py +64 -19
  160. flowfile_core/configs/__init__.py +9 -7
  161. flowfile_core/configs/flow_logger.py +15 -14
  162. flowfile_core/configs/node_store/__init__.py +72 -4
  163. flowfile_core/configs/node_store/nodes.py +155 -172
  164. flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
  165. flowfile_core/configs/settings.py +28 -15
  166. flowfile_core/database/connection.py +7 -6
  167. flowfile_core/database/init_db.py +96 -2
  168. flowfile_core/database/models.py +3 -1
  169. flowfile_core/fileExplorer/__init__.py +17 -0
  170. flowfile_core/fileExplorer/funcs.py +145 -57
  171. flowfile_core/fileExplorer/utils.py +10 -11
  172. flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
  173. flowfile_core/flowfile/analytics/analytics_processor.py +26 -24
  174. flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
  175. flowfile_core/flowfile/analytics/utils.py +1 -1
  176. flowfile_core/flowfile/code_generator/__init__.py +11 -0
  177. flowfile_core/flowfile/code_generator/code_generator.py +706 -247
  178. flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
  179. flowfile_core/flowfile/connection_manager/models.py +1 -1
  180. flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
  181. flowfile_core/flowfile/database_connection_manager/models.py +1 -1
  182. flowfile_core/flowfile/extensions.py +17 -12
  183. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
  184. flowfile_core/flowfile/flow_data_engine/create/funcs.py +115 -83
  185. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +493 -423
  186. flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
  187. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
  188. flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
  189. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
  190. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
  191. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +31 -20
  192. flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
  193. flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
  194. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +14 -15
  195. flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
  196. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
  197. flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
  198. flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
  199. flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
  200. flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
  201. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +190 -127
  202. flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
  203. flowfile_core/flowfile/flow_data_engine/utils.py +99 -67
  204. flowfile_core/flowfile/flow_graph.py +920 -571
  205. flowfile_core/flowfile/flow_graph_utils.py +31 -49
  206. flowfile_core/flowfile/flow_node/flow_node.py +379 -258
  207. flowfile_core/flowfile/flow_node/models.py +53 -41
  208. flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
  209. flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
  210. flowfile_core/flowfile/handler.py +80 -30
  211. flowfile_core/flowfile/manage/compatibility_enhancements.py +209 -126
  212. flowfile_core/flowfile/manage/io_flowfile.py +54 -57
  213. flowfile_core/flowfile/node_designer/__init__.py +19 -13
  214. flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
  215. flowfile_core/flowfile/node_designer/custom_node.py +162 -36
  216. flowfile_core/flowfile/node_designer/ui_components.py +278 -34
  217. flowfile_core/flowfile/schema_callbacks.py +71 -51
  218. flowfile_core/flowfile/setting_generator/__init__.py +0 -1
  219. flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
  220. flowfile_core/flowfile/setting_generator/settings.py +64 -53
  221. flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
  222. flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
  223. flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
  224. flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
  225. flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
  226. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
  227. flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
  228. flowfile_core/flowfile/util/calculate_layout.py +9 -13
  229. flowfile_core/flowfile/util/execution_orderer.py +25 -17
  230. flowfile_core/flowfile/util/node_skipper.py +4 -4
  231. flowfile_core/flowfile/utils.py +19 -21
  232. flowfile_core/main.py +26 -19
  233. flowfile_core/routes/auth.py +284 -11
  234. flowfile_core/routes/cloud_connections.py +25 -25
  235. flowfile_core/routes/logs.py +21 -29
  236. flowfile_core/routes/public.py +46 -4
  237. flowfile_core/routes/routes.py +70 -34
  238. flowfile_core/routes/secrets.py +25 -27
  239. flowfile_core/routes/user_defined_components.py +483 -4
  240. flowfile_core/run_lock.py +0 -1
  241. flowfile_core/schemas/__init__.py +4 -6
  242. flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
  243. flowfile_core/schemas/cloud_storage_schemas.py +96 -66
  244. flowfile_core/schemas/input_schema.py +231 -144
  245. flowfile_core/schemas/output_model.py +49 -34
  246. flowfile_core/schemas/schemas.py +116 -89
  247. flowfile_core/schemas/transform_schema.py +518 -263
  248. flowfile_core/schemas/yaml_types.py +21 -7
  249. flowfile_core/secret_manager/secret_manager.py +123 -18
  250. flowfile_core/types.py +29 -9
  251. flowfile_core/utils/arrow_reader.py +7 -6
  252. flowfile_core/utils/excel_file_manager.py +3 -3
  253. flowfile_core/utils/fileManager.py +7 -7
  254. flowfile_core/utils/fl_executor.py +8 -10
  255. flowfile_core/utils/utils.py +4 -4
  256. flowfile_core/utils/validate_setup.py +5 -4
  257. flowfile_frame/__init__.py +117 -51
  258. flowfile_frame/adapters.py +2 -9
  259. flowfile_frame/adding_expr.py +73 -32
  260. flowfile_frame/cloud_storage/frame_helpers.py +27 -23
  261. flowfile_frame/cloud_storage/secret_manager.py +12 -26
  262. flowfile_frame/config.py +2 -5
  263. flowfile_frame/database/__init__.py +36 -0
  264. flowfile_frame/database/connection_manager.py +205 -0
  265. flowfile_frame/database/frame_helpers.py +249 -0
  266. flowfile_frame/expr.py +311 -218
  267. flowfile_frame/expr.pyi +160 -159
  268. flowfile_frame/expr_name.py +23 -23
  269. flowfile_frame/flow_frame.py +571 -476
  270. flowfile_frame/flow_frame.pyi +123 -104
  271. flowfile_frame/flow_frame_methods.py +227 -246
  272. flowfile_frame/group_frame.py +50 -20
  273. flowfile_frame/join.py +2 -2
  274. flowfile_frame/lazy.py +129 -87
  275. flowfile_frame/lazy_methods.py +83 -30
  276. flowfile_frame/list_name_space.py +55 -50
  277. flowfile_frame/selectors.py +148 -68
  278. flowfile_frame/series.py +9 -7
  279. flowfile_frame/utils.py +19 -21
  280. flowfile_worker/__init__.py +12 -7
  281. flowfile_worker/configs.py +41 -33
  282. flowfile_worker/create/__init__.py +14 -9
  283. flowfile_worker/create/funcs.py +114 -77
  284. flowfile_worker/create/models.py +46 -43
  285. flowfile_worker/create/pl_types.py +14 -15
  286. flowfile_worker/create/read_excel_tables.py +34 -41
  287. flowfile_worker/create/utils.py +22 -19
  288. flowfile_worker/external_sources/s3_source/main.py +18 -51
  289. flowfile_worker/external_sources/s3_source/models.py +34 -27
  290. flowfile_worker/external_sources/sql_source/main.py +8 -5
  291. flowfile_worker/external_sources/sql_source/models.py +13 -9
  292. flowfile_worker/flow_logger.py +10 -8
  293. flowfile_worker/funcs.py +214 -155
  294. flowfile_worker/main.py +11 -17
  295. flowfile_worker/models.py +35 -28
  296. flowfile_worker/process_manager.py +2 -3
  297. flowfile_worker/routes.py +121 -90
  298. flowfile_worker/secrets.py +114 -21
  299. flowfile_worker/spawner.py +89 -54
  300. flowfile_worker/utils.py +3 -2
  301. shared/__init__.py +2 -7
  302. shared/storage_config.py +25 -13
  303. test_utils/postgres/commands.py +3 -2
  304. test_utils/postgres/fixtures.py +9 -9
  305. test_utils/s3/commands.py +1 -1
  306. test_utils/s3/data_generator.py +3 -4
  307. test_utils/s3/demo_data_generator.py +4 -7
  308. test_utils/s3/fixtures.py +7 -5
  309. tools/migrate/__init__.py +1 -1
  310. tools/migrate/__main__.py +16 -29
  311. tools/migrate/legacy_schemas.py +251 -190
  312. tools/migrate/migrate.py +193 -181
  313. tools/migrate/tests/conftest.py +1 -3
  314. tools/migrate/tests/test_migrate.py +36 -41
  315. tools/migrate/tests/test_migration_e2e.py +28 -29
  316. tools/migrate/tests/test_node_migrations.py +50 -20
  317. flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
  318. flowfile/web/static/assets/ContextMenu-23e909da.js +0 -41
  319. flowfile/web/static/assets/ContextMenu-4c74eef1.css +0 -26
  320. flowfile/web/static/assets/ContextMenu-63cfa99b.css +0 -26
  321. flowfile/web/static/assets/ContextMenu-70ae0c79.js +0 -41
  322. flowfile/web/static/assets/ContextMenu-c13f91d0.css +0 -26
  323. flowfile/web/static/assets/ContextMenu-f149cf7c.js +0 -41
  324. flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
  325. flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
  326. flowfile/web/static/assets/Filter-9b6d08db.js +0 -164
  327. flowfile/web/static/assets/Filter-f62091b3.css +0 -20
  328. flowfile/web/static/assets/GroupBy-b9505323.css +0 -51
  329. flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
  330. flowfile/web/static/assets/Output-283fe388.css +0 -37
  331. flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
  332. flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
  333. flowfile/web/static/assets/SQLQueryComponent-36cef432.css +0 -27
  334. flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
  335. flowfile/web/static/assets/Sort-3643d625.css +0 -51
  336. flowfile/web/static/assets/Unique-f9fb0809.css +0 -51
  337. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
  338. flowfile/web/static/assets/nodeInput-5d0d6b79.js +0 -41
  339. flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
  340. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
  341. flowfile/web/static/assets/secretApi-68435402.js +0 -46
  342. flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
  343. flowfile-0.5.1.dist-info/RECORD +0 -388
  344. {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/WHEEL +0 -0
  345. {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/entry_points.txt +0 -0
  346. {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/licenses/LICENSE +0 -0
@@ -1,15 +1,28 @@
1
- from typing import List, Dict, Optional, Set, Tuple
1
+ import inspect
2
+ import typing
2
3
  import polars as pl
3
-
4
4
  from pl_fuzzy_frame_match.models import FuzzyMapping
5
5
 
6
- from flowfile_core.flowfile.flow_graph import FlowGraph
6
+ from flowfile_core.configs import logger
7
+ from flowfile_core.configs.node_store import CUSTOM_NODE_STORE
7
8
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, convert_pl_type_to_string
8
9
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import cast_str_to_polars_type
10
+ from flowfile_core.flowfile.flow_graph import FlowGraph
9
11
  from flowfile_core.flowfile.flow_node.flow_node import FlowNode
10
12
  from flowfile_core.flowfile.util.execution_orderer import determine_execution_order
11
13
  from flowfile_core.schemas import input_schema, transform_schema
12
- from flowfile_core.configs import logger
14
+
15
+
16
+ class UnsupportedNodeError(Exception):
17
+ """Raised when code generation encounters a node type that cannot be converted to standalone code."""
18
+
19
+ def __init__(self, node_type: str, node_id: int, reason: str):
20
+ self.node_type = node_type
21
+ self.node_id = node_id
22
+ self.reason = reason
23
+ super().__init__(
24
+ f"Cannot generate code for node '{node_type}' (node_id={node_id}): {reason}"
25
+ )
13
26
 
14
27
 
15
28
  class FlowGraphToPolarsConverter:
@@ -19,20 +32,25 @@ class FlowGraphToPolarsConverter:
19
32
  This class takes a FlowGraph instance and generates standalone Python code
20
33
  that uses only Polars, without any Flowfile dependencies.
21
34
  """
35
+
22
36
  flow_graph: FlowGraph
23
- node_var_mapping: Dict[int, str]
24
- imports: Set[str]
25
- code_lines: List[str]
26
- output_nodes: List[Tuple[int, str]] = []
27
- last_node_var: Optional[str] = None
37
+ node_var_mapping: dict[int, str]
38
+ imports: set[str]
39
+ code_lines: list[str]
40
+ output_nodes: list[tuple[int, str]] = []
41
+ last_node_var: str | None = None
42
+ unsupported_nodes: list[tuple[int, str, str]] # List of (node_id, node_type, reason)
43
+ custom_node_classes: dict[str, str] # Maps custom node class name to source code
28
44
 
29
45
  def __init__(self, flow_graph: FlowGraph):
30
46
  self.flow_graph = flow_graph
31
- self.node_var_mapping: Dict[int, str] = {} # Maps node_id to variable name
32
- self.imports: Set[str] = {"import polars as pl"}
33
- self.code_lines: List[str] = []
47
+ self.node_var_mapping: dict[int, str] = {} # Maps node_id to variable name
48
+ self.imports: set[str] = {"import polars as pl"}
49
+ self.code_lines: list[str] = []
34
50
  self.output_nodes = []
35
51
  self.last_node_var = None
52
+ self.unsupported_nodes = []
53
+ self.custom_node_classes = {}
36
54
 
37
55
  def convert(self) -> str:
38
56
  """
@@ -40,23 +58,41 @@ class FlowGraphToPolarsConverter:
40
58
 
41
59
  Returns:
42
60
  str: Complete Python code that can be executed standalone
61
+
62
+ Raises:
63
+ UnsupportedNodeError: If the graph contains nodes that cannot be converted
64
+ to standalone code (e.g., database nodes, explore_data, external_source).
43
65
  """
44
66
  # Get execution order
45
67
  execution_order = determine_execution_order(
46
68
  all_nodes=[node for node in self.flow_graph.nodes if node.is_correct],
47
- flow_starts=self.flow_graph._flow_starts + self.flow_graph.get_implicit_starter_nodes()
69
+ flow_starts=self.flow_graph._flow_starts + self.flow_graph.get_implicit_starter_nodes(),
48
70
  )
49
71
 
50
72
  # Generate code for each node in order
51
73
  for node in execution_order:
52
74
  self._generate_node_code(node)
53
75
 
76
+ # Check for unsupported nodes and raise an error with all of them listed
77
+ if self.unsupported_nodes:
78
+ error_messages = []
79
+ for node_id, node_type, reason in self.unsupported_nodes:
80
+ error_messages.append(f" - Node {node_id} ({node_type}): {reason}")
81
+ raise UnsupportedNodeError(
82
+ node_type=self.unsupported_nodes[0][1],
83
+ node_id=self.unsupported_nodes[0][0],
84
+ reason=(
85
+ f"The flow contains {len(self.unsupported_nodes)} node(s) that cannot be converted to code:\n"
86
+ + "\n".join(error_messages)
87
+ ),
88
+ )
89
+
54
90
  # Combine everything
55
91
  return self._build_final_code()
56
92
 
57
93
  def handle_output_node(self, node: FlowNode, var_name: str) -> None:
58
94
  settings = node.setting_input
59
- if hasattr(settings, 'is_flow_output') and settings.is_flow_output:
95
+ if hasattr(settings, "is_flow_output") and settings.is_flow_output:
60
96
  self.output_nodes.append((node.node_id, var_name))
61
97
 
62
98
  def _generate_node_code(self, node: FlowNode) -> None:
@@ -74,75 +110,79 @@ class FlowGraphToPolarsConverter:
74
110
  self.last_node_var = var_name
75
111
  # Get input variable names
76
112
  input_vars = self._get_input_vars(node)
113
+
114
+ # Check if this is a user-defined node
115
+ if isinstance(settings, input_schema.UserDefinedNode) or getattr(settings, "is_user_defined", False):
116
+ self._handle_user_defined(node, var_name, input_vars)
117
+ return
118
+
77
119
  # Route to appropriate handler based on node type
78
120
  handler = getattr(self, f"_handle_{node_type}", None)
79
121
  if handler:
80
122
  handler(settings, var_name, input_vars)
81
123
  else:
82
- self._add_comment(f"# TODO: Implement handler for node type: {node_type}")
83
- raise Exception(f"No handler implemented for node type: {node_type}")
84
-
85
- def _get_input_vars(self, node: FlowNode) -> Dict[str, str]:
124
+ # Unknown node type - add to unsupported list
125
+ self.unsupported_nodes.append((
126
+ node.node_id,
127
+ node_type,
128
+ f"No code generator implemented for node type '{node_type}'"
129
+ ))
130
+ self._add_comment(f"# WARNING: Cannot generate code for node type '{node_type}' (node_id={node.node_id})")
131
+ self._add_comment(f"# This node type is not supported for code export")
132
+
133
+ def _get_input_vars(self, node: FlowNode) -> dict[str, str]:
86
134
  """Get input variable names for a node."""
87
135
  input_vars = {}
88
136
 
89
137
  if node.node_inputs.main_inputs:
90
138
  if len(node.node_inputs.main_inputs) == 1:
91
- input_vars['main'] = self.node_var_mapping.get(
92
- node.node_inputs.main_inputs[0].node_id, 'df'
93
- )
139
+ input_vars["main"] = self.node_var_mapping.get(node.node_inputs.main_inputs[0].node_id, "df")
94
140
  else:
95
141
  for i, input_node in enumerate(node.node_inputs.main_inputs):
96
- input_vars[f'main_{i}'] = self.node_var_mapping.get(
97
- input_node.node_id, f'df_{i}'
98
- )
142
+ input_vars[f"main_{i}"] = self.node_var_mapping.get(input_node.node_id, f"df_{i}")
99
143
 
100
144
  if node.node_inputs.left_input:
101
- input_vars['left'] = self.node_var_mapping.get(
102
- node.node_inputs.left_input.node_id, 'df_left'
103
- )
145
+ input_vars["left"] = self.node_var_mapping.get(node.node_inputs.left_input.node_id, "df_left")
104
146
 
105
147
  if node.node_inputs.right_input:
106
- input_vars['right'] = self.node_var_mapping.get(
107
- node.node_inputs.right_input.node_id, 'df_right'
108
- )
148
+ input_vars["right"] = self.node_var_mapping.get(node.node_inputs.right_input.node_id, "df_right")
109
149
 
110
150
  return input_vars
111
151
 
112
152
  def _handle_csv_read(self, file_settings: input_schema.ReceivedTable, var_name: str):
113
- if file_settings.table_settings.encoding.lower() in ('utf-8', 'utf8'):
153
+ if file_settings.table_settings.encoding.lower() in ("utf-8", "utf8"):
114
154
  encoding = "utf8-lossy"
115
155
  self._add_code(f"{var_name} = pl.scan_csv(")
116
156
  self._add_code(f' "{file_settings.abs_file_path}",')
117
157
  self._add_code(f' separator="{file_settings.table_settings.delimiter}",')
118
- self._add_code(f' has_header={file_settings.table_settings.has_headers},')
119
- self._add_code(f' ignore_errors={file_settings.table_settings.ignore_errors},')
158
+ self._add_code(f" has_header={file_settings.table_settings.has_headers},")
159
+ self._add_code(f" ignore_errors={file_settings.table_settings.ignore_errors},")
120
160
  self._add_code(f' encoding="{encoding}",')
121
- self._add_code(f' skip_rows={file_settings.table_settings.starting_from_line},')
161
+ self._add_code(f" skip_rows={file_settings.table_settings.starting_from_line},")
122
162
  self._add_code(")")
123
163
  else:
124
164
  self._add_code(f"{var_name} = pl.read_csv(")
125
165
  self._add_code(f' "{file_settings.abs_file_path}",')
126
166
  self._add_code(f' separator="{file_settings.table_settings.delimiter}",')
127
- self._add_code(f' has_header={file_settings.table_settings.has_headers},')
128
- self._add_code(f' ignore_errors={file_settings.table_settings.ignore_errors},')
167
+ self._add_code(f" has_header={file_settings.table_settings.has_headers},")
168
+ self._add_code(f" ignore_errors={file_settings.table_settings.ignore_errors},")
129
169
  if file_settings.table_settings.encoding:
130
170
  self._add_code(f' encoding="{file_settings.table_settings.encoding}",')
131
- self._add_code(f' skip_rows={file_settings.table_settings.starting_from_line},')
171
+ self._add_code(f" skip_rows={file_settings.table_settings.starting_from_line},")
132
172
  self._add_code(").lazy()")
133
173
 
134
- def _handle_cloud_storage_reader(self, settings: input_schema.NodeCloudStorageReader, var_name: str, input_vars: Dict[str, str]):
174
+ def _handle_cloud_storage_reader(
175
+ self, settings: input_schema.NodeCloudStorageReader, var_name: str, input_vars: dict[str, str]
176
+ ):
135
177
  cloud_read_settings = settings.cloud_storage_settings
136
- self.imports.add(
137
- "import flowfile as ff"
138
- )
178
+ self.imports.add("import flowfile as ff")
139
179
  if cloud_read_settings.file_format == "csv":
140
180
  self._add_code(f"{var_name} = ff.scan_csv_from_cloud_storage(")
141
181
  self._add_code(f' "{cloud_read_settings.resource_path}",')
142
182
  self._add_code(f' connection_name="{cloud_read_settings.connection_name}",')
143
183
  self._add_code(f' scan_mode="{cloud_read_settings.scan_mode}",')
144
184
  self._add_code(f' delimiter="{cloud_read_settings.csv_delimiter}",')
145
- self._add_code(f' has_header={cloud_read_settings.csv_has_header},')
185
+ self._add_code(f" has_header={cloud_read_settings.csv_has_header},")
146
186
  self._add_code(f' encoding="{cloud_read_settings.csv_encoding}",')
147
187
 
148
188
  elif cloud_read_settings.file_format == "parquet":
@@ -162,22 +202,22 @@ class FlowGraphToPolarsConverter:
162
202
  self._add_code(f' "{cloud_read_settings.resource_path}",')
163
203
  self._add_code(f' connection_name="{cloud_read_settings.connection_name}",')
164
204
  self._add_code(f' scan_mode="{cloud_read_settings.scan_mode}",')
165
- self._add_code(f' version_id={cloud_read_settings.delta_version},')
205
+ self._add_code(f" version_id={cloud_read_settings.delta_version},")
166
206
  else:
167
207
  return
168
208
  self._add_code(").data")
169
209
 
170
- def _handle_read(self, settings: input_schema.NodeRead, var_name: str, input_vars: Dict[str, str]) -> None:
210
+ def _handle_read(self, settings: input_schema.NodeRead, var_name: str, input_vars: dict[str, str]) -> None:
171
211
  """Handle file reading nodes."""
172
212
  file_settings = settings.received_file
173
213
 
174
- if file_settings.file_type == 'csv':
214
+ if file_settings.file_type == "csv":
175
215
  self._handle_csv_read(file_settings, var_name)
176
216
 
177
- elif file_settings.file_type == 'parquet':
217
+ elif file_settings.file_type == "parquet":
178
218
  self._add_code(f'{var_name} = pl.scan_parquet("{file_settings.abs_file_path}")')
179
219
 
180
- elif file_settings.file_type in ('xlsx', 'excel'):
220
+ elif file_settings.file_type in ("xlsx", "excel"):
181
221
  self._add_code(f"{var_name} = pl.read_excel(")
182
222
  self._add_code(f' "{file_settings.abs_file_path}",')
183
223
  if file_settings.table_settings.sheet_name:
@@ -187,12 +227,18 @@ class FlowGraphToPolarsConverter:
187
227
  self._add_code("")
188
228
 
189
229
  @staticmethod
190
- def _generate_pl_schema_with_typing(flowfile_schema: List[FlowfileColumn]) -> str:
191
- polars_schema_str = "pl.Schema([" + ", ".join(f'("{flowfile_column.column_name}", pl.{flowfile_column.data_type})'
192
- for flowfile_column in flowfile_schema) + "])"
230
+ def _generate_pl_schema_with_typing(flowfile_schema: list[FlowfileColumn]) -> str:
231
+ polars_schema_str = (
232
+ "pl.Schema(["
233
+ + ", ".join(
234
+ f'("{flowfile_column.column_name}", pl.{flowfile_column.data_type})'
235
+ for flowfile_column in flowfile_schema
236
+ )
237
+ + "])"
238
+ )
193
239
  return polars_schema_str
194
240
 
195
- def get_manual_schema_input(self, flowfile_schema: List[FlowfileColumn]) -> str:
241
+ def get_manual_schema_input(self, flowfile_schema: list[FlowfileColumn]) -> str:
196
242
  polars_schema_str = self._generate_pl_schema_with_typing(flowfile_schema)
197
243
  is_valid_pl_schema = self._validate_pl_schema(polars_schema_str)
198
244
  if is_valid_pl_schema:
@@ -210,19 +256,23 @@ class FlowGraphToPolarsConverter:
210
256
  logger.error(f"Invalid Polars schema: {e}")
211
257
  return False
212
258
 
213
- def _handle_manual_input(self, settings: input_schema.NodeManualInput, var_name: str, input_vars: Dict[str, str]) -> None:
259
+ def _handle_manual_input(
260
+ self, settings: input_schema.NodeManualInput, var_name: str, input_vars: dict[str, str]
261
+ ) -> None:
214
262
  """Handle manual data input nodes."""
215
263
  data = settings.raw_data_format.data
216
- flowfile_schema = list(FlowfileColumn.create_from_minimal_field_info(c) for c in settings.raw_data_format.columns)
264
+ flowfile_schema = list(
265
+ FlowfileColumn.create_from_minimal_field_info(c) for c in settings.raw_data_format.columns
266
+ )
217
267
  schema = self.get_manual_schema_input(flowfile_schema)
218
268
  self._add_code(f"{var_name} = pl.LazyFrame({data}, schema={schema}, strict=False)")
219
269
  self._add_code("")
220
270
 
221
- def _handle_filter(self, settings: input_schema.NodeFilter, var_name: str, input_vars: Dict[str, str]) -> None:
271
+ def _handle_filter(self, settings: input_schema.NodeFilter, var_name: str, input_vars: dict[str, str]) -> None:
222
272
  """Handle filter nodes."""
223
- input_df = input_vars.get('main', 'df')
273
+ input_df = input_vars.get("main", "df")
224
274
 
225
- if settings.filter_input.filter_type == 'advanced':
275
+ if settings.filter_input.is_advanced():
226
276
  # Parse the advanced filter expression
227
277
  self.imports.add(
228
278
  "from polars_expr_transformer.process.polars_expr_transformer import simple_function_to_expr"
@@ -233,28 +283,33 @@ class FlowGraphToPolarsConverter:
233
283
  else:
234
284
  # Handle basic filter
235
285
  basic = settings.filter_input.basic_filter
236
- filter_expr = self._create_basic_filter_expr(basic)
237
- self._add_code(f"{var_name} = {input_df}.filter({filter_expr})")
286
+ if basic is not None:
287
+ filter_expr = self._create_basic_filter_expr(basic)
288
+ self._add_code(f"{var_name} = {input_df}.filter({filter_expr})")
289
+ else:
290
+ self._add_code(f"{var_name} = {input_df} # No filter applied")
238
291
  self._add_code("")
239
292
 
240
- def _handle_record_count(self, settings: input_schema.NodeRecordCount, var_name: str, input_vars: Dict[str, str]):
241
- input_df = input_vars.get('main', 'df')
293
+ def _handle_record_count(self, settings: input_schema.NodeRecordCount, var_name: str, input_vars: dict[str, str]):
294
+ input_df = input_vars.get("main", "df")
242
295
  self._add_code(f"{var_name} = {input_df}.select(pl.len().alias('number_of_records'))")
243
296
 
244
- def _handle_graph_solver(self, settings: input_schema.NodeGraphSolver, var_name: str, input_vars: Dict[str, str]):
245
- input_df = input_vars.get('main', 'df')
297
+ def _handle_graph_solver(self, settings: input_schema.NodeGraphSolver, var_name: str, input_vars: dict[str, str]):
298
+ input_df = input_vars.get("main", "df")
246
299
  from_col_name = settings.graph_solver_input.col_from
247
300
  to_col_name = settings.graph_solver_input.col_to
248
301
  output_col_name = settings.graph_solver_input.output_column_name
249
- self._add_code(f'{var_name} = {input_df}.with_columns(graph_solver(pl.col("{from_col_name}"), '
250
- f'pl.col("{to_col_name}"))'
251
- f'.alias("{output_col_name}"))')
302
+ self._add_code(
303
+ f'{var_name} = {input_df}.with_columns(graph_solver(pl.col("{from_col_name}"), '
304
+ f'pl.col("{to_col_name}"))'
305
+ f'.alias("{output_col_name}"))'
306
+ )
252
307
  self._add_code("")
253
308
  self.imports.add("from polars_grouper import graph_solver")
254
309
 
255
- def _handle_select(self, settings: input_schema.NodeSelect, var_name: str, input_vars: Dict[str, str]) -> None:
310
+ def _handle_select(self, settings: input_schema.NodeSelect, var_name: str, input_vars: dict[str, str]) -> None:
256
311
  """Handle select/rename nodes."""
257
- input_df = input_vars.get('main', 'df')
312
+ input_df = input_vars.get("main", "df")
258
313
  # Get columns to keep and renames
259
314
  select_exprs = []
260
315
  for select_input in settings.select_input:
@@ -266,7 +321,7 @@ class FlowGraphToPolarsConverter:
266
321
 
267
322
  if (select_input.data_type_change or select_input.is_altered) and select_input.data_type:
268
323
  polars_dtype = self._get_polars_dtype(select_input.data_type)
269
- expr = f'{expr}.cast({polars_dtype})'
324
+ expr = f"{expr}.cast({polars_dtype})"
270
325
 
271
326
  select_exprs.append(expr)
272
327
 
@@ -279,7 +334,7 @@ class FlowGraphToPolarsConverter:
279
334
  self._add_code(f"{var_name} = {input_df}")
280
335
  self._add_code("")
281
336
 
282
- def _handle_join(self, settings: input_schema.NodeJoin, var_name: str, input_vars: Dict[str, str]) -> None:
337
+ def _handle_join(self, settings: input_schema.NodeJoin, var_name: str, input_vars: dict[str, str]) -> None:
283
338
  """Handle join nodes by routing to appropriate join type handler.
284
339
 
285
340
  This is the main entry point for processing join operations. It determines
@@ -293,8 +348,8 @@ class FlowGraphToPolarsConverter:
293
348
  Returns:
294
349
  None: Modifies internal state by adding generated code
295
350
  """
296
- left_df = input_vars.get('main', input_vars.get('main_0', 'df_left'))
297
- right_df = input_vars.get('right', input_vars.get('main_1', 'df_right'))
351
+ left_df = input_vars.get("main", input_vars.get("main_0", "df_left"))
352
+ right_df = input_vars.get("right", input_vars.get("main_1", "df_right"))
298
353
  # Ensure left and right DataFrames are distinct
299
354
  if left_df == right_df:
300
355
  right_df = "df_right"
@@ -305,8 +360,9 @@ class FlowGraphToPolarsConverter:
305
360
  else:
306
361
  self._handle_standard_join(settings, var_name, left_df, right_df)
307
362
 
308
- def _handle_semi_anti_join(self, settings: input_schema.NodeJoin, var_name: str, left_df: str,
309
- right_df: str) -> None:
363
+ def _handle_semi_anti_join(
364
+ self, settings: input_schema.NodeJoin, var_name: str, left_df: str, right_df: str
365
+ ) -> None:
310
366
  """Handle semi and anti joins which only return rows from the left DataFrame.
311
367
 
312
368
  Semi joins return rows from left DataFrame that have matches in right.
@@ -333,8 +389,9 @@ class FlowGraphToPolarsConverter:
333
389
  self._add_code(" )")
334
390
  self._add_code(")")
335
391
 
336
- def _handle_standard_join(self, settings: input_schema.NodeJoin, var_name: str, left_df: str,
337
- right_df: str) -> None:
392
+ def _handle_standard_join(
393
+ self, settings: input_schema.NodeJoin, var_name: str, left_df: str, right_df: str
394
+ ) -> None:
338
395
  """Handle standard joins (left, right, inner, outer) with full column management.
339
396
 
340
397
  Standard joins may include columns from both DataFrames and require careful
@@ -370,12 +427,11 @@ class FlowGraphToPolarsConverter:
370
427
  )
371
428
  # Execute the join
372
429
  self._execute_join_with_post_processing(
373
- settings, var_name, left_df, right_df, left_on, right_on,
374
- after_join_drop_cols, reverse_action
430
+ settings, var_name, left_df, right_df, left_on, right_on, after_join_drop_cols, reverse_action
375
431
  )
376
432
 
377
433
  @staticmethod
378
- def _get_join_keys(settings: transform_schema.JoinInputManager) -> Tuple[List[str], List[str]]:
434
+ def _get_join_keys(settings: transform_schema.JoinInputManager) -> tuple[list[str], list[str]]:
379
435
  """Extract join keys based on join type.
380
436
 
381
437
  Different join types require different handling of join keys:
@@ -397,8 +453,9 @@ class FlowGraphToPolarsConverter:
397
453
 
398
454
  return left_on, right_on
399
455
 
400
- def _apply_pre_join_transformations(self, settings: transform_schema.JoinInputManager, left_df: str, right_df: str) -> Tuple[
401
- str, str]:
456
+ def _apply_pre_join_transformations(
457
+ self, settings: transform_schema.JoinInputManager, left_df: str, right_df: str
458
+ ) -> tuple[str, str]:
402
459
  """Apply column renames and drops before the join operation.
403
460
 
404
461
  Pre-join transformations prepare DataFrames by:
@@ -419,8 +476,7 @@ class FlowGraphToPolarsConverter:
419
476
  right_renames = {
420
477
  column.old_name: column.new_name
421
478
  for column in settings.right_select.renames
422
- if
423
- column.old_name != column.new_name and not column.join_key or settings.how in ("outer", "right")
479
+ if column.old_name != column.new_name and not column.join_key or settings.how in ("outer", "right")
424
480
  }
425
481
 
426
482
  left_renames = {
@@ -430,13 +486,11 @@ class FlowGraphToPolarsConverter:
430
486
  }
431
487
 
432
488
  left_drop_columns = [
433
- column.old_name for column in settings.left_select.renames
434
- if not column.keep and not column.join_key
489
+ column.old_name for column in settings.left_select.renames if not column.keep and not column.join_key
435
490
  ]
436
491
 
437
492
  right_drop_columns = [
438
- column.old_name for column in settings.right_select.renames
439
- if not column.keep and not column.join_key
493
+ column.old_name for column in settings.right_select.renames if not column.keep and not column.join_key
440
494
  ]
441
495
 
442
496
  # Apply transformations
@@ -451,9 +505,14 @@ class FlowGraphToPolarsConverter:
451
505
 
452
506
  return left_df, right_df
453
507
 
454
- def _handle_join_key_transformations(self, settings: transform_schema.JoinInputManager, left_df: str, right_df: str,
455
- left_on: List[str], right_on: List[str]) \
456
- -> Tuple[List[str], List[str], Optional[Dict], List[str]]:
508
+ def _handle_join_key_transformations(
509
+ self,
510
+ settings: transform_schema.JoinInputManager,
511
+ left_df: str,
512
+ right_df: str,
513
+ left_on: list[str],
514
+ right_on: list[str],
515
+ ) -> tuple[list[str], list[str], dict | None, list[str]]:
457
516
  """Route to appropriate join-specific key transformation handler.
458
517
 
459
518
  Different join types require different strategies for handling join keys
@@ -484,9 +543,9 @@ class FlowGraphToPolarsConverter:
484
543
  else:
485
544
  return left_on, right_on, None, []
486
545
 
487
- def _handle_left_inner_join_keys(self, settings: transform_schema.JoinInputManager, right_df: str,
488
- left_on: List[str], right_on: List[str]) -> Tuple[
489
- List[str], List[str], Dict, List[str]]:
546
+ def _handle_left_inner_join_keys(
547
+ self, settings: transform_schema.JoinInputManager, right_df: str, left_on: list[str], right_on: list[str]
548
+ ) -> tuple[list[str], list[str], dict, list[str]]:
490
549
  """Handle key transformations for left and inner joins.
491
550
 
492
551
  For left/inner joins:
@@ -510,27 +569,26 @@ class FlowGraphToPolarsConverter:
510
569
  left_join_keys_to_keep = [jk.new_name for jk in settings.left_select.join_key_selects if jk.keep]
511
570
  join_key_duplication_command = [
512
571
  f'pl.col("{rjk.old_name}").alias("__DROP__{rjk.new_name}__DROP__")'
513
- for rjk in settings.right_select.join_key_selects if rjk.keep
572
+ for rjk in settings.right_select.join_key_selects
573
+ if rjk.keep
514
574
  ]
515
575
 
516
576
  reverse_action = {
517
577
  f"__DROP__{rjk.new_name}__DROP__": rjk.new_name
518
- for rjk in settings.right_select.join_key_selects if rjk.keep
578
+ for rjk in settings.right_select.join_key_selects
579
+ if rjk.keep
519
580
  }
520
581
 
521
582
  if join_key_duplication_command:
522
583
  self._add_code(f"{right_df} = {right_df}.with_columns([{', '.join(join_key_duplication_command)}])")
523
584
 
524
- after_join_drop_cols = [
525
- k.new_name for k in settings.left_select.join_key_selects
526
- if not k.keep
527
- ]
585
+ after_join_drop_cols = [k.new_name for k in settings.left_select.join_key_selects if not k.keep]
528
586
 
529
587
  return left_on, right_on, reverse_action, after_join_drop_cols
530
588
 
531
- def _handle_right_join_keys(self, settings: transform_schema.JoinInputManager, left_df: str,
532
- left_on: List[str], right_on: List[str]) -> Tuple[
533
- List[str], List[str], None, List[str]]:
589
+ def _handle_right_join_keys(
590
+ self, settings: transform_schema.JoinInputManager, left_df: str, left_on: list[str], right_on: list[str]
591
+ ) -> tuple[list[str], list[str], None, list[str]]:
534
592
  """Handle key transformations for right joins.
535
593
 
536
594
  For right joins:
@@ -553,7 +611,8 @@ class FlowGraphToPolarsConverter:
553
611
  """
554
612
  join_key_duplication_command = [
555
613
  f'pl.col("{ljk.new_name}").alias("__jk_{ljk.new_name}")'
556
- for ljk in settings.left_select.join_key_selects if ljk.keep
614
+ for ljk in settings.left_select.join_key_selects
615
+ if ljk.keep
557
616
  ]
558
617
 
559
618
  # Update left_on keys
@@ -569,14 +628,15 @@ class FlowGraphToPolarsConverter:
569
628
  left_join_keys_keep = {jk.new_name for jk in settings.left_select.join_key_selects if jk.keep}
570
629
  after_join_drop_cols_right = [
571
630
  jk.new_name if jk.new_name not in left_join_keys_keep else jk.new_name + "_right"
572
- for jk in settings.right_select.join_key_selects if not jk.keep
631
+ for jk in settings.right_select.join_key_selects
632
+ if not jk.keep
573
633
  ]
574
634
  after_join_drop_cols = list(set(after_join_drop_cols_right))
575
635
  return left_on, right_on, None, after_join_drop_cols
576
636
 
577
- def _handle_outer_join_keys(self, settings: transform_schema.JoinInputManager, right_df: str,
578
- left_on: List[str],
579
- right_on: List[str]) -> Tuple[List[str], List[str], Dict, List[str]]:
637
+ def _handle_outer_join_keys(
638
+ self, settings: transform_schema.JoinInputManager, right_df: str, left_on: list[str], right_on: list[str]
639
+ ) -> tuple[list[str], list[str], dict, list[str]]:
580
640
  """Handle key transformations for outer joins.
581
641
 
582
642
  For outer joins:
@@ -600,14 +660,10 @@ class FlowGraphToPolarsConverter:
600
660
  left_join_keys = {jk.new_name for jk in settings.left_select.join_key_selects}
601
661
 
602
662
  join_keys_to_keep_and_rename = [
603
- rjk for rjk in settings.right_select.join_key_selects
604
- if rjk.keep and rjk.new_name in left_join_keys
663
+ rjk for rjk in settings.right_select.join_key_selects if rjk.keep and rjk.new_name in left_join_keys
605
664
  ]
606
665
 
607
- join_key_rename_command = {
608
- rjk.new_name: f"__jk_{rjk.new_name}"
609
- for rjk in join_keys_to_keep_and_rename
610
- }
666
+ join_key_rename_command = {rjk.new_name: f"__jk_{rjk.new_name}" for rjk in join_keys_to_keep_and_rename}
611
667
 
612
668
  # Update right_on keys
613
669
  for position, right_on_key in enumerate(right_on):
@@ -621,20 +677,27 @@ class FlowGraphToPolarsConverter:
621
677
  reverse_action = {f"__jk_{rjk.new_name}": rjk.new_name for rjk in join_keys_to_keep_and_rename}
622
678
 
623
679
  # Calculate columns to drop after join
624
- after_join_drop_cols_left = [
625
- jk.new_name for jk in settings.left_select.join_key_selects if not jk.keep
626
- ]
680
+ after_join_drop_cols_left = [jk.new_name for jk in settings.left_select.join_key_selects if not jk.keep]
627
681
  after_join_drop_cols_right = [
628
682
  jk.new_name if jk.new_name not in left_join_keys else jk.new_name + "_right"
629
- for jk in settings.right_select.join_key_selects if not jk.keep
683
+ for jk in settings.right_select.join_key_selects
684
+ if not jk.keep
630
685
  ]
631
686
  after_join_drop_cols = after_join_drop_cols_left + after_join_drop_cols_right
632
687
 
633
688
  return left_on, right_on, reverse_action, after_join_drop_cols
634
689
 
635
- def _execute_join_with_post_processing(self, settings: input_schema.NodeJoin, var_name: str,
636
- left_df: str, right_df: str, left_on: List[str], right_on: List[str],
637
- after_join_drop_cols: List[str], reverse_action: Optional[Dict]) -> None:
690
+ def _execute_join_with_post_processing(
691
+ self,
692
+ settings: input_schema.NodeJoin,
693
+ var_name: str,
694
+ left_df: str,
695
+ right_df: str,
696
+ left_on: list[str],
697
+ right_on: list[str],
698
+ after_join_drop_cols: list[str],
699
+ reverse_action: dict | None,
700
+ ) -> None:
638
701
  """Execute the join operation and apply post-processing steps.
639
702
 
640
703
  Generates the actual join code with any necessary post-processing:
@@ -665,7 +728,7 @@ class FlowGraphToPolarsConverter:
665
728
  self._add_code(" )")
666
729
 
667
730
  # Handle right join special case
668
- if settings.join_input.how == 'right':
731
+ if settings.join_input.how == "right":
669
732
  self._add_code(".collect()") # Right join needs to be collected first cause of issue with rename
670
733
 
671
734
  # Apply post-join transformations
@@ -676,21 +739,21 @@ class FlowGraphToPolarsConverter:
676
739
  self._add_code(f".rename({reverse_action})")
677
740
 
678
741
  # Convert back to lazy for right joins
679
- if settings.join_input.how == 'right':
680
- self._add_code(f".lazy()")
742
+ if settings.join_input.how == "right":
743
+ self._add_code(".lazy()")
681
744
 
682
745
  self._add_code(")")
683
746
 
684
- def _handle_group_by(self, settings: input_schema.NodeGroupBy, var_name: str, input_vars: Dict[str, str]) -> None:
747
+ def _handle_group_by(self, settings: input_schema.NodeGroupBy, var_name: str, input_vars: dict[str, str]) -> None:
685
748
  """Handle group by nodes."""
686
- input_df = input_vars.get('main', 'df')
749
+ input_df = input_vars.get("main", "df")
687
750
 
688
751
  # Separate groupby columns from aggregation columns
689
752
  group_cols = []
690
753
  agg_exprs = []
691
754
 
692
755
  for agg_col in settings.groupby_input.agg_cols:
693
- if agg_col.agg == 'groupby':
756
+ if agg_col.agg == "groupby":
694
757
  group_cols.append(agg_col.old_name)
695
758
  else:
696
759
  agg_func = self._get_agg_function(agg_col.agg)
@@ -703,9 +766,9 @@ class FlowGraphToPolarsConverter:
703
766
  self._add_code("])")
704
767
  self._add_code("")
705
768
 
706
- def _handle_formula(self, settings: input_schema.NodeFormula, var_name: str, input_vars: Dict[str, str]) -> None:
769
+ def _handle_formula(self, settings: input_schema.NodeFormula, var_name: str, input_vars: dict[str, str]) -> None:
707
770
  """Handle formula/expression nodes."""
708
- input_df = input_vars.get('main', 'df')
771
+ input_df = input_vars.get("main", "df")
709
772
  self.imports.add("from polars_expr_transformer.process.polars_expr_transformer import simple_function_to_expr")
710
773
 
711
774
  # Convert SQL-like formula to Polars expression
@@ -717,7 +780,7 @@ class FlowGraphToPolarsConverter:
717
780
  output_type = convert_pl_type_to_string(cast_str_to_polars_type(settings.function.field.data_type))
718
781
  if output_type[:3] != "pl.":
719
782
  output_type = "pl." + output_type
720
- self._add_code(f' .cast({output_type})')
783
+ self._add_code(f" .cast({output_type})")
721
784
 
722
785
  self._add_code("])")
723
786
  self._add_code("")
@@ -725,11 +788,11 @@ class FlowGraphToPolarsConverter:
725
788
  def _handle_pivot_no_index(self, settings: input_schema.NodePivot, var_name: str, input_df: str, agg_func: str):
726
789
  pivot_input = settings.pivot_input
727
790
 
728
- self._add_code(f'{var_name} = ({input_df}.collect()')
791
+ self._add_code(f"{var_name} = ({input_df}.collect()")
729
792
  self._add_code(' .with_columns(pl.lit(1).alias("__temp_index__"))')
730
- self._add_code(' .pivot(')
793
+ self._add_code(" .pivot(")
731
794
  self._add_code(f' values="{pivot_input.value_col}",')
732
- self._add_code(f' index=["__temp_index__"],')
795
+ self._add_code(' index=["__temp_index__"],')
733
796
  self._add_code(f' columns="{pivot_input.pivot_column}",')
734
797
  self._add_code(f' aggregate_function="{agg_func}"')
735
798
  self._add_code(" )")
@@ -737,17 +800,16 @@ class FlowGraphToPolarsConverter:
737
800
  self._add_code(").lazy()")
738
801
  self._add_code("")
739
802
 
740
- def _handle_pivot(self, settings: input_schema.NodePivot, var_name: str, input_vars: Dict[str, str]) -> None:
803
+ def _handle_pivot(self, settings: input_schema.NodePivot, var_name: str, input_vars: dict[str, str]) -> None:
741
804
  """Handle pivot nodes."""
742
- input_df = input_vars.get('main', 'df')
805
+ input_df = input_vars.get("main", "df")
743
806
  pivot_input = settings.pivot_input
744
807
  if len(pivot_input.aggregations) > 1:
745
- logger.error("Multiple aggregations are not convertable to polars code. "
746
- "Taking the first value")
808
+ logger.error("Multiple aggregations are not convertable to polars code. " "Taking the first value")
747
809
  if len(pivot_input.aggregations) > 0:
748
810
  agg_func = pivot_input.aggregations[0]
749
811
  else:
750
- agg_func = 'first'
812
+ agg_func = "first"
751
813
  if len(settings.pivot_input.index_columns) == 0:
752
814
  self._handle_pivot_no_index(settings, var_name, input_df, agg_func)
753
815
  else:
@@ -761,9 +823,9 @@ class FlowGraphToPolarsConverter:
761
823
  self._add_code(").lazy()")
762
824
  self._add_code("")
763
825
 
764
- def _handle_unpivot(self, settings: input_schema.NodeUnpivot, var_name: str, input_vars: Dict[str, str]) -> None:
826
+ def _handle_unpivot(self, settings: input_schema.NodeUnpivot, var_name: str, input_vars: dict[str, str]) -> None:
765
827
  """Handle unpivot nodes."""
766
- input_df = input_vars.get('main', 'df')
828
+ input_df = input_vars.get("main", "df")
767
829
  unpivot_input = settings.unpivot_input
768
830
 
769
831
  self._add_code(f"{var_name} = {input_df}.unpivot(")
@@ -779,22 +841,22 @@ class FlowGraphToPolarsConverter:
779
841
  self._add_code(")")
780
842
  self._add_code("")
781
843
 
782
- def _handle_union(self, settings: input_schema.NodeUnion, var_name: str, input_vars: Dict[str, str]) -> None:
844
+ def _handle_union(self, settings: input_schema.NodeUnion, var_name: str, input_vars: dict[str, str]) -> None:
783
845
  """Handle union nodes."""
784
846
  # Get all input LazyFrame
785
847
  dfs = []
786
- if 'main' in input_vars:
787
- dfs.append(input_vars['main'])
848
+ if "main" in input_vars:
849
+ dfs.append(input_vars["main"])
788
850
  else:
789
851
  # Multiple main inputs
790
852
  for key, df_var in input_vars.items():
791
- if key.startswith('main'):
853
+ if key.startswith("main"):
792
854
  dfs.append(df_var)
793
855
 
794
- if settings.union_input.mode == 'relaxed':
795
- how = 'diagonal_relaxed'
856
+ if settings.union_input.mode == "relaxed":
857
+ how = "diagonal_relaxed"
796
858
  else:
797
- how = 'diagonal'
859
+ how = "diagonal"
798
860
 
799
861
  self._add_code(f"{var_name} = pl.concat([")
800
862
  for df in dfs:
@@ -802,76 +864,88 @@ class FlowGraphToPolarsConverter:
802
864
  self._add_code(f"], how='{how}')")
803
865
  self._add_code("")
804
866
 
805
- def _handle_sort(self, settings: input_schema.NodeSort, var_name: str, input_vars: Dict[str, str]) -> None:
867
+ def _handle_sort(self, settings: input_schema.NodeSort, var_name: str, input_vars: dict[str, str]) -> None:
806
868
  """Handle sort nodes."""
807
- input_df = input_vars.get('main', 'df')
869
+ input_df = input_vars.get("main", "df")
808
870
 
809
871
  sort_cols = []
810
872
  descending = []
811
873
 
812
874
  for sort_input in settings.sort_input:
813
875
  sort_cols.append(f'"{sort_input.column}"')
814
- descending.append(sort_input.how == 'desc')
876
+ descending.append(sort_input.how == "desc")
815
877
 
816
878
  self._add_code(f"{var_name} = {input_df}.sort([{', '.join(sort_cols)}], descending={descending})")
817
879
  self._add_code("")
818
880
 
819
- def _handle_sample(self, settings: input_schema.NodeSample, var_name: str, input_vars: Dict[str, str]) -> None:
881
+ def _handle_sample(self, settings: input_schema.NodeSample, var_name: str, input_vars: dict[str, str]) -> None:
820
882
  """Handle sample nodes."""
821
- input_df = input_vars.get('main', 'df')
883
+ input_df = input_vars.get("main", "df")
822
884
  self._add_code(f"{var_name} = {input_df}.head(n={settings.sample_size})")
823
885
  self._add_code("")
824
886
 
825
887
  @staticmethod
826
- def _transform_fuzzy_mappings_to_string(fuzzy_mappings: List[FuzzyMapping]) -> str:
827
-
888
+ def _transform_fuzzy_mappings_to_string(fuzzy_mappings: list[FuzzyMapping]) -> str:
828
889
  output_str = "["
829
890
  for i, fuzzy_mapping in enumerate(fuzzy_mappings):
830
-
831
- output_str += (f"FuzzyMapping(left_col='{fuzzy_mapping.left_col}',"
832
- f" right_col='{fuzzy_mapping.right_col}', "
833
- f"threshold_score={fuzzy_mapping.threshold_score}, "
834
- f"fuzzy_type='{fuzzy_mapping.fuzzy_type}')")
891
+ output_str += (
892
+ f"FuzzyMapping(left_col='{fuzzy_mapping.left_col}',"
893
+ f" right_col='{fuzzy_mapping.right_col}', "
894
+ f"threshold_score={fuzzy_mapping.threshold_score}, "
895
+ f"fuzzy_type='{fuzzy_mapping.fuzzy_type}')"
896
+ )
835
897
  if i < len(fuzzy_mappings) - 1:
836
898
  output_str += ",\n"
837
899
  output_str += "]"
838
900
  return output_str
839
901
 
840
- def _handle_fuzzy_match(self, settings: input_schema.NodeFuzzyMatch, var_name: str, input_vars: Dict[str, str]) -> None:
902
+ def _handle_fuzzy_match(
903
+ self, settings: input_schema.NodeFuzzyMatch, var_name: str, input_vars: dict[str, str]
904
+ ) -> None:
841
905
  """Handle fuzzy match nodes."""
842
906
  self.imports.add("from pl_fuzzy_frame_match import FuzzyMapping, fuzzy_match_dfs")
843
907
  fuzzy_match_handler = transform_schema.FuzzyMatchInputManager(settings.join_input)
844
- left_df = input_vars.get('main', input_vars.get('main_0', 'df_left'))
845
- right_df = input_vars.get('right', input_vars.get('main_1', 'df_right'))
908
+ left_df = input_vars.get("main", input_vars.get("main_0", "df_left"))
909
+ right_df = input_vars.get("right", input_vars.get("main_1", "df_right"))
846
910
 
847
911
  if left_df == right_df:
848
912
  right_df = "df_right"
849
913
  self._add_code(f"{right_df} = {left_df}")
850
914
 
851
915
  if fuzzy_match_handler.left_select.has_drop_cols():
852
- self._add_code(f"{left_df} = {left_df}.drop({[c.old_name for c in fuzzy_match_handler.left_select.non_jk_drop_columns]})")
916
+ self._add_code(
917
+ f"{left_df} = {left_df}.drop({[c.old_name for c in fuzzy_match_handler.left_select.non_jk_drop_columns]})"
918
+ )
853
919
  if fuzzy_match_handler.right_select.has_drop_cols():
854
- self._add_code(f"{right_df} = {right_df}.drop({[c.old_name for c in fuzzy_match_handler.right_select.non_jk_drop_columns]})")
920
+ self._add_code(
921
+ f"{right_df} = {right_df}.drop({[c.old_name for c in fuzzy_match_handler.right_select.non_jk_drop_columns]})"
922
+ )
855
923
 
856
924
  fuzzy_join_mapping_settings = self._transform_fuzzy_mappings_to_string(fuzzy_match_handler.join_mapping)
857
- self._add_code(f"{var_name} = fuzzy_match_dfs(\n"
858
- f" left_df={left_df}, right_df={right_df},\n"
859
- f" fuzzy_maps={fuzzy_join_mapping_settings}\n"
860
- f" ).lazy()")
925
+ self._add_code(
926
+ f"{var_name} = fuzzy_match_dfs(\n"
927
+ f" left_df={left_df}, right_df={right_df},\n"
928
+ f" fuzzy_maps={fuzzy_join_mapping_settings}\n"
929
+ f" ).lazy()"
930
+ )
861
931
 
862
- def _handle_unique(self, settings: input_schema.NodeUnique, var_name: str, input_vars: Dict[str, str]) -> None:
932
+ def _handle_unique(self, settings: input_schema.NodeUnique, var_name: str, input_vars: dict[str, str]) -> None:
863
933
  """Handle unique/distinct nodes."""
864
- input_df = input_vars.get('main', 'df')
934
+ input_df = input_vars.get("main", "df")
865
935
 
866
936
  if settings.unique_input.columns:
867
- self._add_code(f"{var_name} = {input_df}.unique(subset={settings.unique_input.columns}, keep='{settings.unique_input.strategy}')")
937
+ self._add_code(
938
+ f"{var_name} = {input_df}.unique(subset={settings.unique_input.columns}, keep='{settings.unique_input.strategy}')"
939
+ )
868
940
  else:
869
941
  self._add_code(f"{var_name} = {input_df}.unique(keep='{settings.unique_input.strategy}')")
870
942
  self._add_code("")
871
943
 
872
- def _handle_text_to_rows(self, settings: input_schema.NodeTextToRows, var_name: str, input_vars: Dict[str, str]) -> None:
944
+ def _handle_text_to_rows(
945
+ self, settings: input_schema.NodeTextToRows, var_name: str, input_vars: dict[str, str]
946
+ ) -> None:
873
947
  """Handle text to rows (explode) nodes."""
874
- input_df = input_vars.get('main', 'df')
948
+ input_df = input_vars.get("main", "df")
875
949
  text_input = settings.text_to_rows_input
876
950
 
877
951
  # First split the column
@@ -884,96 +958,108 @@ class FlowGraphToPolarsConverter:
884
958
 
885
959
  self._add_code(f"{var_name} = {input_df}.with_columns({split_expr}).explode('{explode_col}')")
886
960
  self._add_code("")
961
+
887
962
  # .with_columns(
888
963
  # (pl.cum_count(record_id_settings.output_column_name)
889
964
  # .over(record_id_settings.group_by_columns) + record_id_settings.offset - 1)
890
965
  # .alias(record_id_settings.output_column_name)
891
966
  # )
892
- def _handle_record_id(self, settings: input_schema.NodeRecordId, var_name: str, input_vars: Dict[str, str]) -> None:
967
+ def _handle_record_id(self, settings: input_schema.NodeRecordId, var_name: str, input_vars: dict[str, str]) -> None:
893
968
  """Handle record ID nodes."""
894
- input_df = input_vars.get('main', 'df')
969
+ input_df = input_vars.get("main", "df")
895
970
  record_input = settings.record_id_input
896
971
  if record_input.group_by and record_input.group_by_columns:
897
-
898
972
  # Row number within groups
899
973
  self._add_code(f"{var_name} = ({input_df}")
900
974
  self._add_code(f" .with_columns(pl.lit(1).alias('{record_input.output_column_name}'))")
901
- self._add_code(f" .with_columns([")
902
- self._add_code(f" (pl.cum_count('{record_input.output_column_name}').over({record_input.group_by_columns}) + {record_input.offset} - 1)")
975
+ self._add_code(" .with_columns([")
976
+ self._add_code(
977
+ f" (pl.cum_count('{record_input.output_column_name}').over({record_input.group_by_columns}) + {record_input.offset} - 1)"
978
+ )
903
979
  self._add_code(f" .alias('{record_input.output_column_name}')")
904
980
  self._add_code("])")
905
- self._add_code(f".select(['{record_input.output_column_name}'] + [col for col in {input_df}.columns if col != '{record_input.output_column_name}'])")
981
+ self._add_code(
982
+ f".select(['{record_input.output_column_name}'] + [col for col in {input_df}.columns if col != '{record_input.output_column_name}'])"
983
+ )
906
984
  self._add_code(")")
907
985
  else:
908
986
  # Simple row number
909
- self._add_code(f"{var_name} = {input_df}.with_row_count(name='{record_input.output_column_name}', offset={record_input.offset})")
987
+ self._add_code(
988
+ f"{var_name} = {input_df}.with_row_count(name='{record_input.output_column_name}', offset={record_input.offset})"
989
+ )
910
990
  self._add_code("")
911
991
 
912
- def _handle_cross_join(self, settings: input_schema.NodeCrossJoin, var_name: str, input_vars: Dict[str, str]) -> None:
992
+ def _handle_cross_join(
993
+ self, settings: input_schema.NodeCrossJoin, var_name: str, input_vars: dict[str, str]
994
+ ) -> None:
913
995
  """Handle cross join nodes."""
914
- left_df = input_vars.get('main', input_vars.get('main_0', 'df_left'))
915
- right_df = input_vars.get('right', input_vars.get('main_1', 'df_right'))
996
+ left_df = input_vars.get("main", input_vars.get("main_0", "df_left"))
997
+ right_df = input_vars.get("right", input_vars.get("main_1", "df_right"))
916
998
 
917
999
  self._add_code(f"{var_name} = {left_df}.join({right_df}, how='cross')")
918
1000
  self._add_code("")
919
1001
 
920
- def _handle_cloud_storage_writer(self, settings: input_schema.NodeCloudStorageWriter, var_name: str, input_vars: Dict[str, str]) -> None:
1002
+ def _handle_cloud_storage_writer(
1003
+ self, settings: input_schema.NodeCloudStorageWriter, var_name: str, input_vars: dict[str, str]
1004
+ ) -> None:
921
1005
  """Handle cloud storage writer nodes."""
922
- input_df = input_vars.get('main', 'df')
1006
+ input_df = input_vars.get("main", "df")
923
1007
  # def write_csv_to_cloud_storage(self, path: str, connection_name: typing.Optional[str] = None, delimiter: str = ';', encoding: typing.Literal['utf8', 'utf8-lossy'] = 'utf8', description: Optional[str] = None) -> 'FlowFrame': ...
924
1008
 
925
1009
  output_settings = settings.cloud_storage_settings
926
1010
  self.imports.add("import flowfile as ff")
927
1011
  self._add_code(f"(ff.FlowFrame({input_df})")
928
1012
  if output_settings.file_format == "csv":
929
- self._add_code(f' .write_csv_to_cloud_storage(')
1013
+ self._add_code(" .write_csv_to_cloud_storage(")
930
1014
  self._add_code(f' path="{output_settings.resource_path}",')
931
1015
  self._add_code(f' connection_name="{output_settings.connection_name}",')
932
1016
  self._add_code(f' delimiter="{output_settings.csv_delimiter}",')
933
1017
  self._add_code(f' encoding="{output_settings.csv_encoding}",')
934
1018
  self._add_code(f' description="{settings.description}"')
935
1019
  elif output_settings.file_format == "parquet":
936
- self._add_code(f' .write_parquet_to_cloud_storage(')
1020
+ self._add_code(" .write_parquet_to_cloud_storage(")
937
1021
  self._add_code(f' path="{output_settings.resource_path}",')
938
1022
  self._add_code(f' connection_name="{output_settings.connection_name}",')
939
1023
  self._add_code(f' description="{settings.description}"')
940
1024
  elif output_settings.file_format == "json":
941
- self._add_code(f' .write_json_to_cloud_storage(')
1025
+ self._add_code(" .write_json_to_cloud_storage(")
942
1026
  self._add_code(f' path="{output_settings.resource_path}",')
943
1027
  self._add_code(f' connection_name="{output_settings.connection_name}",')
944
1028
  self._add_code(f' description="{settings.description}"')
945
1029
  elif output_settings.file_format == "delta":
946
- self._add_code(f' .write_delta(')
1030
+ self._add_code(" .write_delta(")
947
1031
  self._add_code(f' path="{output_settings.resource_path}",')
948
1032
  self._add_code(f' write_mode="{output_settings.write_mode}",')
949
1033
  self._add_code(f' connection_name="{output_settings.connection_name}",')
950
1034
  self._add_code(f' description="{settings.description}"')
951
- self._add_code(' )')
952
- self._add_code(')')
1035
+ self._add_code(" )")
1036
+ self._add_code(")")
953
1037
 
954
- def _handle_output(self, settings: input_schema.NodeOutput, var_name: str, input_vars: Dict[str, str]) -> None:
1038
+ def _handle_output(self, settings: input_schema.NodeOutput, var_name: str, input_vars: dict[str, str]) -> None:
955
1039
  """Handle output nodes."""
956
- input_df = input_vars.get('main', 'df')
1040
+ input_df = input_vars.get("main", "df")
957
1041
  output_settings = settings.output_settings
958
1042
 
959
- if output_settings.file_type == 'csv':
960
- self._add_code(f'{input_df}.sink_csv(')
1043
+ if output_settings.file_type == "csv":
1044
+ self._add_code(f"{input_df}.sink_csv(")
961
1045
  self._add_code(f' "{output_settings.abs_file_path}",')
962
1046
  self._add_code(f' separator="{output_settings.table_settings.delimiter}"')
963
- self._add_code(')')
1047
+ self._add_code(")")
964
1048
 
965
- elif output_settings.file_type == 'parquet':
1049
+ elif output_settings.file_type == "parquet":
966
1050
  self._add_code(f'{input_df}.sink_parquet("{output_settings.abs_file_path}")')
967
1051
 
968
- elif output_settings.file_type == 'excel':
969
- self._add_code(f'{input_df}.collect().write_excel(')
1052
+ elif output_settings.file_type == "excel":
1053
+ self._add_code(f"{input_df}.collect().write_excel(")
970
1054
  self._add_code(f' "{output_settings.abs_file_path}",')
971
1055
  self._add_code(f' worksheet="{output_settings.table_settings.sheet_name}"')
972
- self._add_code(')')
1056
+ self._add_code(")")
973
1057
 
974
1058
  self._add_code("")
975
1059
 
976
- def _handle_polars_code(self, settings: input_schema.NodePolarsCode, var_name: str, input_vars: Dict[str, str]) -> None:
1060
+ def _handle_polars_code(
1061
+ self, settings: input_schema.NodePolarsCode, var_name: str, input_vars: dict[str, str]
1062
+ ) -> None:
977
1063
  """Handle custom Polars code nodes."""
978
1064
  code = settings.polars_code_input.polars_code.strip()
979
1065
  # Determine function parameters based on number of inputs
@@ -990,7 +1076,7 @@ class FlowGraphToPolarsConverter:
990
1076
  arg_list = []
991
1077
  i = 1
992
1078
  for key in sorted(input_vars.keys()):
993
- if key.startswith('main'):
1079
+ if key.startswith("main"):
994
1080
  param_list.append(f"input_df_{i}: pl.LazyFrame")
995
1081
  arg_list.append(input_vars[key])
996
1082
  i += 1
@@ -1001,7 +1087,7 @@ class FlowGraphToPolarsConverter:
1001
1087
  is_expression = "output_df" not in code
1002
1088
 
1003
1089
  # Wrap the code in a function
1004
- self._add_code(f"# Custom Polars code")
1090
+ self._add_code("# Custom Polars code")
1005
1091
  self._add_code(f"def _polars_code_{var_name.replace('df_', '')}({params}):")
1006
1092
 
1007
1093
  # Handle the code based on its structure
@@ -1010,18 +1096,18 @@ class FlowGraphToPolarsConverter:
1010
1096
  self._add_code(f" return {code}")
1011
1097
  else:
1012
1098
  # It contains assignments
1013
- for line in code.split('\n'):
1099
+ for line in code.split("\n"):
1014
1100
  if line.strip():
1015
1101
  self._add_code(f" {line}")
1016
1102
 
1017
1103
  # If no explicit return, try to detect what to return
1018
- if 'return' not in code:
1104
+ if "return" not in code:
1019
1105
  # Try to find the last assignment
1020
- lines = [l.strip() for l in code.split('\n') if l.strip() and '=' in l]
1106
+ lines = [l.strip() for l in code.split("\n") if l.strip() and "=" in l]
1021
1107
  if lines:
1022
1108
  last_assignment = lines[-1]
1023
- if '=' in last_assignment:
1024
- output_var = last_assignment.split('=')[0].strip()
1109
+ if "=" in last_assignment:
1110
+ output_var = last_assignment.split("=")[0].strip()
1025
1111
  self._add_code(f" return {output_var}")
1026
1112
 
1027
1113
  self._add_code("")
@@ -1030,6 +1116,291 @@ class FlowGraphToPolarsConverter:
1030
1116
  self._add_code(f"{var_name} = _polars_code_{var_name.replace('df_', '')}({args})")
1031
1117
  self._add_code("")
1032
1118
 
1119
+ # Handlers for unsupported node types - these add nodes to the unsupported list
1120
+
1121
+ def _handle_explore_data(
1122
+ self, settings: input_schema.NodeExploreData, var_name: str, input_vars: dict[str, str]
1123
+ ) -> None:
1124
+ """Handle explore_data nodes - these are skipped as they are interactive visualization only."""
1125
+ # explore_data is just for visualization in the UI, it doesn't transform data
1126
+ # So we skip it in code generation but don't fail - just add a comment
1127
+ input_df = input_vars.get("main", "df")
1128
+ self._add_comment(f"# Node {settings.node_id}: Explore Data (skipped - interactive visualization only)")
1129
+ self._add_code(f"{var_name} = {input_df} # Pass through unchanged")
1130
+ self._add_code("")
1131
+
1132
+ def _handle_database_reader(
1133
+ self, settings: input_schema.NodeDatabaseReader, var_name: str, input_vars: dict[str, str]
1134
+ ) -> None:
1135
+ """Handle database_reader nodes by generating code to read from database using a named connection."""
1136
+ db_settings = settings.database_settings
1137
+
1138
+ # Only reference mode is supported for code generation
1139
+ if db_settings.connection_mode != "reference":
1140
+ self.unsupported_nodes.append((
1141
+ settings.node_id,
1142
+ "database_reader",
1143
+ "Database Reader nodes with inline connections cannot be exported. "
1144
+ "Please use a named connection (reference mode) instead."
1145
+ ))
1146
+ self._add_comment(f"# Node {settings.node_id}: Database Reader - Inline connections not supported")
1147
+ return
1148
+
1149
+ if not db_settings.database_connection_name:
1150
+ self.unsupported_nodes.append((
1151
+ settings.node_id,
1152
+ "database_reader",
1153
+ "Database Reader node is missing a connection name"
1154
+ ))
1155
+ return
1156
+
1157
+ self.imports.add("import flowfile as ff")
1158
+
1159
+ connection_name = db_settings.database_connection_name
1160
+ self._add_code(f"# Read from database using connection: {connection_name}")
1161
+
1162
+ if db_settings.query_mode == "query" and db_settings.query:
1163
+ # Query mode - use triple quotes to preserve query formatting
1164
+ self._add_code(f'{var_name} = ff.read_database(')
1165
+ self._add_code(f' "{connection_name}",')
1166
+ self._add_code(f' query="""')
1167
+ # Add each line of the query with proper indentation
1168
+ for line in db_settings.query.split("\n"):
1169
+ self._add_code(f" {line}")
1170
+ self._add_code(' """,')
1171
+ self._add_code(")")
1172
+ else:
1173
+ # Table mode
1174
+ self._add_code(f'{var_name} = ff.read_database(')
1175
+ self._add_code(f' "{connection_name}",')
1176
+ if db_settings.table_name:
1177
+ self._add_code(f' table_name="{db_settings.table_name}",')
1178
+ if db_settings.schema_name:
1179
+ self._add_code(f' schema_name="{db_settings.schema_name}",')
1180
+ self._add_code(")")
1181
+
1182
+ self._add_code("")
1183
+
1184
+ def _handle_database_writer(
1185
+ self, settings: input_schema.NodeDatabaseWriter, var_name: str, input_vars: dict[str, str]
1186
+ ) -> None:
1187
+ """Handle database_writer nodes by generating code to write to database using a named connection."""
1188
+ db_settings = settings.database_write_settings
1189
+
1190
+ # Only reference mode is supported for code generation
1191
+ if db_settings.connection_mode != "reference":
1192
+ self.unsupported_nodes.append((
1193
+ settings.node_id,
1194
+ "database_writer",
1195
+ "Database Writer nodes with inline connections cannot be exported. "
1196
+ "Please use a named connection (reference mode) instead."
1197
+ ))
1198
+ self._add_comment(f"# Node {settings.node_id}: Database Writer - Inline connections not supported")
1199
+ return
1200
+
1201
+ if not db_settings.database_connection_name:
1202
+ self.unsupported_nodes.append((
1203
+ settings.node_id,
1204
+ "database_writer",
1205
+ "Database Writer node is missing a connection name"
1206
+ ))
1207
+ return
1208
+
1209
+ self.imports.add("import flowfile as ff")
1210
+
1211
+ connection_name = db_settings.database_connection_name
1212
+ input_df = input_vars.get("main", "df")
1213
+
1214
+ self._add_code(f"# Write to database using connection: {connection_name}")
1215
+ self._add_code(f"ff.write_database(")
1216
+ self._add_code(f" {input_df}.collect(),")
1217
+ self._add_code(f' "{connection_name}",')
1218
+ self._add_code(f' "{db_settings.table_name}",')
1219
+ if db_settings.schema_name:
1220
+ self._add_code(f' schema_name="{db_settings.schema_name}",')
1221
+ if db_settings.if_exists:
1222
+ self._add_code(f' if_exists="{db_settings.if_exists}",')
1223
+ self._add_code(")")
1224
+ self._add_code(f"{var_name} = {input_df} # Pass through the input DataFrame")
1225
+ self._add_code("")
1226
+
1227
+ def _handle_external_source(
1228
+ self, settings: input_schema.NodeExternalSource, var_name: str, input_vars: dict[str, str]
1229
+ ) -> None:
1230
+ """Handle external_source nodes - these are not supported for code generation."""
1231
+ self.unsupported_nodes.append((
1232
+ settings.node_id,
1233
+ "external_source",
1234
+ "External Source nodes use dynamic data sources that cannot be included in generated code"
1235
+ ))
1236
+ self._add_comment(f"# Node {settings.node_id}: External Source - Not supported for code export")
1237
+ self._add_comment("# (External data sources require runtime configuration)")
1238
+
1239
+ def _check_process_method_signature(self, custom_node_class: type) -> tuple[bool, bool]:
1240
+ """
1241
+ Check the process method signature to determine if collect/lazy is needed.
1242
+
1243
+ Returns:
1244
+ Tuple of (needs_collect, needs_lazy):
1245
+ - needs_collect: True if inputs need to be collected to DataFrame before passing to process()
1246
+ - needs_lazy: True if output needs to be converted to LazyFrame after process()
1247
+ """
1248
+ needs_collect = True # Default: assume needs DataFrame input
1249
+ needs_lazy = True # Default: assume returns DataFrame
1250
+
1251
+ process_method = getattr(custom_node_class, 'process', None)
1252
+ if process_method is None:
1253
+ return needs_collect, needs_lazy
1254
+
1255
+ try:
1256
+ # Try to get type hints from the process method
1257
+ type_hints = typing.get_type_hints(process_method)
1258
+
1259
+ # Check return type
1260
+ return_type = type_hints.get('return')
1261
+ if return_type is not None:
1262
+ return_type_str = str(return_type)
1263
+ if 'LazyFrame' in return_type_str:
1264
+ needs_lazy = False
1265
+
1266
+ # Check input parameter types (look for *inputs parameter or first param after self)
1267
+ sig = inspect.signature(process_method)
1268
+ params = list(sig.parameters.values())
1269
+ for param in params[1:]: # Skip 'self'
1270
+ if param.annotation != inspect.Parameter.empty:
1271
+ param_type_str = str(param.annotation)
1272
+ if 'LazyFrame' in param_type_str:
1273
+ needs_collect = False
1274
+ break
1275
+ # Also check the type_hints dict for this param
1276
+ if param.name in type_hints:
1277
+ hint_str = str(type_hints[param.name])
1278
+ if 'LazyFrame' in hint_str:
1279
+ needs_collect = False
1280
+ break
1281
+ except Exception as e:
1282
+ # If we can't determine types, use defaults (collect + lazy)
1283
+ logger.debug(f"Could not determine process method signature: {e}")
1284
+
1285
+ return needs_collect, needs_lazy
1286
+
1287
+ def _read_custom_node_source_file(self, custom_node_class: type) -> str | None:
1288
+ """
1289
+ Read the entire source file where a custom node class is defined.
1290
+ This includes all class definitions in that file (settings schemas, etc.).
1291
+
1292
+ Returns:
1293
+ The complete source code from the file, or None if not readable.
1294
+ """
1295
+ try:
1296
+ source_file = inspect.getfile(custom_node_class)
1297
+ with open(source_file, 'r') as f:
1298
+ return f.read()
1299
+ except (OSError, TypeError):
1300
+ return None
1301
+
1302
+ def _handle_user_defined(
1303
+ self, node: FlowNode, var_name: str, input_vars: dict[str, str]
1304
+ ) -> None:
1305
+ """Handle user-defined custom nodes by including their class definition and calling process()."""
1306
+ node_type = node.node_type
1307
+ settings = node.setting_input
1308
+
1309
+ # Get the custom node class from the registry
1310
+ custom_node_class = CUSTOM_NODE_STORE.get(node_type)
1311
+ if custom_node_class is None:
1312
+ self.unsupported_nodes.append((
1313
+ node.node_id,
1314
+ node_type,
1315
+ f"User-defined node type '{node_type}' not found in the custom node registry"
1316
+ ))
1317
+ self._add_comment(f"# Node {node.node_id}: User-defined node '{node_type}' - Not found in registry")
1318
+ return
1319
+
1320
+ # Store the entire source file if we haven't already
1321
+ class_name = custom_node_class.__name__
1322
+ if class_name not in self.custom_node_classes:
1323
+ # Read the entire source file - it contains everything we need
1324
+ file_source = self._read_custom_node_source_file(custom_node_class)
1325
+ if file_source:
1326
+ # Remove import lines from the file since we handle imports separately
1327
+ lines = file_source.split('\n')
1328
+ non_import_lines = []
1329
+ in_multiline_import = False
1330
+ for line in lines:
1331
+ stripped = line.strip()
1332
+ # Track multi-line imports (using parentheses)
1333
+ if stripped.startswith('import ') or stripped.startswith('from '):
1334
+ if '(' in stripped and ')' not in stripped:
1335
+ in_multiline_import = True
1336
+ continue
1337
+ if in_multiline_import:
1338
+ if ')' in stripped:
1339
+ in_multiline_import = False
1340
+ continue
1341
+ # Skip comments at the very start (like "# Auto-generated custom node")
1342
+ if stripped.startswith('#') and not non_import_lines:
1343
+ continue
1344
+ non_import_lines.append(line)
1345
+ # Remove leading empty lines
1346
+ while non_import_lines and not non_import_lines[0].strip():
1347
+ non_import_lines.pop(0)
1348
+ self.custom_node_classes[class_name] = '\n'.join(non_import_lines)
1349
+ else:
1350
+ # Fallback to just the class source
1351
+ try:
1352
+ self.custom_node_classes[class_name] = inspect.getsource(custom_node_class)
1353
+ except (OSError, TypeError) as e:
1354
+ self.unsupported_nodes.append((
1355
+ node.node_id,
1356
+ node_type,
1357
+ f"Could not retrieve source code for user-defined node: {e}"
1358
+ ))
1359
+ self._add_comment(f"# Node {node.node_id}: User-defined node '{node_type}' - Source code unavailable")
1360
+ return
1361
+
1362
+ # Add necessary imports
1363
+ self.imports.add("from flowfile_core.flowfile.node_designer import CustomNodeBase, Section, NodeSettings, SingleSelect, MultiSelect, IncomingColumns, ColumnSelector, NumericInput, TextInput, DropdownSelector, TextArea, Toggle")
1364
+
1365
+ # Get settings values to initialize the node
1366
+ settings_dict = getattr(settings, "settings", {}) or {}
1367
+
1368
+ # Check process method signature to determine if collect/lazy is needed
1369
+ needs_collect, needs_lazy = self._check_process_method_signature(custom_node_class)
1370
+
1371
+ # Generate the code to instantiate and run the custom node
1372
+ self._add_code(f"# User-defined node: {custom_node_class.model_fields.get('node_name', type('', (), {'default': node_type})).default}")
1373
+ self._add_code(f"_custom_node_{node.node_id} = {class_name}()")
1374
+
1375
+ # If there are settings, apply them
1376
+ if settings_dict:
1377
+ self._add_code(f"_custom_node_{node.node_id}_settings = {repr(settings_dict)}")
1378
+ self._add_code(f"if _custom_node_{node.node_id}.settings_schema:")
1379
+ self._add_code(f" _custom_node_{node.node_id}.settings_schema.populate_values(_custom_node_{node.node_id}_settings)")
1380
+
1381
+ # Prepare input arguments based on whether we need to collect
1382
+ if len(input_vars) == 0:
1383
+ input_args = ""
1384
+ elif len(input_vars) == 1:
1385
+ input_df = list(input_vars.values())[0]
1386
+ input_args = f"{input_df}.collect()" if needs_collect else input_df
1387
+ else:
1388
+ arg_list = []
1389
+ for key in sorted(input_vars.keys()):
1390
+ if key.startswith("main"):
1391
+ if needs_collect:
1392
+ arg_list.append(f"{input_vars[key]}.collect()")
1393
+ else:
1394
+ arg_list.append(input_vars[key])
1395
+ input_args = ", ".join(arg_list)
1396
+
1397
+ # Call the process method, adding .lazy() only if needed
1398
+ if needs_lazy:
1399
+ self._add_code(f"{var_name} = _custom_node_{node.node_id}.process({input_args}).lazy()")
1400
+ else:
1401
+ self._add_code(f"{var_name} = _custom_node_{node.node_id}.process({input_args})")
1402
+ self._add_code("")
1403
+
1033
1404
  # Helper methods
1034
1405
 
1035
1406
  def _add_code(self, line: str) -> None:
@@ -1054,14 +1425,7 @@ class FlowGraphToPolarsConverter:
1054
1425
  col, op, val = match.groups()
1055
1426
 
1056
1427
  # Map operators
1057
- op_map = {
1058
- '=': '==',
1059
- '!=': '!=',
1060
- '>': '>',
1061
- '<': '<',
1062
- '>=': '>=',
1063
- '<=': '<='
1064
- }
1428
+ op_map = {"=": "==", "!=": "!=", ">": ">", "<": "<", ">=": ">=", "<=": "<="}
1065
1429
 
1066
1430
  polars_op = op_map.get(op, op)
1067
1431
 
@@ -1075,45 +1439,129 @@ class FlowGraphToPolarsConverter:
1075
1439
  return re.sub(pattern, replace_expr, expr)
1076
1440
 
1077
1441
  def _create_basic_filter_expr(self, basic: transform_schema.BasicFilter) -> str:
1078
- """Create Polars expression from basic filter."""
1442
+ """Create Polars expression from basic filter.
1443
+
1444
+ Generates proper Polars code for all supported filter operators.
1445
+
1446
+ Args:
1447
+ basic: The BasicFilter configuration.
1448
+
1449
+ Returns:
1450
+ A string containing valid Polars filter expression code.
1451
+ """
1452
+ from flowfile_core.schemas.transform_schema import FilterOperator
1453
+
1079
1454
  col = f'pl.col("{basic.field}")'
1455
+ value = basic.value
1456
+ value2 = basic.value2
1457
+
1458
+ # Determine if value is numeric (for proper quoting)
1459
+ is_numeric = value.replace(".", "", 1).replace("-", "", 1).isnumeric() if value else False
1460
+
1461
+ # Get the operator
1462
+ try:
1463
+ operator = basic.get_operator()
1464
+ except (ValueError, AttributeError):
1465
+ operator = FilterOperator.from_symbol(str(basic.operator))
1466
+
1467
+ # Generate expression based on operator
1468
+ if operator == FilterOperator.EQUALS:
1469
+ if is_numeric:
1470
+ return f"{col} == {value}"
1471
+ return f'{col} == "{value}"'
1472
+
1473
+ elif operator == FilterOperator.NOT_EQUALS:
1474
+ if is_numeric:
1475
+ return f"{col} != {value}"
1476
+ return f'{col} != "{value}"'
1477
+
1478
+ elif operator == FilterOperator.GREATER_THAN:
1479
+ if is_numeric:
1480
+ return f"{col} > {value}"
1481
+ return f'{col} > "{value}"'
1482
+
1483
+ elif operator == FilterOperator.GREATER_THAN_OR_EQUALS:
1484
+ if is_numeric:
1485
+ return f"{col} >= {value}"
1486
+ return f'{col} >= "{value}"'
1487
+
1488
+ elif operator == FilterOperator.LESS_THAN:
1489
+ if is_numeric:
1490
+ return f"{col} < {value}"
1491
+ return f'{col} < "{value}"'
1492
+
1493
+ elif operator == FilterOperator.LESS_THAN_OR_EQUALS:
1494
+ if is_numeric:
1495
+ return f"{col} <= {value}"
1496
+ return f'{col} <= "{value}"'
1497
+
1498
+ elif operator == FilterOperator.CONTAINS:
1499
+ return f'{col}.str.contains("{value}")'
1500
+
1501
+ elif operator == FilterOperator.NOT_CONTAINS:
1502
+ return f'{col}.str.contains("{value}").not_()'
1503
+
1504
+ elif operator == FilterOperator.STARTS_WITH:
1505
+ return f'{col}.str.starts_with("{value}")'
1506
+
1507
+ elif operator == FilterOperator.ENDS_WITH:
1508
+ return f'{col}.str.ends_with("{value}")'
1509
+
1510
+ elif operator == FilterOperator.IS_NULL:
1511
+ return f"{col}.is_null()"
1512
+
1513
+ elif operator == FilterOperator.IS_NOT_NULL:
1514
+ return f"{col}.is_not_null()"
1515
+
1516
+ elif operator == FilterOperator.IN:
1517
+ values = [v.strip() for v in value.split(",")]
1518
+ if all(v.replace(".", "", 1).replace("-", "", 1).isnumeric() for v in values):
1519
+ values_str = ", ".join(values)
1520
+ else:
1521
+ values_str = ", ".join(f'"{v}"' for v in values)
1522
+ return f"{col}.is_in([{values_str}])"
1523
+
1524
+ elif operator == FilterOperator.NOT_IN:
1525
+ values = [v.strip() for v in value.split(",")]
1526
+ if all(v.replace(".", "", 1).replace("-", "", 1).isnumeric() for v in values):
1527
+ values_str = ", ".join(values)
1528
+ else:
1529
+ values_str = ", ".join(f'"{v}"' for v in values)
1530
+ return f"{col}.is_in([{values_str}]).not_()"
1080
1531
 
1081
- if basic.filter_type == 'equals':
1082
- return f'{col} == "{basic.filter_value}"'
1083
- elif basic.filter_type == 'not_equals':
1084
- return f'{col} != "{basic.filter_value}"'
1085
- elif basic.filter_type == 'greater':
1086
- return f'{col} > {basic.filter_value}'
1087
- elif basic.filter_type == 'less':
1088
- return f'{col} < {basic.filter_value}'
1089
- elif basic.filter_type == 'in':
1090
- values = basic.filter_value.split(',')
1091
- return f"pl.col('{col}').is_in({values})"
1532
+ elif operator == FilterOperator.BETWEEN:
1533
+ if value2 is None:
1534
+ return f"{col} # BETWEEN requires two values"
1535
+ if is_numeric and value2.replace(".", "", 1).replace("-", "", 1).isnumeric():
1536
+ return f"({col} >= {value}) & ({col} <= {value2})"
1537
+ return f'({col} >= "{value}") & ({col} <= "{value2}")'
1538
+
1539
+ # Fallback
1092
1540
  return col
1093
1541
 
1094
1542
  def _get_polars_dtype(self, dtype_str: str) -> str:
1095
1543
  """Convert Flowfile dtype string to Polars dtype."""
1096
1544
  dtype_map = {
1097
- 'String': 'pl.Utf8',
1098
- 'Integer': 'pl.Int64',
1099
- 'Double': 'pl.Float64',
1100
- 'Boolean': 'pl.Boolean',
1101
- 'Date': 'pl.Date',
1102
- 'Datetime': 'pl.Datetime',
1103
- 'Float32': 'pl.Float32',
1104
- 'Float64': 'pl.Float64',
1105
- 'Int32': 'pl.Int32',
1106
- 'Int64': 'pl.Int64',
1107
- 'Utf8': 'pl.Utf8',
1545
+ "String": "pl.Utf8",
1546
+ "Integer": "pl.Int64",
1547
+ "Double": "pl.Float64",
1548
+ "Boolean": "pl.Boolean",
1549
+ "Date": "pl.Date",
1550
+ "Datetime": "pl.Datetime",
1551
+ "Float32": "pl.Float32",
1552
+ "Float64": "pl.Float64",
1553
+ "Int32": "pl.Int32",
1554
+ "Int64": "pl.Int64",
1555
+ "Utf8": "pl.Utf8",
1108
1556
  }
1109
- return dtype_map.get(dtype_str, 'pl.Utf8')
1557
+ return dtype_map.get(dtype_str, "pl.Utf8")
1110
1558
 
1111
1559
  def _get_agg_function(self, agg: str) -> str:
1112
1560
  """Get Polars aggregation function name."""
1113
1561
  agg_map = {
1114
- 'avg': 'mean',
1115
- 'average': 'mean',
1116
- 'concat': 'str.concat',
1562
+ "avg": "mean",
1563
+ "average": "mean",
1564
+ "concat": "str.concat",
1117
1565
  }
1118
1566
  return agg_map.get(agg, agg)
1119
1567
 
@@ -1126,12 +1574,12 @@ class FlowGraphToPolarsConverter:
1126
1574
  import re
1127
1575
 
1128
1576
  # Pattern for column names (simplified)
1129
- col_pattern = r'\b([a-zA-Z_][a-zA-Z0-9_]*)\b'
1577
+ col_pattern = r"\b([a-zA-Z_][a-zA-Z0-9_]*)\b"
1130
1578
 
1131
1579
  def replace_col(match):
1132
1580
  col_name = match.group(1)
1133
1581
  # Skip SQL keywords
1134
- keywords = {'CASE', 'WHEN', 'THEN', 'ELSE', 'END', 'AND', 'OR', 'NOT', 'IN', 'AS'}
1582
+ keywords = {"CASE", "WHEN", "THEN", "ELSE", "END", "AND", "OR", "NOT", "IN", "AS"}
1135
1583
  if col_name.upper() in keywords:
1136
1584
  return col_name
1137
1585
  return f'pl.col("{col_name}")'
@@ -1139,13 +1587,13 @@ class FlowGraphToPolarsConverter:
1139
1587
  result = re.sub(col_pattern, replace_col, sql_expr)
1140
1588
 
1141
1589
  # Handle CASE WHEN
1142
- if 'CASE' in result:
1590
+ if "CASE" in result:
1143
1591
  # This would need proper parsing
1144
1592
  result = "pl.when(...).then(...).otherwise(...)"
1145
1593
 
1146
1594
  return result
1147
1595
 
1148
- def add_return_code(self, lines: List[str]) -> None:
1596
+ def add_return_code(self, lines: list[str]) -> None:
1149
1597
  if self.output_nodes:
1150
1598
  # Return marked output nodes
1151
1599
  if len(self.output_nodes) == 1:
@@ -1172,11 +1620,22 @@ class FlowGraphToPolarsConverter:
1172
1620
  lines.append("")
1173
1621
  lines.append("")
1174
1622
 
1623
+ # Add custom node class definitions if any
1624
+ if self.custom_node_classes:
1625
+ lines.append("# Custom Node Class Definitions")
1626
+ lines.append("# These classes are user-defined nodes that were included in the flow")
1627
+ lines.append("")
1628
+ for class_name, source_code in self.custom_node_classes.items():
1629
+ for source_line in source_code.split("\n"):
1630
+ lines.append(source_line)
1631
+ lines.append("")
1632
+ lines.append("")
1633
+
1175
1634
  # Add main function
1176
1635
  lines.append("def run_etl_pipeline():")
1177
1636
  lines.append(' """')
1178
- lines.append(f' ETL Pipeline: {self.flow_graph.__name__}')
1179
- lines.append(' Generated from Flowfile')
1637
+ lines.append(f" ETL Pipeline: {self.flow_graph.__name__}")
1638
+ lines.append(" Generated from Flowfile")
1180
1639
  lines.append(' """')
1181
1640
  lines.append(" ")
1182
1641