Flowfile 0.4.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (332)
  1. build_backends/main.py +25 -22
  2. build_backends/main_prd.py +10 -19
  3. flowfile/__init__.py +179 -73
  4. flowfile/__main__.py +10 -7
  5. flowfile/api.py +52 -59
  6. flowfile/web/__init__.py +14 -9
  7. flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
  8. flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
  9. flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
  10. flowfile/web/static/assets/{CloudConnectionManager-d3248f8d.js → CloudConnectionView-f13f202b.js} +11 -11
  11. flowfile/web/static/assets/{CloudStorageReader-d65bf041.js → CloudStorageReader-0023d4a5.js} +10 -8
  12. flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
  13. flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
  14. flowfile/web/static/assets/{CloudStorageWriter-e83be3ed.js → CloudStorageWriter-8e781e11.js} +10 -8
  15. flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
  16. flowfile/web/static/assets/{ColumnSelector-cce661cf.js → ColumnSelector-8ad68ea9.js} +3 -5
  17. flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
  18. flowfile/web/static/assets/{ContextMenu-11a4652a.js → ContextMenu-31ee57f0.js} +3 -3
  19. flowfile/web/static/assets/{ContextMenu-160afb08.js → ContextMenu-69a74055.js} +3 -3
  20. flowfile/web/static/assets/{ContextMenu-cf18d2cc.js → ContextMenu-8e2051c6.js} +3 -3
  21. flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
  22. flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
  23. flowfile/web/static/assets/{CrossJoin-d395d38c.js → CrossJoin-03df6938.js} +12 -10
  24. flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
  25. flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
  26. flowfile/web/static/assets/{CustomNode-b812dc0b.js → CustomNode-8479239b.js} +36 -24
  27. flowfile/web/static/assets/{DatabaseConnectionSettings-7000bf2c.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
  28. flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
  29. flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
  30. flowfile/web/static/assets/{DatabaseReader-4f035d0c.js → DatabaseReader-c58b9552.js} +25 -15
  31. flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
  32. flowfile/web/static/assets/{DatabaseManager-9662ec5b.js → DatabaseView-d26a9140.js} +11 -11
  33. flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
  34. flowfile/web/static/assets/{DatabaseWriter-f65dcd54.js → DatabaseWriter-4d05ddc7.js} +17 -10
  35. flowfile/web/static/assets/{designer-e3c150ec.css → DesignerView-a6d0ee84.css} +629 -538
  36. flowfile/web/static/assets/{designer-f3656d8c.js → DesignerView-e6f5c0e8.js} +1214 -3209
  37. flowfile/web/static/assets/{documentation-52b241e7.js → DocumentationView-2e78ef1b.js} +5 -5
  38. flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
  39. flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
  40. flowfile/web/static/assets/{ExploreData-94c43dfc.js → ExploreData-7b54caca.js} +18 -9
  41. flowfile/web/static/assets/{ExternalSource-ac04b3cc.js → ExternalSource-3fa399b2.js} +9 -7
  42. flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
  43. flowfile/web/static/assets/Filter-7494ea97.css +48 -0
  44. flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
  45. flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
  46. flowfile/web/static/assets/{Formula-71472193.js → Formula-aac42b1e.js} +13 -11
  47. flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
  48. flowfile/web/static/assets/{FuzzyMatch-b317f631.js → FuzzyMatch-cd9bbfca.js} +12 -10
  49. flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
  50. flowfile/web/static/assets/{GraphSolver-754a234f.js → GraphSolver-c7e6780e.js} +13 -11
  51. flowfile/web/static/assets/{GroupBy-6c6f9802.js → GroupBy-93c5d22b.js} +9 -7
  52. flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
  53. flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
  54. flowfile/web/static/assets/{Join-a1b800be.js → Join-a19b2de2.js} +13 -11
  55. flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
  56. flowfile/web/static/assets/LoginView-d325d632.css +172 -0
  57. flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
  58. flowfile/web/static/assets/{ManualInput-a9640276.js → ManualInput-8d3374b2.js} +170 -116
  59. flowfile/web/static/assets/{MultiSelect-97213888.js → MultiSelect-ad1b6243.js} +2 -2
  60. flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-6ffe088a.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
  61. flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
  62. flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
  63. flowfile/web/static/assets/{NumericInput-e638088a.js → NumericInput-7100234c.js} +2 -2
  64. flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-90eb2cba.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
  65. flowfile/web/static/assets/{Output-ddc9079f.css → Output-35e97000.css} +6 -6
  66. flowfile/web/static/assets/{Output-76750610.js → Output-f5efd2aa.js} +60 -38
  67. flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
  68. flowfile/web/static/assets/{Pivot-7814803f.js → Pivot-d981d23c.js} +11 -9
  69. flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
  70. flowfile/web/static/assets/{PivotValidation-f92137d2.js → PivotValidation-39386e95.js} +3 -3
  71. flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
  72. flowfile/web/static/assets/{PivotValidation-76dd431a.js → PivotValidation-63de1f73.js} +3 -3
  73. flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
  74. flowfile/web/static/assets/{PolarsCode-889c3008.js → PolarsCode-f9d69217.js} +18 -9
  75. flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
  76. flowfile/web/static/assets/PopOver-d96599db.css +33 -0
  77. flowfile/web/static/assets/{Read-6b17491f.css → Read-36e7bd51.css} +12 -12
  78. flowfile/web/static/assets/{Read-637b72a7.js → Read-aec2e377.js} +83 -105
  79. flowfile/web/static/assets/{RecordCount-2b050c41.js → RecordCount-78ed6845.js} +6 -4
  80. flowfile/web/static/assets/{RecordId-81df7784.js → RecordId-2156e890.js} +8 -6
  81. flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
  82. flowfile/web/static/assets/{SQLQueryComponent-88dcfe53.js → SQLQueryComponent-48c72f5b.js} +3 -3
  83. flowfile/web/static/assets/{Sample-258ad2a9.js → Sample-1352ca74.js} +6 -4
  84. flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
  85. flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
  86. flowfile/web/static/assets/{SecretManager-2a2cb7e2.js → SecretsView-17df66ee.js} +35 -36
  87. flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
  88. flowfile/web/static/assets/{Select-850215fd.js → Select-0aee4c54.js} +9 -7
  89. flowfile/web/static/assets/{SettingsSection-55bae608.js → SettingsSection-0784e157.js} +3 -3
  90. flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
  91. flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
  92. flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
  93. flowfile/web/static/assets/{SettingsSection-0e8d9123.js → SettingsSection-cd341bb6.js} +3 -3
  94. flowfile/web/static/assets/{SettingsSection-29b4fa6b.js → SettingsSection-f2002a6d.js} +3 -3
  95. flowfile/web/static/assets/{SingleSelect-bebd408b.js → SingleSelect-460cc0ea.js} +2 -2
  96. flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-6093741c.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
  97. flowfile/web/static/assets/{SliderInput-6a05ab61.js → SliderInput-5d926864.js} +7 -4
  98. flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
  99. flowfile/web/static/assets/{Sort-10ab48ed.js → Sort-3cdc971b.js} +9 -7
  100. flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
  101. flowfile/web/static/assets/{TextInput-df9d6259.js → TextInput-a2d0bfbd.js} +2 -2
  102. flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-000e1178.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
  103. flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
  104. flowfile/web/static/assets/{TextToRows-6c2d93d8.js → TextToRows-918945f7.js} +11 -10
  105. flowfile/web/static/assets/{ToggleSwitch-0ff7ac52.js → ToggleSwitch-f0ef5196.js} +2 -2
  106. flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-c6dc3029.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
  107. flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
  108. flowfile/web/static/assets/{UnavailableFields-1bab97cb.js → UnavailableFields-bdad6144.js} +4 -4
  109. flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
  110. flowfile/web/static/assets/{Union-b563478a.js → Union-e8ab8c86.js} +8 -6
  111. flowfile/web/static/assets/{Unique-f90db5db.js → Unique-8cd4f976.js} +13 -22
  112. flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
  113. flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
  114. flowfile/web/static/assets/{Unpivot-bcb0025f.js → Unpivot-8da14095.js} +10 -8
  115. flowfile/web/static/assets/{UnpivotValidation-c4e73b04.js → UnpivotValidation-6f7d89ff.js} +3 -3
  116. flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
  117. flowfile/web/static/assets/{VueGraphicWalker-bb8535e2.js → VueGraphicWalker-3fb312e1.js} +4 -4
  118. flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
  119. flowfile/web/static/assets/{api-4c8e3822.js → api-24483f0d.js} +1 -1
  120. flowfile/web/static/assets/{api-2d6adc4f.js → api-8b81fa73.js} +1 -1
  121. flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
  122. flowfile/web/static/assets/{dropDown-1bca8a74.js → dropDown-ac0fda9d.js} +3 -3
  123. flowfile/web/static/assets/{fullEditor-2985687e.js → fullEditor-5497a84a.js} +11 -10
  124. flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
  125. flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
  126. flowfile/web/static/assets/{genericNodeSettings-0476ba4e.js → genericNodeSettings-99014e1d.js} +5 -5
  127. flowfile/web/static/assets/index-07dda503.js +38 -0
  128. flowfile/web/static/assets/index-3ba44389.js +2696 -0
  129. flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
  130. flowfile/web/static/assets/{index-246f201c.js → index-fb6493ae.js} +41626 -40869
  131. flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
  132. flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
  133. flowfile/web/static/assets/{outputCsv-d686eeaf.js → outputCsv-8f8ba42d.js} +3 -3
  134. flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
  135. flowfile/web/static/assets/{outputExcel-8809ea2f.js → outputExcel-393f4fef.js} +3 -3
  136. flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
  137. flowfile/web/static/assets/{outputParquet-53ba645a.js → outputParquet-07c81f65.js} +4 -4
  138. flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
  139. flowfile/web/static/assets/{readCsv-053bf97b.js → readCsv-07f6d9ad.js} +21 -20
  140. flowfile/web/static/assets/{readCsv-bca3ed53.css → readCsv-3bfac4c3.css} +15 -15
  141. flowfile/web/static/assets/{readExcel-e1b381ea.css → readExcel-3db6b763.css} +13 -13
  142. flowfile/web/static/assets/{readExcel-ad531eab.js → readExcel-ed69bc8f.js} +10 -12
  143. flowfile/web/static/assets/{readParquet-cee068e2.css → readParquet-c5244ad5.css} +4 -4
  144. flowfile/web/static/assets/{readParquet-58e899a1.js → readParquet-e3ed4528.js} +4 -7
  145. flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
  146. flowfile/web/static/assets/{selectDynamic-b38de2ba.js → selectDynamic-80b92899.js} +5 -5
  147. flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
  148. flowfile/web/static/assets/{vue-codemirror.esm-db9b8936.js → vue-codemirror.esm-0965f39f.js} +31 -637
  149. flowfile/web/static/assets/{vue-content-loader.es-b5f3ac30.js → vue-content-loader.es-c506ad97.js} +1 -1
  150. flowfile/web/static/index.html +2 -2
  151. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +4 -4
  152. flowfile-0.5.3.dist-info/RECORD +402 -0
  153. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +1 -1
  154. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +1 -0
  155. flowfile_core/__init__.py +13 -3
  156. flowfile_core/auth/jwt.py +51 -16
  157. flowfile_core/auth/models.py +32 -7
  158. flowfile_core/auth/password.py +89 -0
  159. flowfile_core/auth/secrets.py +8 -6
  160. flowfile_core/configs/__init__.py +9 -7
  161. flowfile_core/configs/flow_logger.py +15 -14
  162. flowfile_core/configs/node_store/__init__.py +72 -4
  163. flowfile_core/configs/node_store/nodes.py +155 -172
  164. flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
  165. flowfile_core/configs/settings.py +28 -15
  166. flowfile_core/database/connection.py +7 -6
  167. flowfile_core/database/init_db.py +96 -2
  168. flowfile_core/database/models.py +3 -1
  169. flowfile_core/fileExplorer/__init__.py +17 -0
  170. flowfile_core/fileExplorer/funcs.py +123 -57
  171. flowfile_core/fileExplorer/utils.py +10 -11
  172. flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
  173. flowfile_core/flowfile/analytics/analytics_processor.py +27 -24
  174. flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
  175. flowfile_core/flowfile/analytics/utils.py +1 -1
  176. flowfile_core/flowfile/code_generator/code_generator.py +391 -279
  177. flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
  178. flowfile_core/flowfile/connection_manager/models.py +1 -1
  179. flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
  180. flowfile_core/flowfile/database_connection_manager/models.py +1 -1
  181. flowfile_core/flowfile/extensions.py +17 -12
  182. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
  183. flowfile_core/flowfile/flow_data_engine/create/funcs.py +152 -103
  184. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +526 -477
  185. flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
  186. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
  187. flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
  188. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
  189. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
  190. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +43 -32
  191. flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
  192. flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
  193. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +15 -11
  194. flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
  195. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
  196. flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
  197. flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
  198. flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
  199. flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
  200. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +360 -191
  201. flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
  202. flowfile_core/flowfile/flow_data_engine/utils.py +101 -67
  203. flowfile_core/flowfile/flow_graph.py +1011 -561
  204. flowfile_core/flowfile/flow_graph_utils.py +31 -49
  205. flowfile_core/flowfile/flow_node/flow_node.py +332 -232
  206. flowfile_core/flowfile/flow_node/models.py +54 -41
  207. flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
  208. flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
  209. flowfile_core/flowfile/handler.py +82 -32
  210. flowfile_core/flowfile/manage/compatibility_enhancements.py +493 -47
  211. flowfile_core/flowfile/manage/io_flowfile.py +391 -0
  212. flowfile_core/flowfile/node_designer/__init__.py +15 -13
  213. flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
  214. flowfile_core/flowfile/node_designer/custom_node.py +162 -36
  215. flowfile_core/flowfile/node_designer/ui_components.py +136 -35
  216. flowfile_core/flowfile/schema_callbacks.py +77 -54
  217. flowfile_core/flowfile/setting_generator/__init__.py +0 -1
  218. flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
  219. flowfile_core/flowfile/setting_generator/settings.py +72 -55
  220. flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
  221. flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
  222. flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
  223. flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
  224. flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
  225. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
  226. flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
  227. flowfile_core/flowfile/util/calculate_layout.py +9 -13
  228. flowfile_core/flowfile/util/execution_orderer.py +25 -17
  229. flowfile_core/flowfile/util/node_skipper.py +4 -4
  230. flowfile_core/flowfile/utils.py +19 -21
  231. flowfile_core/main.py +26 -19
  232. flowfile_core/routes/auth.py +284 -11
  233. flowfile_core/routes/cloud_connections.py +25 -25
  234. flowfile_core/routes/logs.py +21 -29
  235. flowfile_core/routes/public.py +3 -3
  236. flowfile_core/routes/routes.py +77 -43
  237. flowfile_core/routes/secrets.py +25 -27
  238. flowfile_core/routes/user_defined_components.py +483 -4
  239. flowfile_core/run_lock.py +0 -1
  240. flowfile_core/schemas/__init__.py +4 -6
  241. flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
  242. flowfile_core/schemas/cloud_storage_schemas.py +59 -55
  243. flowfile_core/schemas/input_schema.py +398 -154
  244. flowfile_core/schemas/output_model.py +50 -35
  245. flowfile_core/schemas/schemas.py +207 -67
  246. flowfile_core/schemas/transform_schema.py +1360 -435
  247. flowfile_core/schemas/yaml_types.py +117 -0
  248. flowfile_core/secret_manager/secret_manager.py +17 -13
  249. flowfile_core/{flowfile/node_designer/data_types.py → types.py} +33 -3
  250. flowfile_core/utils/arrow_reader.py +7 -6
  251. flowfile_core/utils/excel_file_manager.py +3 -3
  252. flowfile_core/utils/fileManager.py +7 -7
  253. flowfile_core/utils/fl_executor.py +8 -10
  254. flowfile_core/utils/utils.py +4 -4
  255. flowfile_core/utils/validate_setup.py +5 -4
  256. flowfile_frame/__init__.py +107 -50
  257. flowfile_frame/adapters.py +2 -9
  258. flowfile_frame/adding_expr.py +73 -32
  259. flowfile_frame/cloud_storage/frame_helpers.py +27 -23
  260. flowfile_frame/cloud_storage/secret_manager.py +12 -26
  261. flowfile_frame/config.py +2 -5
  262. flowfile_frame/expr.py +311 -218
  263. flowfile_frame/expr.pyi +160 -159
  264. flowfile_frame/expr_name.py +23 -23
  265. flowfile_frame/flow_frame.py +581 -489
  266. flowfile_frame/flow_frame.pyi +123 -104
  267. flowfile_frame/flow_frame_methods.py +236 -252
  268. flowfile_frame/group_frame.py +50 -20
  269. flowfile_frame/join.py +2 -2
  270. flowfile_frame/lazy.py +129 -87
  271. flowfile_frame/lazy_methods.py +83 -30
  272. flowfile_frame/list_name_space.py +55 -50
  273. flowfile_frame/selectors.py +148 -68
  274. flowfile_frame/series.py +9 -7
  275. flowfile_frame/utils.py +19 -21
  276. flowfile_worker/__init__.py +12 -4
  277. flowfile_worker/configs.py +11 -19
  278. flowfile_worker/create/__init__.py +14 -27
  279. flowfile_worker/create/funcs.py +143 -94
  280. flowfile_worker/create/models.py +139 -68
  281. flowfile_worker/create/pl_types.py +14 -15
  282. flowfile_worker/create/read_excel_tables.py +34 -41
  283. flowfile_worker/create/utils.py +22 -19
  284. flowfile_worker/external_sources/s3_source/main.py +18 -51
  285. flowfile_worker/external_sources/s3_source/models.py +34 -27
  286. flowfile_worker/external_sources/sql_source/main.py +8 -5
  287. flowfile_worker/external_sources/sql_source/models.py +13 -9
  288. flowfile_worker/flow_logger.py +10 -8
  289. flowfile_worker/funcs.py +214 -155
  290. flowfile_worker/main.py +11 -17
  291. flowfile_worker/models.py +35 -28
  292. flowfile_worker/process_manager.py +2 -3
  293. flowfile_worker/routes.py +121 -93
  294. flowfile_worker/secrets.py +9 -6
  295. flowfile_worker/spawner.py +80 -49
  296. flowfile_worker/utils.py +3 -2
  297. shared/__init__.py +2 -7
  298. shared/storage_config.py +25 -13
  299. test_utils/postgres/commands.py +3 -2
  300. test_utils/postgres/fixtures.py +9 -9
  301. test_utils/s3/commands.py +1 -1
  302. test_utils/s3/data_generator.py +3 -4
  303. test_utils/s3/demo_data_generator.py +4 -7
  304. test_utils/s3/fixtures.py +7 -5
  305. tools/migrate/README.md +56 -0
  306. tools/migrate/__init__.py +12 -0
  307. tools/migrate/__main__.py +118 -0
  308. tools/migrate/legacy_schemas.py +682 -0
  309. tools/migrate/migrate.py +610 -0
  310. tools/migrate/tests/__init__.py +0 -0
  311. tools/migrate/tests/conftest.py +21 -0
  312. tools/migrate/tests/test_migrate.py +622 -0
  313. tools/migrate/tests/test_migration_e2e.py +1009 -0
  314. tools/migrate/tests/test_node_migrations.py +843 -0
  315. flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
  316. flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
  317. flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
  318. flowfile/web/static/assets/Filter-812dcbca.js +0 -164
  319. flowfile/web/static/assets/Filter-f62091b3.css +0 -20
  320. flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
  321. flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
  322. flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
  323. flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
  324. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
  325. flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
  326. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
  327. flowfile/web/static/assets/secretApi-538058f3.js +0 -46
  328. flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
  329. flowfile-0.4.1.dist-info/RECORD +0 -376
  330. flowfile_core/flowfile/manage/open_flowfile.py +0 -143
  331. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
  332. /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
@@ -1,61 +1,103 @@
1
1
  import datetime
2
- import pickle
3
-
2
+ import json
4
3
  import os
5
-
6
- import polars as pl
4
+ from collections.abc import Callable
5
+ from copy import deepcopy
6
+ from functools import partial
7
+ from importlib.metadata import PackageNotFoundError, version
8
+ from pathlib import Path
9
+ from time import time
10
+ from typing import Any, Literal, Union
11
+ from uuid import uuid1
7
12
 
8
13
  import fastexcel
14
+ import polars as pl
15
+ import yaml
9
16
  from fastapi.exceptions import HTTPException
10
- from time import time
11
- from functools import partial
12
- from typing import List, Dict, Union, Callable, Any, Optional, Tuple, Literal
13
- from uuid import uuid1
14
- from copy import deepcopy
15
17
  from pyarrow.parquet import ParquetFile
18
+
16
19
  from flowfile_core.configs import logger
17
20
  from flowfile_core.configs.flow_logger import FlowLogger
18
- from flowfile_core.flowfile.sources.external_sources.factory import data_source_factory
19
- from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, cast_str_to_polars_type
20
-
21
+ from flowfile_core.configs.node_store import CUSTOM_NODE_STORE
22
+ from flowfile_core.flowfile.analytics.utils import create_graphic_walker_node_from_node_promise
23
+ from flowfile_core.flowfile.database_connection_manager.db_connections import (
24
+ get_local_cloud_connection,
25
+ get_local_database_connection,
26
+ )
21
27
  from flowfile_core.flowfile.flow_data_engine.cloud_storage_reader import CloudStorageReader
22
- from flowfile_core.utils.arrow_reader import get_read_top_n
23
28
  from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine, execute_polars_code
24
- from flowfile_core.flowfile.flow_data_engine.read_excel_tables import (get_open_xlsx_datatypes,
25
- get_calamine_xlsx_data_types)
26
-
27
- from flowfile_core.flowfile.schema_callbacks import (calculate_fuzzy_match_schema, pre_calculate_pivot_schema)
29
+ from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, cast_str_to_polars_type
30
+ from flowfile_core.flowfile.flow_data_engine.polars_code_parser import polars_code_parser
31
+ from flowfile_core.flowfile.flow_data_engine.read_excel_tables import (
32
+ get_calamine_xlsx_data_types,
33
+ get_open_xlsx_datatypes,
34
+ )
35
+ from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import (
36
+ ExternalCloudWriter,
37
+ ExternalDatabaseFetcher,
38
+ ExternalDatabaseWriter,
39
+ ExternalDfFetcher,
40
+ )
41
+ from flowfile_core.flowfile.flow_node.flow_node import FlowNode
42
+ from flowfile_core.flowfile.graph_tree.graph_tree import (
43
+ add_un_drawn_nodes,
44
+ build_flow_paths,
45
+ build_node_info,
46
+ calculate_depth,
47
+ define_node_connections,
48
+ draw_merged_paths,
49
+ draw_standalone_paths,
50
+ group_nodes_by_depth,
51
+ )
52
+ from flowfile_core.flowfile.node_designer.custom_node import CustomNodeBase
53
+ from flowfile_core.flowfile.schema_callbacks import calculate_fuzzy_match_schema, pre_calculate_pivot_schema
28
54
  from flowfile_core.flowfile.sources import external_sources
55
+ from flowfile_core.flowfile.sources.external_sources.factory import data_source_factory
56
+ from flowfile_core.flowfile.sources.external_sources.sql_source import models as sql_models
57
+ from flowfile_core.flowfile.sources.external_sources.sql_source import utils as sql_utils
58
+ from flowfile_core.flowfile.sources.external_sources.sql_source.sql_source import BaseSqlSource, SqlSource
59
+ from flowfile_core.flowfile.util.calculate_layout import calculate_layered_layout
60
+ from flowfile_core.flowfile.util.execution_orderer import compute_execution_plan
61
+ from flowfile_core.flowfile.utils import snake_case_to_camel_case
29
62
  from flowfile_core.schemas import input_schema, schemas, transform_schema
63
+ from flowfile_core.schemas.cloud_storage_schemas import (
64
+ AuthMethod,
65
+ CloudStorageReadSettingsInternal,
66
+ CloudStorageWriteSettingsInternal,
67
+ FullCloudStorageConnection,
68
+ get_cloud_storage_write_settings_worker_interface,
69
+ )
30
70
  from flowfile_core.schemas.output_model import NodeData, NodeResult, RunInformation
31
- from flowfile_core.schemas.cloud_storage_schemas import (CloudStorageReadSettingsInternal,
32
- CloudStorageWriteSettingsInternal,
33
- FullCloudStorageConnection,
34
- get_cloud_storage_write_settings_worker_interface, AuthMethod)
35
- from flowfile_core.flowfile.utils import snake_case_to_camel_case
36
- from flowfile_core.flowfile.analytics.utils import create_graphic_walker_node_from_node_promise
37
- from flowfile_core.flowfile.flow_node.flow_node import FlowNode
38
- from flowfile_core.flowfile.util.execution_orderer import compute_execution_plan
39
- from flowfile_core.flowfile.graph_tree.graph_tree import (add_un_drawn_nodes, build_flow_paths,
40
- build_node_info, calculate_depth,
41
- define_node_connections, draw_merged_paths,
42
- draw_standalone_paths, group_nodes_by_depth)
43
- from flowfile_core.flowfile.flow_data_engine.polars_code_parser import polars_code_parser
44
- from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import (ExternalDatabaseFetcher,
45
- ExternalDatabaseWriter,
46
- ExternalDfFetcher,
47
- ExternalCloudWriter)
48
- from flowfile_core.secret_manager.secret_manager import get_encrypted_secret, decrypt_secret
49
- from flowfile_core.flowfile.sources.external_sources.sql_source import utils as sql_utils, models as sql_models
50
- from flowfile_core.flowfile.sources.external_sources.sql_source.sql_source import SqlSource, BaseSqlSource
51
- from flowfile_core.flowfile.database_connection_manager.db_connections import (get_local_database_connection,
52
- get_local_cloud_connection)
53
- from flowfile_core.flowfile.util.calculate_layout import calculate_layered_layout
54
- from flowfile_core.flowfile.node_designer.custom_node import CustomNodeBase
71
+ from flowfile_core.schemas.transform_schema import FuzzyMatchInputManager
72
+ from flowfile_core.secret_manager.secret_manager import decrypt_secret, get_encrypted_secret
73
+ from flowfile_core.utils.arrow_reader import get_read_top_n
74
+
75
+ try:
76
+ __version__ = version("Flowfile")
77
+ except PackageNotFoundError:
78
+ __version__ = "0.5.0"
79
+
80
+
81
+ def represent_list_json(dumper, data):
82
+ """Use inline style for short simple lists, block style for complex ones."""
83
+ if len(data) <= 10 and all(isinstance(item, (int, str, float, bool, type(None))) for item in data):
84
+ return dumper.represent_sequence("tag:yaml.org,2002:seq", data, flow_style=True)
85
+ return dumper.represent_sequence("tag:yaml.org,2002:seq", data, flow_style=False)
86
+
87
+
88
+ yaml.add_representer(list, represent_list_json)
55
89
 
56
90
 
57
- def get_xlsx_schema(engine: str, file_path: str, sheet_name: str, start_row: int, start_column: int,
58
- end_row: int, end_column: int, has_headers: bool):
91
+ def get_xlsx_schema(
92
+ engine: str,
93
+ file_path: str,
94
+ sheet_name: str,
95
+ start_row: int,
96
+ start_column: int,
97
+ end_row: int,
98
+ end_column: int,
99
+ has_headers: bool,
100
+ ):
59
101
  """Calculates the schema of an XLSX file by reading a sample of rows.
60
102
 
61
103
  Args:
@@ -72,27 +114,29 @@ def get_xlsx_schema(engine: str, file_path: str, sheet_name: str, start_row: int
72
114
  A list of FlowfileColumn objects representing the schema.
73
115
  """
74
116
  try:
75
- logger.info('Starting to calculate the schema')
76
- if engine == 'openpyxl':
117
+ logger.info("Starting to calculate the schema")
118
+ if engine == "openpyxl":
77
119
  max_col = end_column if end_column > 0 else None
78
- return get_open_xlsx_datatypes(file_path=file_path,
79
- sheet_name=sheet_name,
80
- min_row=start_row + 1,
81
- min_col=start_column + 1,
82
- max_row=100,
83
- max_col=max_col, has_headers=has_headers)
84
- elif engine == 'calamine':
85
- return get_calamine_xlsx_data_types(file_path=file_path,
86
- sheet_name=sheet_name,
87
- start_row=start_row,
88
- end_row=end_row)
89
- logger.info('done calculating the schema')
120
+ return get_open_xlsx_datatypes(
121
+ file_path=file_path,
122
+ sheet_name=sheet_name,
123
+ min_row=start_row + 1,
124
+ min_col=start_column + 1,
125
+ max_row=100,
126
+ max_col=max_col,
127
+ has_headers=has_headers,
128
+ )
129
+ elif engine == "calamine":
130
+ return get_calamine_xlsx_data_types(
131
+ file_path=file_path, sheet_name=sheet_name, start_row=start_row, end_row=end_row
132
+ )
133
+ logger.info("done calculating the schema")
90
134
  except Exception as e:
91
135
  logger.error(e)
92
136
  return []
93
137
 
94
138
 
95
- def skip_node_message(flow_logger: FlowLogger, nodes: List[FlowNode]) -> None:
139
+ def skip_node_message(flow_logger: FlowLogger, nodes: list[FlowNode]) -> None:
96
140
  """Logs a warning message listing all nodes that will be skipped during execution.
97
141
 
98
142
  Args:
@@ -101,10 +145,10 @@ def skip_node_message(flow_logger: FlowLogger, nodes: List[FlowNode]) -> None:
101
145
  """
102
146
  if len(nodes) > 0:
103
147
  msg = "\n".join(str(node) for node in nodes)
104
- flow_logger.warning(f'skipping nodes:\n{msg}')
148
+ flow_logger.warning(f"skipping nodes:\n{msg}")
105
149
 
106
150
 
107
- def execution_order_message(flow_logger: FlowLogger, nodes: List[FlowNode]) -> None:
151
+ def execution_order_message(flow_logger: FlowLogger, nodes: list[FlowNode]) -> None:
108
152
  """Logs an informational message showing the determined execution order of nodes.
109
153
 
110
154
  Args:
@@ -112,11 +156,19 @@ def execution_order_message(flow_logger: FlowLogger, nodes: List[FlowNode]) -> N
112
156
  nodes: A list of FlowNode objects in the order they will be executed.
113
157
  """
114
158
  msg = "\n".join(str(node) for node in nodes)
115
- flow_logger.info(f'execution order:\n{msg}')
116
-
117
-
118
- def get_xlsx_schema_callback(engine: str, file_path: str, sheet_name: str, start_row: int, start_column: int,
119
- end_row: int, end_column: int, has_headers: bool):
159
+ flow_logger.info(f"execution order:\n{msg}")
160
+
161
+
162
+ def get_xlsx_schema_callback(
163
+ engine: str,
164
+ file_path: str,
165
+ sheet_name: str,
166
+ start_row: int,
167
+ start_column: int,
168
+ end_row: int,
169
+ end_column: int,
170
+ has_headers: bool,
171
+ ):
120
172
  """Creates a partially applied function for lazy calculation of an XLSX schema.
121
173
 
122
174
  Args:
@@ -132,12 +184,22 @@ def get_xlsx_schema_callback(engine: str, file_path: str, sheet_name: str, start
132
184
  Returns:
133
185
  A callable function that, when called, will execute `get_xlsx_schema`.
134
186
  """
135
- return partial(get_xlsx_schema, engine=engine, file_path=file_path, sheet_name=sheet_name, start_row=start_row,
136
- start_column=start_column, end_row=end_row, end_column=end_column, has_headers=has_headers)
187
+ return partial(
188
+ get_xlsx_schema,
189
+ engine=engine,
190
+ file_path=file_path,
191
+ sheet_name=sheet_name,
192
+ start_row=start_row,
193
+ start_column=start_column,
194
+ end_row=end_row,
195
+ end_column=end_column,
196
+ has_headers=has_headers,
197
+ )
137
198
 
138
199
 
139
- def get_cloud_connection_settings(connection_name: str,
140
- user_id: int, auth_mode: AuthMethod) -> FullCloudStorageConnection:
200
+ def get_cloud_connection_settings(
201
+ connection_name: str, user_id: int, auth_mode: AuthMethod
202
+ ) -> FullCloudStorageConnection:
141
203
  """Retrieves cloud storage connection settings, falling back to environment variables if needed.
142
204
 
143
205
  Args:
@@ -152,7 +214,7 @@ def get_cloud_connection_settings(connection_name: str,
152
214
  HTTPException: If the connection settings cannot be found.
153
215
  """
154
216
  cloud_connection_settings = get_local_cloud_connection(connection_name, user_id)
155
- if cloud_connection_settings is None and auth_mode in ("env_vars", "auto"):
217
+ if cloud_connection_settings is None and auth_mode in ("env_vars", transform_schema.AUTO_DATA_TYPE):
156
218
  # If the auth mode is aws-cli, we do not need connection settings
157
219
  cloud_connection_settings = FullCloudStorageConnection(storage_type="s3", auth_method="env_vars")
158
220
  elif cloud_connection_settings is None and auth_mode == "aws-cli":
@@ -167,32 +229,44 @@ class FlowGraph:
167
229
 
168
230
  It manages nodes, connections, and the execution of the entire flow.
169
231
  """
232
+
170
233
  uuid: str
171
- depends_on: Dict[int, Union[ParquetFile, FlowDataEngine, "FlowGraph", pl.DataFrame,]]
234
+ depends_on: dict[
235
+ int,
236
+ Union[
237
+ ParquetFile,
238
+ FlowDataEngine,
239
+ "FlowGraph",
240
+ pl.DataFrame,
241
+ ],
242
+ ]
172
243
  _flow_id: int
173
244
  _input_data: Union[ParquetFile, FlowDataEngine, "FlowGraph"]
174
- _input_cols: List[str]
175
- _output_cols: List[str]
176
- _node_db: Dict[Union[str, int], FlowNode]
177
- _node_ids: List[Union[str, int]]
178
- _results: Optional[FlowDataEngine] = None
245
+ _input_cols: list[str]
246
+ _output_cols: list[str]
247
+ _node_db: dict[str | int, FlowNode]
248
+ _node_ids: list[str | int]
249
+ _results: FlowDataEngine | None = None
179
250
  cache_results: bool = False
180
- schema: Optional[List[FlowfileColumn]] = None
251
+ schema: list[FlowfileColumn] | None = None
181
252
  has_over_row_function: bool = False
182
- _flow_starts: List[Union[int, str]] = None
183
- latest_run_info: Optional[RunInformation] = None
253
+ _flow_starts: list[int | str] = None
254
+ latest_run_info: RunInformation | None = None
184
255
  start_datetime: datetime = None
185
256
  end_datetime: datetime = None
186
257
  _flow_settings: schemas.FlowSettings = None
187
258
  flow_logger: FlowLogger
188
259
 
189
- def __init__(self,
190
- flow_settings: schemas.FlowSettings | schemas.FlowGraphConfig,
191
- name: str = None, input_cols: List[str] = None,
192
- output_cols: List[str] = None,
193
- path_ref: str = None,
194
- input_flow: Union[ParquetFile, FlowDataEngine, "FlowGraph"] = None,
195
- cache_results: bool = False):
260
+ def __init__(
261
+ self,
262
+ flow_settings: schemas.FlowSettings | schemas.FlowGraphConfig,
263
+ name: str = None,
264
+ input_cols: list[str] = None,
265
+ output_cols: list[str] = None,
266
+ path_ref: str = None,
267
+ input_flow: Union[ParquetFile, FlowDataEngine, "FlowGraph"] = None,
268
+ cache_results: bool = False,
269
+ ):
196
270
  """Initializes a new FlowGraph instance.
197
271
 
198
272
  Args:
@@ -214,7 +288,7 @@ class FlowGraph:
214
288
  self.latest_run_info = None
215
289
  self._flow_id = flow_settings.flow_id
216
290
  self.flow_logger = FlowLogger(flow_settings.flow_id)
217
- self._flow_starts: List[FlowNode] = []
291
+ self._flow_starts: list[FlowNode] = []
218
292
  self._results = None
219
293
  self.schema = None
220
294
  self.has_over_row_function = False
@@ -223,7 +297,7 @@ class FlowGraph:
223
297
  self._node_ids = []
224
298
  self._node_db = {}
225
299
  self.cache_results = cache_results
226
- self.__name__ = name if name else id(self)
300
+ self.__name__ = name if name else "flow_" + str(id(self))
227
301
  self.depends_on = {}
228
302
  if path_ref is not None:
229
303
  self.add_datasource(input_schema.NodeDatasource(file_path=path_ref))
@@ -236,13 +310,21 @@ class FlowGraph:
236
310
 
237
311
  @flow_settings.setter
238
312
  def flow_settings(self, flow_settings: schemas.FlowSettings):
239
- if (
240
- (self._flow_settings.execution_location != flow_settings.execution_location) or
241
- (self._flow_settings.execution_mode != flow_settings.execution_mode)
313
+ if (self._flow_settings.execution_location != flow_settings.execution_location) or (
314
+ self._flow_settings.execution_mode != flow_settings.execution_mode
242
315
  ):
243
316
  self.reset()
244
317
  self._flow_settings = flow_settings
245
318
 
319
+ def add_node_to_starting_list(self, node: FlowNode) -> None:
320
+ """Adds a node to the list of starting nodes for the flow if not already present.
321
+
322
+ Args:
323
+ node: The FlowNode to add as a starting node.
324
+ """
325
+ if node.node_id not in {self_node.node_id for self_node in self._flow_starts}:
326
+ self._flow_starts.append(node)
327
+
246
328
  def add_node_promise(self, node_promise: input_schema.NodePromise):
247
329
  """Adds a placeholder node to the graph that is not yet fully configured.
248
330
 
@@ -251,13 +333,31 @@ class FlowGraph:
251
333
  Args:
252
334
  node_promise: A promise object containing basic node information.
253
335
  """
336
+
254
337
  def placeholder(n: FlowNode = None):
255
338
  if n is None:
256
339
  return FlowDataEngine()
257
340
  return n
258
341
 
259
- self.add_node_step(node_id=node_promise.node_id, node_type=node_promise.node_type, function=placeholder,
260
- setting_input=node_promise)
342
+ self.add_node_step(
343
+ node_id=node_promise.node_id,
344
+ node_type=node_promise.node_type,
345
+ function=placeholder,
346
+ setting_input=node_promise,
347
+ )
348
+ if node_promise.is_user_defined:
349
+ node_needs_settings: bool
350
+ custom_node = CUSTOM_NODE_STORE.get(node_promise.node_type)
351
+ if custom_node is None:
352
+ raise Exception(f"Custom node type '{node_promise.node_type}' not found in registry.")
353
+ settings_schema = custom_node.model_fields["settings_schema"].default
354
+ node_needs_settings = settings_schema is not None and not settings_schema.is_empty()
355
+ if not node_needs_settings:
356
+ user_defined_node_settings = input_schema.UserDefinedNode(settings={}, **node_promise.model_dump())
357
+ initialized_model = custom_node()
358
+ self.add_user_defined_node(
359
+ custom_node=initialized_model, user_defined_node_settings=user_defined_node_settings
360
+ )
261
361
 
262
362
  def apply_layout(self, y_spacing: int = 150, x_spacing: int = 200, initial_y: int = 100):
263
363
  """Calculates and applies a layered layout to all nodes in the graph.
@@ -285,20 +385,24 @@ class FlowGraph:
285
385
  updated_count = 0
286
386
  for node_id, (pos_x, pos_y) in new_positions.items():
287
387
  node = self.get_node(node_id)
288
- if node and hasattr(node, 'setting_input'):
388
+ if node and hasattr(node, "setting_input"):
289
389
  setting = node.setting_input
290
- if hasattr(setting, 'pos_x') and hasattr(setting, 'pos_y'):
390
+ if hasattr(setting, "pos_x") and hasattr(setting, "pos_y"):
291
391
  setting.pos_x = pos_x
292
392
  setting.pos_y = pos_y
293
393
  updated_count += 1
294
394
  else:
295
- self.flow_logger.warning(f"Node {node_id} setting_input ({type(setting)}) lacks pos_x/pos_y attributes.")
395
+ self.flow_logger.warning(
396
+ f"Node {node_id} setting_input ({type(setting)}) lacks pos_x/pos_y attributes."
397
+ )
296
398
  elif node:
297
399
  self.flow_logger.warning(f"Node {node_id} lacks setting_input attribute.")
298
400
  # else: Node not found, already warned by calculate_layered_layout
299
401
 
300
402
  end_time = time()
301
- self.flow_logger.info(f"Layout applied to {updated_count}/{len(self.nodes)} nodes in {end_time - start_time:.2f} seconds.")
403
+ self.flow_logger.info(
404
+ f"Layout applied to {updated_count}/{len(self.nodes)} nodes in {end_time - start_time:.2f} seconds."
405
+ )
302
406
 
303
407
  except Exception as e:
304
408
  self.flow_logger.error(f"Error applying layout: {e}")
@@ -318,13 +422,13 @@ class FlowGraph:
318
422
  """
319
423
  self._flow_id = new_id
320
424
  for node in self.nodes:
321
- if hasattr(node.setting_input, 'flow_id'):
425
+ if hasattr(node.setting_input, "flow_id"):
322
426
  node.setting_input.flow_id = new_id
323
427
  self.flow_settings.flow_id = new_id
324
428
 
325
429
  def __repr__(self):
326
430
  """Provides the official string representation of the FlowGraph instance."""
327
- settings_str = " -" + '\n -'.join(f"{k}: {v}" for k, v in self.flow_settings)
431
+ settings_str = " -" + "\n -".join(f"{k}: {v}" for k, v in self.flow_settings)
328
432
  return f"FlowGraph(\nNodes: {self._node_db}\n\nSettings:\n{settings_str}"
329
433
 
330
434
  def print_tree(self):
@@ -342,7 +446,7 @@ class FlowGraph:
342
446
 
343
447
  # Group nodes by depth
344
448
  depth_groups, max_depth = group_nodes_by_depth(node_info)
345
-
449
+
346
450
  # Sort nodes within each depth group
347
451
  for depth in depth_groups:
348
452
  depth_groups[depth].sort()
@@ -352,7 +456,7 @@ class FlowGraph:
352
456
 
353
457
  # Track which nodes connect to what
354
458
  merge_points = define_node_connections(node_info)
355
-
459
+
356
460
  # Build the flow paths
357
461
 
358
462
  # Find the maximum label length for each depth level
@@ -361,15 +465,15 @@ class FlowGraph:
361
465
  if depth in depth_groups:
362
466
  max_len = max(len(node_info[nid].label) for nid in depth_groups[depth])
363
467
  max_label_length[depth] = max_len
364
-
468
+
365
469
  # Draw the paths
366
470
  drawn_nodes = set()
367
471
  merge_drawn = set()
368
-
472
+
369
473
  # Group paths by their merge points
370
474
  paths_by_merge = {}
371
475
  standalone_paths = []
372
-
476
+
373
477
  # Build flow paths
374
478
  paths = build_flow_paths(node_info, self._flow_starts, merge_points)
375
479
 
@@ -391,22 +495,22 @@ class FlowGraph:
391
495
 
392
496
  # Add undrawn nodes
393
497
  add_un_drawn_nodes(drawn_nodes, node_info, lines)
394
-
498
+
395
499
  try:
396
500
  skip_nodes, ordered_nodes = compute_execution_plan(
397
- nodes=self.nodes,
398
- flow_starts=self._flow_starts+self.get_implicit_starter_nodes())
501
+ nodes=self.nodes, flow_starts=self._flow_starts + self.get_implicit_starter_nodes()
502
+ )
399
503
  if ordered_nodes:
400
504
  for i, node in enumerate(ordered_nodes, 1):
401
505
  lines.append(f" {i:3d}. {node_info[node.node_id].label}")
402
506
  except Exception as e:
403
507
  lines.append(f" Could not determine execution order: {e}")
404
-
508
+
405
509
  # Print everything
406
510
  output = "\n".join(lines)
407
-
511
+
408
512
  print(output)
409
-
513
+
410
514
  def get_nodes_overview(self):
411
515
  """Gets a list of dictionary representations for all nodes in the graph."""
412
516
  output = []
@@ -414,7 +518,7 @@ class FlowGraph:
414
518
  output.append(v.get_repr())
415
519
  return output
416
520
 
417
- def remove_from_output_cols(self, columns: List[str]):
521
+ def remove_from_output_cols(self, columns: list[str]):
418
522
  """Removes specified columns from the list of expected output columns.
419
523
 
420
524
  Args:
@@ -423,7 +527,7 @@ class FlowGraph:
423
527
  cols = set(columns)
424
528
  self._output_cols = [c for c in self._output_cols if c not in cols]
425
529
 
426
- def get_node(self, node_id: Union[int, str] = None) -> FlowNode | None:
530
+ def get_node(self, node_id: int | str = None) -> FlowNode | None:
427
531
  """Retrieves a node from the graph by its ID.
428
532
 
429
533
  Args:
@@ -437,24 +541,43 @@ class FlowGraph:
437
541
  node = self._node_db.get(node_id)
438
542
  if node is not None:
439
543
  return node
440
-
441
- def add_user_defined_node(self, *,
442
- custom_node: CustomNodeBase,
443
- user_defined_node_settings: input_schema.UserDefinedNode
444
- ):
445
-
446
- def _func(*fdes: FlowDataEngine) -> FlowDataEngine | None:
447
- output = custom_node.process(*(fde.data_frame for fde in fdes))
448
- if isinstance(output, pl.LazyFrame | pl.DataFrame):
544
+
545
+ def add_user_defined_node(
546
+ self, *, custom_node: CustomNodeBase, user_defined_node_settings: input_schema.UserDefinedNode
547
+ ):
548
+ """Adds a user-defined custom node to the graph.
549
+
550
+ Args:
551
+ custom_node: The custom node instance to add.
552
+ user_defined_node_settings: The settings for the user-defined node.
553
+ """
554
+
555
+ def _func(*flow_data_engine: FlowDataEngine) -> FlowDataEngine | None:
556
+ user_id = user_defined_node_settings.user_id
557
+ if user_id is not None:
558
+ custom_node.set_execution_context(user_id)
559
+ if custom_node.settings_schema:
560
+ custom_node.settings_schema.set_secret_context(user_id, custom_node.accessed_secrets)
561
+
562
+ output = custom_node.process(*(fde.data_frame for fde in flow_data_engine))
563
+
564
+ accessed_secrets = custom_node.get_accessed_secrets()
565
+ if accessed_secrets:
566
+ logger.info(f"Node '{user_defined_node_settings.node_id}' accessed secrets: {accessed_secrets}")
567
+ if isinstance(output, (pl.LazyFrame, pl.DataFrame)):
449
568
  return FlowDataEngine(output)
450
569
  return None
451
-
452
- self.add_node_step(node_id=user_defined_node_settings.node_id,
453
- function=_func,
454
- setting_input=user_defined_node_settings,
455
- input_node_ids=user_defined_node_settings.depending_on_ids,
456
- node_type=custom_node.item,
457
- )
570
+
571
+ self.add_node_step(
572
+ node_id=user_defined_node_settings.node_id,
573
+ function=_func,
574
+ setting_input=user_defined_node_settings,
575
+ input_node_ids=user_defined_node_settings.depending_on_ids,
576
+ node_type=custom_node.item,
577
+ )
578
+ if custom_node.number_of_inputs == 0:
579
+ node = self.get_node(user_defined_node_settings.node_id)
580
+ self.add_node_to_starting_list(node)
458
581
 
459
582
  def add_pivot(self, pivot_settings: input_schema.NodePivot):
460
583
  """Adds a pivot node to the graph.
@@ -466,11 +589,13 @@ class FlowGraph:
466
589
  def _func(fl: FlowDataEngine):
467
590
  return fl.do_pivot(pivot_settings.pivot_input, self.flow_logger.get_node_logger(pivot_settings.node_id))
468
591
 
469
- self.add_node_step(node_id=pivot_settings.node_id,
470
- function=_func,
471
- node_type='pivot',
472
- setting_input=pivot_settings,
473
- input_node_ids=[pivot_settings.depending_on_id])
592
+ self.add_node_step(
593
+ node_id=pivot_settings.node_id,
594
+ function=_func,
595
+ node_type="pivot",
596
+ setting_input=pivot_settings,
597
+ input_node_ids=[pivot_settings.depending_on_id],
598
+ )
474
599
 
475
600
  node = self.get_node(pivot_settings.node_id)
476
601
 
@@ -479,6 +604,7 @@ class FlowGraph:
479
604
  input_data.lazy = True # ensure the dataset is lazy
480
605
  input_lf = input_data.data_frame # get the lazy frame
481
606
  return pre_calculate_pivot_schema(input_data.schema, pivot_settings.pivot_input, input_lf=input_lf)
607
+
482
608
  node.schema_callback = schema_callback
483
609
 
484
610
  def add_unpivot(self, unpivot_settings: input_schema.NodeUnpivot):
@@ -491,11 +617,13 @@ class FlowGraph:
491
617
  def _func(fl: FlowDataEngine) -> FlowDataEngine:
492
618
  return fl.unpivot(unpivot_settings.unpivot_input)
493
619
 
494
- self.add_node_step(node_id=unpivot_settings.node_id,
495
- function=_func,
496
- node_type='unpivot',
497
- setting_input=unpivot_settings,
498
- input_node_ids=[unpivot_settings.depending_on_id])
620
+ self.add_node_step(
621
+ node_id=unpivot_settings.node_id,
622
+ function=_func,
623
+ node_type="unpivot",
624
+ setting_input=unpivot_settings,
625
+ input_node_ids=[unpivot_settings.depending_on_id],
626
+ )
499
627
 
500
628
  def add_union(self, union_settings: input_schema.NodeUnion):
501
629
  """Adds a union node to combine multiple data streams.
@@ -505,14 +633,16 @@ class FlowGraph:
505
633
  """
506
634
 
507
635
  def _func(*flowfile_tables: FlowDataEngine):
508
- dfs: List[pl.LazyFrame] | List[pl.DataFrame] = [flt.data_frame for flt in flowfile_tables]
509
- return FlowDataEngine(pl.concat(dfs, how='diagonal_relaxed'))
636
+ dfs: list[pl.LazyFrame] | list[pl.DataFrame] = [flt.data_frame for flt in flowfile_tables]
637
+ return FlowDataEngine(pl.concat(dfs, how="diagonal_relaxed"))
510
638
 
511
- self.add_node_step(node_id=union_settings.node_id,
512
- function=_func,
513
- node_type=f'union',
514
- setting_input=union_settings,
515
- input_node_ids=union_settings.depending_on_ids)
639
+ self.add_node_step(
640
+ node_id=union_settings.node_id,
641
+ function=_func,
642
+ node_type="union",
643
+ setting_input=union_settings,
644
+ input_node_ids=union_settings.depending_on_ids,
645
+ )
516
646
 
517
647
  def add_initial_node_analysis(self, node_promise: input_schema.NodePromise):
518
648
  """Adds a data exploration/analysis node based on a node promise.
@@ -540,13 +670,14 @@ class FlowGraph:
540
670
  flowfile_table = flowfile_table.get_sample(sample_size, random=True)
541
671
  external_sampler = ExternalDfFetcher(
542
672
  lf=flowfile_table.data_frame,
543
- file_ref="__gf_walker"+node.hash,
673
+ file_ref="__gf_walker" + node.hash,
544
674
  wait_on_completion=True,
545
675
  node_id=node.node_id,
546
676
  flow_id=self.flow_id,
547
677
  )
548
- node.results.analysis_data_generator = get_read_top_n(external_sampler.status.file_ref,
549
- n=min(sample_size, number_of_records))
678
+ node.results.analysis_data_generator = get_read_top_n(
679
+ external_sampler.status.file_ref, n=min(sample_size, number_of_records)
680
+ )
550
681
  return flowfile_table
551
682
 
552
683
  def schema_callback():
@@ -555,11 +686,15 @@ class FlowGraph:
555
686
  input_node = node.all_inputs[0]
556
687
  return input_node.schema
557
688
  else:
558
- return [FlowfileColumn.from_input('col_1', 'na')]
689
+ return [FlowfileColumn.from_input("col_1", "na")]
559
690
 
560
- self.add_node_step(node_id=node_analysis.node_id, node_type='explore_data',
561
- function=analysis_preparation,
562
- setting_input=node_analysis, schema_callback=schema_callback)
691
+ self.add_node_step(
692
+ node_id=node_analysis.node_id,
693
+ node_type="explore_data",
694
+ function=analysis_preparation,
695
+ setting_input=node_analysis,
696
+ schema_callback=schema_callback,
697
+ )
563
698
  node = self.get_node(node_analysis.node_id)
564
699
 
565
700
  def add_group_by(self, group_by_settings: input_schema.NodeGroupBy):
@@ -572,19 +707,20 @@ class FlowGraph:
572
707
  def _func(fl: FlowDataEngine) -> FlowDataEngine:
573
708
  return fl.do_group_by(group_by_settings.groupby_input, False)
574
709
 
575
- self.add_node_step(node_id=group_by_settings.node_id,
576
- function=_func,
577
- node_type=f'group_by',
578
- setting_input=group_by_settings,
579
- input_node_ids=[group_by_settings.depending_on_id])
710
+ self.add_node_step(
711
+ node_id=group_by_settings.node_id,
712
+ function=_func,
713
+ node_type="group_by",
714
+ setting_input=group_by_settings,
715
+ input_node_ids=[group_by_settings.depending_on_id],
716
+ )
580
717
 
581
718
  node = self.get_node(group_by_settings.node_id)
582
719
 
583
720
  def schema_callback():
584
-
585
721
  output_columns = [(c.old_name, c.new_name, c.output_type) for c in group_by_settings.groupby_input.agg_cols]
586
722
  depends_on = node.node_inputs.main_inputs[0]
587
- input_schema_dict: Dict[str, str] = {s.name: s.data_type for s in depends_on.schema}
723
+ input_schema_dict: dict[str, str] = {s.name: s.data_type for s in depends_on.schema}
588
724
  output_schema = []
589
725
  for old_name, new_name, data_type in output_columns:
590
726
  data_type = input_schema_dict[old_name] if data_type is None else data_type
@@ -599,38 +735,148 @@ class FlowGraph:
599
735
  Args:
600
736
  filter_settings: The settings for the filter operation.
601
737
  """
738
+ from flowfile_core.schemas.transform_schema import FilterOperator
739
+
740
+ def _build_basic_filter_expression(
741
+ basic_filter: transform_schema.BasicFilter, field_data_type: str | None = None
742
+ ) -> str:
743
+ """Build a filter expression string from a BasicFilter object.
744
+
745
+ Uses the Flowfile expression language that is compatible with polars_expr_transformer.
746
+
747
+ Args:
748
+ basic_filter: The basic filter configuration.
749
+ field_data_type: The data type of the field (optional, for smart quoting).
750
+
751
+ Returns:
752
+ A filter expression string compatible with polars_expr_transformer.
753
+ """
754
+ field = f"[{basic_filter.field}]"
755
+ value = basic_filter.value
756
+ value2 = basic_filter.value2
757
+
758
+ is_numeric_value = value.replace(".", "", 1).replace("-", "", 1).isnumeric() if value else False
759
+ should_quote = field_data_type == "str" or not is_numeric_value
760
+
761
+ try:
762
+ operator = basic_filter.get_operator()
763
+ except (ValueError, AttributeError):
764
+ operator = FilterOperator.from_symbol(str(basic_filter.operator))
765
+
766
+ if operator == FilterOperator.EQUALS:
767
+ if should_quote:
768
+ return f'{field}="{value}"'
769
+ return f"{field}={value}"
770
+
771
+ elif operator == FilterOperator.NOT_EQUALS:
772
+ if should_quote:
773
+ return f'{field}!="{value}"'
774
+ return f"{field}!={value}"
775
+
776
+ elif operator == FilterOperator.GREATER_THAN:
777
+ if should_quote:
778
+ return f'{field}>"{value}"'
779
+ return f"{field}>{value}"
780
+
781
+ elif operator == FilterOperator.GREATER_THAN_OR_EQUALS:
782
+ if should_quote:
783
+ return f'{field}>="{value}"'
784
+ return f"{field}>={value}"
785
+
786
+ elif operator == FilterOperator.LESS_THAN:
787
+ if should_quote:
788
+ return f'{field}<"{value}"'
789
+ return f"{field}<{value}"
790
+
791
+ elif operator == FilterOperator.LESS_THAN_OR_EQUALS:
792
+ if should_quote:
793
+ return f'{field}<="{value}"'
794
+ return f"{field}<={value}"
795
+
796
+ elif operator == FilterOperator.CONTAINS:
797
+ return f'contains({field}, "{value}")'
798
+
799
+ elif operator == FilterOperator.NOT_CONTAINS:
800
+ return f'contains({field}, "{value}") = false'
801
+
802
+ elif operator == FilterOperator.STARTS_WITH:
803
+ return f'left({field}, {len(value)}) = "{value}"'
804
+
805
+ elif operator == FilterOperator.ENDS_WITH:
806
+ return f'right({field}, {len(value)}) = "{value}"'
807
+
808
+ elif operator == FilterOperator.IS_NULL:
809
+ return f"is_empty({field})"
810
+
811
+ elif operator == FilterOperator.IS_NOT_NULL:
812
+ return f"is_not_empty({field})"
813
+
814
+ elif operator == FilterOperator.IN:
815
+ values = [v.strip() for v in value.split(",")]
816
+ if len(values) == 1:
817
+ if should_quote:
818
+ return f'{field}="{values[0]}"'
819
+ return f"{field}={values[0]}"
820
+ if should_quote:
821
+ conditions = [f'({field}="{v}")' for v in values]
822
+ else:
823
+ conditions = [f"({field}={v})" for v in values]
824
+ return " | ".join(conditions)
825
+
826
+ elif operator == FilterOperator.NOT_IN:
827
+ values = [v.strip() for v in value.split(",")]
828
+ if len(values) == 1:
829
+ if should_quote:
830
+ return f'{field}!="{values[0]}"'
831
+ return f"{field}!={values[0]}"
832
+ if should_quote:
833
+ conditions = [f'({field}!="{v}")' for v in values]
834
+ else:
835
+ conditions = [f"({field}!={v})" for v in values]
836
+ return " & ".join(conditions)
602
837
 
603
- is_advanced = filter_settings.filter_input.filter_type == 'advanced'
604
- if is_advanced:
605
- predicate = filter_settings.filter_input.advanced_filter
606
- else:
607
- _basic_filter = filter_settings.filter_input.basic_filter
608
- filter_settings.filter_input.advanced_filter = (f'[{_basic_filter.field}]{_basic_filter.filter_type}"'
609
- f'{_basic_filter.filter_value}"')
838
+ elif operator == FilterOperator.BETWEEN:
839
+ if value2 is None:
840
+ raise ValueError("BETWEEN operator requires value2")
841
+ if should_quote:
842
+ return f'({field}>="{value}") & ({field}<="{value2}")'
843
+ return f"({field}>={value}) & ({field}<={value2})"
844
+
845
+ else:
846
+ # Fallback for unknown operators - use legacy format
847
+ if should_quote:
848
+ return f'{field}{operator.to_symbol()}"{value}"'
849
+ return f"{field}{operator.to_symbol()}{value}"
610
850
 
611
851
  def _func(fl: FlowDataEngine):
612
- is_advanced = filter_settings.filter_input.filter_type == 'advanced'
852
+ is_advanced = filter_settings.filter_input.is_advanced()
853
+
613
854
  if is_advanced:
855
+ predicate = filter_settings.filter_input.advanced_filter
614
856
  return fl.do_filter(predicate)
615
857
  else:
616
858
  basic_filter = filter_settings.filter_input.basic_filter
617
- if basic_filter.filter_value.isnumeric():
859
+ if basic_filter is None:
860
+ logger.warning("Basic filter is None, returning unfiltered data")
861
+ return fl
862
+
863
+ try:
618
864
  field_data_type = fl.get_schema_column(basic_filter.field).generic_datatype()
619
- if field_data_type == 'str':
620
- _f = f'[{basic_filter.field}]{basic_filter.filter_type}"{basic_filter.filter_value}"'
621
- else:
622
- _f = f'[{basic_filter.field}]{basic_filter.filter_type}{basic_filter.filter_value}'
623
- else:
624
- _f = f'[{basic_filter.field}]{basic_filter.filter_type}"{basic_filter.filter_value}"'
625
- filter_settings.filter_input.advanced_filter = _f
626
- return fl.do_filter(_f)
865
+ except Exception:
866
+ field_data_type = None
867
+
868
+ expression = _build_basic_filter_expression(basic_filter, field_data_type)
869
+ filter_settings.filter_input.advanced_filter = expression
870
+ return fl.do_filter(expression)
627
871
 
628
- self.add_node_step(filter_settings.node_id, _func,
629
- node_type='filter',
630
- renew_schema=False,
631
- setting_input=filter_settings,
632
- input_node_ids=[filter_settings.depending_on_id]
633
- )
872
+ self.add_node_step(
873
+ filter_settings.node_id,
874
+ _func,
875
+ node_type="filter",
876
+ renew_schema=False,
877
+ setting_input=filter_settings,
878
+ input_node_ids=[filter_settings.depending_on_id],
879
+ )
634
880
 
635
881
  def add_record_count(self, node_number_of_records: input_schema.NodeRecordCount):
636
882
  """Adds a filter node to the graph.
@@ -642,11 +888,13 @@ class FlowGraph:
642
888
  def _func(fl: FlowDataEngine) -> FlowDataEngine:
643
889
  return fl.get_record_count()
644
890
 
645
- self.add_node_step(node_id=node_number_of_records.node_id,
646
- function=_func,
647
- node_type='record_count',
648
- setting_input=node_number_of_records,
649
- input_node_ids=[node_number_of_records.depending_on_id])
891
+ self.add_node_step(
892
+ node_id=node_number_of_records.node_id,
893
+ function=_func,
894
+ node_type="record_count",
895
+ setting_input=node_number_of_records,
896
+ input_node_ids=[node_number_of_records.depending_on_id],
897
+ )
650
898
 
651
899
  def add_polars_code(self, node_polars_code: input_schema.NodePolarsCode):
652
900
  """Adds a node that executes custom Polars code.
@@ -657,11 +905,14 @@ class FlowGraph:
657
905
 
658
906
  def _func(*flowfile_tables: FlowDataEngine) -> FlowDataEngine:
659
907
  return execute_polars_code(*flowfile_tables, code=node_polars_code.polars_code_input.polars_code)
660
- self.add_node_step(node_id=node_polars_code.node_id,
661
- function=_func,
662
- node_type='polars_code',
663
- setting_input=node_polars_code,
664
- input_node_ids=node_polars_code.depending_on_ids)
908
+
909
+ self.add_node_step(
910
+ node_id=node_polars_code.node_id,
911
+ function=_func,
912
+ node_type="polars_code",
913
+ setting_input=node_polars_code,
914
+ input_node_ids=node_polars_code.depending_on_ids,
915
+ )
665
916
 
666
917
  try:
667
918
  polars_code_parser.validate_code(node_polars_code.polars_code_input.polars_code)
@@ -669,9 +920,7 @@ class FlowGraph:
669
920
  node = self.get_node(node_id=node_polars_code.node_id)
670
921
  node.results.errors = str(e)
671
922
 
672
- def add_dependency_on_polars_lazy_frame(self,
673
- lazy_frame: pl.LazyFrame,
674
- node_id: int):
923
+ def add_dependency_on_polars_lazy_frame(self, lazy_frame: pl.LazyFrame, node_id: int):
675
924
  """Adds a special node that directly injects a Polars LazyFrame into the graph.
676
925
 
677
926
  Note: This is intended for backend use and will not work in the UI editor.
@@ -680,13 +929,16 @@ class FlowGraph:
680
929
  lazy_frame: The Polars LazyFrame to inject.
681
930
  node_id: The ID for the new node.
682
931
  """
932
+
683
933
  def _func():
684
934
  return FlowDataEngine(lazy_frame)
685
- node_promise = input_schema.NodePromise(flow_id=self.flow_id,
686
- node_id=node_id, node_type="polars_lazy_frame",
687
- is_setup=True)
688
- self.add_node_step(node_id=node_promise.node_id, node_type=node_promise.node_type, function=_func,
689
- setting_input=node_promise)
935
+
936
+ node_promise = input_schema.NodePromise(
937
+ flow_id=self.flow_id, node_id=node_id, node_type="polars_lazy_frame", is_setup=True
938
+ )
939
+ self.add_node_step(
940
+ node_id=node_promise.node_id, node_type=node_promise.node_type, function=_func, setting_input=node_promise
941
+ )
690
942
 
691
943
  def add_unique(self, unique_settings: input_schema.NodeUnique):
692
944
  """Adds a node to find and remove duplicate rows.
@@ -698,12 +950,14 @@ class FlowGraph:
698
950
  def _func(fl: FlowDataEngine) -> FlowDataEngine:
699
951
  return fl.make_unique(unique_settings.unique_input)
700
952
 
701
- self.add_node_step(node_id=unique_settings.node_id,
702
- function=_func,
703
- input_columns=[],
704
- node_type='unique',
705
- setting_input=unique_settings,
706
- input_node_ids=[unique_settings.depending_on_id])
953
+ self.add_node_step(
954
+ node_id=unique_settings.node_id,
955
+ function=_func,
956
+ input_columns=[],
957
+ node_type="unique",
958
+ setting_input=unique_settings,
959
+ input_node_ids=[unique_settings.depending_on_id],
960
+ )
707
961
 
708
962
  def add_graph_solver(self, graph_solver_settings: input_schema.NodeGraphSolver):
709
963
  """Adds a node that solves graph-like problems within the data.
@@ -716,14 +970,17 @@ class FlowGraph:
716
970
  graph_solver_settings: The settings object defining the graph inputs
717
971
  and the specific algorithm to apply.
718
972
  """
973
+
719
974
  def _func(fl: FlowDataEngine) -> FlowDataEngine:
720
975
  return fl.solve_graph(graph_solver_settings.graph_solver_input)
721
976
 
722
- self.add_node_step(node_id=graph_solver_settings.node_id,
723
- function=_func,
724
- node_type='graph_solver',
725
- setting_input=graph_solver_settings,
726
- input_node_ids=[graph_solver_settings.depending_on_id])
977
+ self.add_node_step(
978
+ node_id=graph_solver_settings.node_id,
979
+ function=_func,
980
+ node_type="graph_solver",
981
+ setting_input=graph_solver_settings,
982
+ input_node_ids=[graph_solver_settings.depending_on_id],
983
+ )
727
984
 
728
985
  def add_formula(self, function_settings: input_schema.NodeFormula):
729
986
  """Adds a node that applies a formula to create or modify a column.
@@ -733,28 +990,33 @@ class FlowGraph:
733
990
  """
734
991
 
735
992
  error = ""
736
- if function_settings.function.field.data_type not in (None, "Auto"):
993
+ if function_settings.function.field.data_type not in (None, transform_schema.AUTO_DATA_TYPE):
737
994
  output_type = cast_str_to_polars_type(function_settings.function.field.data_type)
738
995
  else:
739
996
  output_type = None
740
- if output_type not in (None, "Auto"):
741
- new_col = [FlowfileColumn.from_input(column_name=function_settings.function.field.name,
742
- data_type=str(output_type))]
997
+ if output_type not in (None, transform_schema.AUTO_DATA_TYPE):
998
+ new_col = [
999
+ FlowfileColumn.from_input(column_name=function_settings.function.field.name, data_type=str(output_type))
1000
+ ]
743
1001
  else:
744
- new_col = [FlowfileColumn.from_input(function_settings.function.field.name, 'String')]
1002
+ new_col = [FlowfileColumn.from_input(function_settings.function.field.name, "String")]
745
1003
 
746
1004
  def _func(fl: FlowDataEngine):
747
- return fl.apply_sql_formula(func=function_settings.function.function,
748
- col_name=function_settings.function.field.name,
749
- output_data_type=output_type)
750
-
751
- self.add_node_step(function_settings.node_id, _func,
752
- output_schema=new_col,
753
- node_type='formula',
754
- renew_schema=False,
755
- setting_input=function_settings,
756
- input_node_ids=[function_settings.depending_on_id]
757
- )
1005
+ return fl.apply_sql_formula(
1006
+ func=function_settings.function.function,
1007
+ col_name=function_settings.function.field.name,
1008
+ output_data_type=output_type,
1009
+ )
1010
+
1011
+ self.add_node_step(
1012
+ function_settings.node_id,
1013
+ _func,
1014
+ output_schema=new_col,
1015
+ node_type="formula",
1016
+ renew_schema=False,
1017
+ setting_input=function_settings,
1018
+ input_node_ids=[function_settings.depending_on_id],
1019
+ )
758
1020
  if error != "":
759
1021
  node = self.get_node(function_settings.node_id)
760
1022
  node.results.errors = error
@@ -777,18 +1039,21 @@ class FlowGraph:
777
1039
  left_select.is_available = True if left_select.old_name in main.schema else False
778
1040
  for right_select in cross_join_settings.cross_join_input.right_select.renames:
779
1041
  right_select.is_available = True if right_select.old_name in right.schema else False
1042
+ return main.do_cross_join(
1043
+ cross_join_input=cross_join_settings.cross_join_input,
1044
+ auto_generate_selection=cross_join_settings.auto_generate_selection,
1045
+ verify_integrity=False,
1046
+ other=right,
1047
+ )
780
1048
 
781
- return main.do_cross_join(cross_join_input=cross_join_settings.cross_join_input,
782
- auto_generate_selection=cross_join_settings.auto_generate_selection,
783
- verify_integrity=False,
784
- other=right)
785
-
786
- self.add_node_step(node_id=cross_join_settings.node_id,
787
- function=_func,
788
- input_columns=[],
789
- node_type='cross_join',
790
- setting_input=cross_join_settings,
791
- input_node_ids=cross_join_settings.depending_on_ids)
1049
+ self.add_node_step(
1050
+ node_id=cross_join_settings.node_id,
1051
+ function=_func,
1052
+ input_columns=[],
1053
+ node_type="cross_join",
1054
+ setting_input=cross_join_settings,
1055
+ input_node_ids=cross_join_settings.depending_on_ids,
1056
+ )
792
1057
  return self
793
1058
 
794
1059
  def add_join(self, join_settings: input_schema.NodeJoin) -> "FlowGraph":
@@ -806,18 +1071,21 @@ class FlowGraph:
806
1071
  left_select.is_available = True if left_select.old_name in main.schema else False
807
1072
  for right_select in join_settings.join_input.right_select.renames:
808
1073
  right_select.is_available = True if right_select.old_name in right.schema else False
1074
+ return main.join(
1075
+ join_input=join_settings.join_input,
1076
+ auto_generate_selection=join_settings.auto_generate_selection,
1077
+ verify_integrity=False,
1078
+ other=right,
1079
+ )
809
1080
 
810
- return main.join(join_input=join_settings.join_input,
811
- auto_generate_selection=join_settings.auto_generate_selection,
812
- verify_integrity=False,
813
- other=right)
814
-
815
- self.add_node_step(node_id=join_settings.node_id,
816
- function=_func,
817
- input_columns=[],
818
- node_type='join',
819
- setting_input=join_settings,
820
- input_node_ids=join_settings.depending_on_ids)
1081
+ self.add_node_step(
1082
+ node_id=join_settings.node_id,
1083
+ function=_func,
1084
+ input_columns=[],
1085
+ node_type="join",
1086
+ setting_input=join_settings,
1087
+ input_node_ids=join_settings.depending_on_ids,
1088
+ )
821
1089
  return self
822
1090
 
823
1091
  def add_fuzzy_match(self, fuzzy_settings: input_schema.NodeFuzzyMatch) -> "FlowGraph":
@@ -833,31 +1101,43 @@ class FlowGraph:
833
1101
  def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
834
1102
  node = self.get_node(node_id=fuzzy_settings.node_id)
835
1103
  if self.execution_location == "local":
836
- return main.fuzzy_join(fuzzy_match_input=deepcopy(fuzzy_settings.join_input),
837
- other=right,
838
- node_logger=self.flow_logger.get_node_logger(fuzzy_settings.node_id))
1104
+ return main.fuzzy_join(
1105
+ fuzzy_match_input=deepcopy(fuzzy_settings.join_input),
1106
+ other=right,
1107
+ node_logger=self.flow_logger.get_node_logger(fuzzy_settings.node_id),
1108
+ )
839
1109
 
840
- f = main.start_fuzzy_join(fuzzy_match_input=deepcopy(fuzzy_settings.join_input), other=right, file_ref=node.hash,
841
- flow_id=self.flow_id, node_id=fuzzy_settings.node_id)
1110
+ f = main.start_fuzzy_join(
1111
+ fuzzy_match_input=deepcopy(fuzzy_settings.join_input),
1112
+ other=right,
1113
+ file_ref=node.hash,
1114
+ flow_id=self.flow_id,
1115
+ node_id=fuzzy_settings.node_id,
1116
+ )
842
1117
  logger.info("Started the fuzzy match action")
843
1118
  node._fetch_cached_df = f # Add to the node so it can be cancelled and fetch later if needed
844
1119
  return FlowDataEngine(f.get_result())
845
1120
 
846
1121
  def schema_callback():
847
- fm_input_copy = deepcopy(fuzzy_settings.join_input) # Deepcopy create an unique object per func
1122
+ fm_input_copy = FuzzyMatchInputManager(
1123
+ fuzzy_settings.join_input
1124
+ ) # Deepcopy create an unique object per func
848
1125
  node = self.get_node(node_id=fuzzy_settings.node_id)
849
- return calculate_fuzzy_match_schema(fm_input_copy,
850
- left_schema=node.node_inputs.main_inputs[0].schema,
851
- right_schema=node.node_inputs.right_input.schema
852
- )
853
-
854
- self.add_node_step(node_id=fuzzy_settings.node_id,
855
- function=_func,
856
- input_columns=[],
857
- node_type='fuzzy_match',
858
- setting_input=fuzzy_settings,
859
- input_node_ids=fuzzy_settings.depending_on_ids,
860
- schema_callback=schema_callback)
1126
+ return calculate_fuzzy_match_schema(
1127
+ fm_input_copy,
1128
+ left_schema=node.node_inputs.main_inputs[0].schema,
1129
+ right_schema=node.node_inputs.right_input.schema,
1130
+ )
1131
+
1132
+ self.add_node_step(
1133
+ node_id=fuzzy_settings.node_id,
1134
+ function=_func,
1135
+ input_columns=[],
1136
+ node_type="fuzzy_match",
1137
+ setting_input=fuzzy_settings,
1138
+ input_node_ids=fuzzy_settings.depending_on_ids,
1139
+ schema_callback=schema_callback,
1140
+ )
861
1141
 
862
1142
  return self
863
1143
 
@@ -874,14 +1154,17 @@ class FlowGraph:
874
1154
  Returns:
875
1155
  The `FlowGraph` instance for method chaining.
876
1156
  """
1157
+
877
1158
  def _func(table: FlowDataEngine) -> FlowDataEngine:
878
1159
  return table.split(node_text_to_rows.text_to_rows_input)
879
1160
 
880
- self.add_node_step(node_id=node_text_to_rows.node_id,
881
- function=_func,
882
- node_type='text_to_rows',
883
- setting_input=node_text_to_rows,
884
- input_node_ids=[node_text_to_rows.depending_on_id])
1161
+ self.add_node_step(
1162
+ node_id=node_text_to_rows.node_id,
1163
+ function=_func,
1164
+ node_type="text_to_rows",
1165
+ setting_input=node_text_to_rows,
1166
+ input_node_ids=[node_text_to_rows.depending_on_id],
1167
+ )
885
1168
  return self
886
1169
 
887
1170
  def add_sort(self, sort_settings: input_schema.NodeSort) -> "FlowGraph":
@@ -897,11 +1180,13 @@ class FlowGraph:
897
1180
  def _func(table: FlowDataEngine) -> FlowDataEngine:
898
1181
  return table.do_sort(sort_settings.sort_input)
899
1182
 
900
- self.add_node_step(node_id=sort_settings.node_id,
901
- function=_func,
902
- node_type='sort',
903
- setting_input=sort_settings,
904
- input_node_ids=[sort_settings.depending_on_id])
1183
+ self.add_node_step(
1184
+ node_id=sort_settings.node_id,
1185
+ function=_func,
1186
+ node_type="sort",
1187
+ setting_input=sort_settings,
1188
+ input_node_ids=[sort_settings.depending_on_id],
1189
+ )
905
1190
  return self
906
1191
 
907
1192
  def add_sample(self, sample_settings: input_schema.NodeSample) -> "FlowGraph":
@@ -913,15 +1198,17 @@ class FlowGraph:
913
1198
  Returns:
914
1199
  The `FlowGraph` instance for method chaining.
915
1200
  """
1201
+
916
1202
  def _func(table: FlowDataEngine) -> FlowDataEngine:
917
1203
  return table.get_sample(sample_settings.sample_size)
918
1204
 
919
- self.add_node_step(node_id=sample_settings.node_id,
920
- function=_func,
921
- node_type='sample',
922
- setting_input=sample_settings,
923
- input_node_ids=[sample_settings.depending_on_id]
924
- )
1205
+ self.add_node_step(
1206
+ node_id=sample_settings.node_id,
1207
+ function=_func,
1208
+ node_type="sample",
1209
+ setting_input=sample_settings,
1210
+ input_node_ids=[sample_settings.depending_on_id],
1211
+ )
925
1212
  return self
926
1213
 
927
1214
  def add_record_id(self, record_id_settings: input_schema.NodeRecordId) -> "FlowGraph":
@@ -938,12 +1225,13 @@ class FlowGraph:
938
1225
  def _func(table: FlowDataEngine) -> FlowDataEngine:
939
1226
  return table.add_record_id(record_id_settings.record_id_input)
940
1227
 
941
- self.add_node_step(node_id=record_id_settings.node_id,
942
- function=_func,
943
- node_type='record_id',
944
- setting_input=record_id_settings,
945
- input_node_ids=[record_id_settings.depending_on_id]
946
- )
1228
+ self.add_node_step(
1229
+ node_id=record_id_settings.node_id,
1230
+ function=_func,
1231
+ node_type="record_id",
1232
+ setting_input=record_id_settings,
1233
+ input_node_ids=[record_id_settings.depending_on_id],
1234
+ )
947
1235
  return self
948
1236
 
949
1237
  def add_select(self, select_settings: input_schema.NodeSelect) -> "FlowGraph":
@@ -975,16 +1263,19 @@ class FlowGraph:
975
1263
  for i in ids_to_remove:
976
1264
  v = select_cols.pop(i)
977
1265
  del v
978
- return table.do_select(select_inputs=transform_schema.SelectInputs(select_cols),
979
- keep_missing=select_settings.keep_missing)
980
-
981
- self.add_node_step(node_id=select_settings.node_id,
982
- function=_func,
983
- input_columns=[],
984
- node_type='select',
985
- drop_columns=list(drop_cols),
986
- setting_input=select_settings,
987
- input_node_ids=[select_settings.depending_on_id])
1266
+ return table.do_select(
1267
+ select_inputs=transform_schema.SelectInputs(select_cols), keep_missing=select_settings.keep_missing
1268
+ )
1269
+
1270
+ self.add_node_step(
1271
+ node_id=select_settings.node_id,
1272
+ function=_func,
1273
+ input_columns=[],
1274
+ node_type="select",
1275
+ drop_columns=list(drop_cols),
1276
+ setting_input=select_settings,
1277
+ input_node_ids=[select_settings.depending_on_id],
1278
+ )
988
1279
  return self
989
1280
 
990
1281
  @property
@@ -992,7 +1283,7 @@ class FlowGraph:
992
1283
  """Checks if the graph has any nodes."""
993
1284
  return len(self._node_ids) > 0
994
1285
 
995
- def delete_node(self, node_id: Union[int, str]):
1286
+ def delete_node(self, node_id: int | str):
996
1287
  """Deletes a node from the graph and updates all its connections.
997
1288
 
998
1289
  Args:
@@ -1007,7 +1298,7 @@ class FlowGraph:
1007
1298
  if node:
1008
1299
  logger.info(f"Found node: {node_id}, processing deletion")
1009
1300
 
1010
- lead_to_steps: List[FlowNode] = node.leads_to_nodes
1301
+ lead_to_steps: list[FlowNode] = node.leads_to_nodes
1011
1302
  logger.debug(f"Node {node_id} leads to {len(lead_to_steps)} other nodes")
1012
1303
 
1013
1304
  if len(lead_to_steps) > 0:
@@ -1016,7 +1307,7 @@ class FlowGraph:
1016
1307
  lead_to_step.delete_input_node(node_id, complete=True)
1017
1308
 
1018
1309
  if not node.is_start:
1019
- depends_on: List[FlowNode] = node.node_inputs.get_all_inputs()
1310
+ depends_on: list[FlowNode] = node.node_inputs.get_all_inputs()
1020
1311
  logger.debug(f"Node {node_id} depends on {len(depends_on)} other nodes")
1021
1312
 
1022
1313
  for depend_on in depends_on:
@@ -1036,18 +1327,20 @@ class FlowGraph:
1036
1327
  """Checks if the graph has an initial input data source."""
1037
1328
  return self._input_data is not None
1038
1329
 
1039
- def add_node_step(self,
1040
- node_id: Union[int, str],
1041
- function: Callable,
1042
- input_columns: List[str] = None,
1043
- output_schema: List[FlowfileColumn] = None,
1044
- node_type: str = None,
1045
- drop_columns: List[str] = None,
1046
- renew_schema: bool = True,
1047
- setting_input: Any = None,
1048
- cache_results: bool = None,
1049
- schema_callback: Callable = None,
1050
- input_node_ids: List[int] = None) -> FlowNode:
1330
+ def add_node_step(
1331
+ self,
1332
+ node_id: int | str,
1333
+ function: Callable,
1334
+ input_columns: list[str] = None,
1335
+ output_schema: list[FlowfileColumn] = None,
1336
+ node_type: str = None,
1337
+ drop_columns: list[str] = None,
1338
+ renew_schema: bool = True,
1339
+ setting_input: Any = None,
1340
+ cache_results: bool = None,
1341
+ schema_callback: Callable = None,
1342
+ input_node_ids: list[int] = None,
1343
+ ) -> FlowNode:
1051
1344
  """The core method for adding or updating a node in the graph.
1052
1345
 
1053
1346
  Args:
@@ -1080,29 +1373,33 @@ class FlowGraph:
1080
1373
  if isinstance(input_columns, str):
1081
1374
  input_columns = [input_columns]
1082
1375
  if (
1083
- input_nodes is not None or
1084
- function.__name__ in ('placeholder', 'analysis_preparation') or
1085
- node_type in ("cloud_storage_reader", "polars_lazy_frame", "input_data")
1376
+ input_nodes is not None
1377
+ or function.__name__ in ("placeholder", "analysis_preparation")
1378
+ or node_type in ("cloud_storage_reader", "polars_lazy_frame", "input_data")
1086
1379
  ):
1087
1380
  if not existing_node:
1088
- node = FlowNode(node_id=node_id,
1089
- function=function,
1090
- output_schema=output_schema,
1091
- input_columns=input_columns,
1092
- drop_columns=drop_columns,
1093
- renew_schema=renew_schema,
1094
- setting_input=setting_input,
1095
- node_type=node_type,
1096
- name=function.__name__,
1097
- schema_callback=schema_callback,
1098
- parent_uuid=self.uuid)
1381
+ node = FlowNode(
1382
+ node_id=node_id,
1383
+ function=function,
1384
+ output_schema=output_schema,
1385
+ input_columns=input_columns,
1386
+ drop_columns=drop_columns,
1387
+ renew_schema=renew_schema,
1388
+ setting_input=setting_input,
1389
+ node_type=node_type,
1390
+ name=function.__name__,
1391
+ schema_callback=schema_callback,
1392
+ parent_uuid=self.uuid,
1393
+ )
1099
1394
  else:
1100
- existing_node.update_node(function=function,
1101
- output_schema=output_schema,
1102
- input_columns=input_columns,
1103
- drop_columns=drop_columns,
1104
- setting_input=setting_input,
1105
- schema_callback=schema_callback)
1395
+ existing_node.update_node(
1396
+ function=function,
1397
+ output_schema=output_schema,
1398
+ input_columns=input_columns,
1399
+ drop_columns=drop_columns,
1400
+ setting_input=setting_input,
1401
+ schema_callback=schema_callback,
1402
+ )
1106
1403
  node = existing_node
1107
1404
  else:
1108
1405
  raise Exception("No data initialized")
@@ -1110,7 +1407,7 @@ class FlowGraph:
1110
1407
  self._node_ids.append(node_id)
1111
1408
  return node
1112
1409
 
1113
- def add_include_cols(self, include_columns: List[str]):
1410
+ def add_include_cols(self, include_columns: list[str]):
1114
1411
  """Adds columns to both the input and output column lists.
1115
1412
 
1116
1413
  Args:
@@ -1131,24 +1428,30 @@ class FlowGraph:
1131
1428
  """
1132
1429
 
1133
1430
  def _func(df: FlowDataEngine):
1134
- output_file.output_settings.populate_abs_file_path()
1135
- execute_remote = self.execution_location != 'local'
1136
- df.output(output_fs=output_file.output_settings, flow_id=self.flow_id, node_id=output_file.node_id,
1137
- execute_remote=execute_remote)
1431
+ execute_remote = self.execution_location != "local"
1432
+ df.output(
1433
+ output_fs=output_file.output_settings,
1434
+ flow_id=self.flow_id,
1435
+ node_id=output_file.node_id,
1436
+ execute_remote=execute_remote,
1437
+ )
1138
1438
  return df
1139
1439
 
1140
1440
  def schema_callback():
1141
1441
  input_node: FlowNode = self.get_node(output_file.node_id).node_inputs.main_inputs[0]
1142
1442
 
1143
1443
  return input_node.schema
1144
- input_node_id = getattr(output_file, "depending_on_id") if hasattr(output_file, 'depending_on_id') else None
1145
- self.add_node_step(node_id=output_file.node_id,
1146
- function=_func,
1147
- input_columns=[],
1148
- node_type='output',
1149
- setting_input=output_file,
1150
- schema_callback=schema_callback,
1151
- input_node_ids=[input_node_id])
1444
+
1445
+ input_node_id = output_file.depending_on_id if hasattr(output_file, "depending_on_id") else None
1446
+ self.add_node_step(
1447
+ node_id=output_file.node_id,
1448
+ function=_func,
1449
+ input_columns=[],
1450
+ node_type="output",
1451
+ setting_input=output_file,
1452
+ schema_callback=schema_callback,
1453
+ input_node_ids=[input_node_id],
1454
+ )
1152
1455
 
1153
1456
  def add_database_writer(self, node_database_writer: input_schema.NodeDatabaseWriter):
1154
1457
  """Adds a node to write data to a database.
@@ -1157,18 +1460,20 @@ class FlowGraph:
1157
1460
  node_database_writer: The settings for the database writer node.
1158
1461
  """
1159
1462
 
1160
- node_type = 'database_writer'
1463
+ node_type = "database_writer"
1161
1464
  database_settings: input_schema.DatabaseWriteSettings = node_database_writer.database_write_settings
1162
- database_connection: Optional[input_schema.DatabaseConnection | input_schema.FullDatabaseConnection]
1163
- if database_settings.connection_mode == 'inline':
1465
+ database_connection: input_schema.DatabaseConnection | input_schema.FullDatabaseConnection | None
1466
+ if database_settings.connection_mode == "inline":
1164
1467
  database_connection: input_schema.DatabaseConnection = database_settings.database_connection
1165
- encrypted_password = get_encrypted_secret(current_user_id=node_database_writer.user_id,
1166
- secret_name=database_connection.password_ref)
1468
+ encrypted_password = get_encrypted_secret(
1469
+ current_user_id=node_database_writer.user_id, secret_name=database_connection.password_ref
1470
+ )
1167
1471
  if encrypted_password is None:
1168
1472
  raise HTTPException(status_code=400, detail="Password not found")
1169
1473
  else:
1170
- database_reference_settings = get_local_database_connection(database_settings.database_connection_name,
1171
- node_database_writer.user_id)
1474
+ database_reference_settings = get_local_database_connection(
1475
+ database_settings.database_connection_name, node_database_writer.user_id
1476
+ )
1172
1477
  encrypted_password = database_reference_settings.password.get_secret_value()
1173
1478
 
1174
1479
  def _func(df: FlowDataEngine):
@@ -1177,14 +1482,20 @@ class FlowGraph:
1177
1482
  sql_models.DatabaseExternalWriteSettings.create_from_from_node_database_writer(
1178
1483
  node_database_writer=node_database_writer,
1179
1484
  password=encrypted_password,
1180
- table_name=(database_settings.schema_name+'.'+database_settings.table_name
1181
- if database_settings.schema_name else database_settings.table_name),
1182
- database_reference_settings=(database_reference_settings if database_settings.connection_mode == 'reference'
1183
- else None),
1184
- lf=df.data_frame
1485
+ table_name=(
1486
+ database_settings.schema_name + "." + database_settings.table_name
1487
+ if database_settings.schema_name
1488
+ else database_settings.table_name
1489
+ ),
1490
+ database_reference_settings=(
1491
+ database_reference_settings if database_settings.connection_mode == "reference" else None
1492
+ ),
1493
+ lf=df.data_frame,
1185
1494
  )
1186
1495
  )
1187
- external_database_writer = ExternalDatabaseWriter(database_external_write_settings, wait_on_completion=False)
1496
+ external_database_writer = ExternalDatabaseWriter(
1497
+ database_external_write_settings, wait_on_completion=False
1498
+ )
1188
1499
  node._fetch_cached_df = external_database_writer
1189
1500
  external_database_writer.get_result()
1190
1501
  return df
@@ -1211,56 +1522,64 @@ class FlowGraph:
1211
1522
  """
1212
1523
 
1213
1524
  logger.info("Adding database reader")
1214
- node_type = 'database_reader'
1525
+ node_type = "database_reader"
1215
1526
  database_settings: input_schema.DatabaseSettings = node_database_reader.database_settings
1216
- database_connection: Optional[input_schema.DatabaseConnection | input_schema.FullDatabaseConnection]
1217
- if database_settings.connection_mode == 'inline':
1527
+ database_connection: input_schema.DatabaseConnection | input_schema.FullDatabaseConnection | None
1528
+ if database_settings.connection_mode == "inline":
1218
1529
  database_connection: input_schema.DatabaseConnection = database_settings.database_connection
1219
- encrypted_password = get_encrypted_secret(current_user_id=node_database_reader.user_id,
1220
- secret_name=database_connection.password_ref)
1530
+ encrypted_password = get_encrypted_secret(
1531
+ current_user_id=node_database_reader.user_id, secret_name=database_connection.password_ref
1532
+ )
1221
1533
  if encrypted_password is None:
1222
1534
  raise HTTPException(status_code=400, detail="Password not found")
1223
1535
  else:
1224
- database_reference_settings = get_local_database_connection(database_settings.database_connection_name,
1225
- node_database_reader.user_id)
1536
+ database_reference_settings = get_local_database_connection(
1537
+ database_settings.database_connection_name, node_database_reader.user_id
1538
+ )
1226
1539
  database_connection = database_reference_settings
1227
1540
  encrypted_password = database_reference_settings.password.get_secret_value()
1228
1541
 
1229
1542
  def _func():
1230
- sql_source = BaseSqlSource(query=None if database_settings.query_mode == 'table' else database_settings.query,
1231
- table_name=database_settings.table_name,
1232
- schema_name=database_settings.schema_name,
1233
- fields=node_database_reader.fields,
1234
- )
1543
+ sql_source = BaseSqlSource(
1544
+ query=None if database_settings.query_mode == "table" else database_settings.query,
1545
+ table_name=database_settings.table_name,
1546
+ schema_name=database_settings.schema_name,
1547
+ fields=node_database_reader.fields,
1548
+ )
1235
1549
  database_external_read_settings = (
1236
1550
  sql_models.DatabaseExternalReadSettings.create_from_from_node_database_reader(
1237
1551
  node_database_reader=node_database_reader,
1238
1552
  password=encrypted_password,
1239
1553
  query=sql_source.query,
1240
- database_reference_settings=(database_reference_settings if database_settings.connection_mode == 'reference'
1241
- else None),
1554
+ database_reference_settings=(
1555
+ database_reference_settings if database_settings.connection_mode == "reference" else None
1556
+ ),
1242
1557
  )
1243
1558
  )
1244
1559
 
1245
- external_database_fetcher = ExternalDatabaseFetcher(database_external_read_settings, wait_on_completion=False)
1560
+ external_database_fetcher = ExternalDatabaseFetcher(
1561
+ database_external_read_settings, wait_on_completion=False
1562
+ )
1246
1563
  node._fetch_cached_df = external_database_fetcher
1247
1564
  fl = FlowDataEngine(external_database_fetcher.get_result())
1248
1565
  node_database_reader.fields = [c.get_minimal_field_info() for c in fl.schema]
1249
1566
  return fl
1250
1567
 
1251
1568
  def schema_callback():
1252
- sql_source = SqlSource(connection_string=
1253
- sql_utils.construct_sql_uri(database_type=database_connection.database_type,
1254
- host=database_connection.host,
1255
- port=database_connection.port,
1256
- database=database_connection.database,
1257
- username=database_connection.username,
1258
- password=decrypt_secret(encrypted_password)),
1259
- query=None if database_settings.query_mode == 'table' else database_settings.query,
1260
- table_name=database_settings.table_name,
1261
- schema_name=database_settings.schema_name,
1262
- fields=node_database_reader.fields,
1263
- )
1569
+ sql_source = SqlSource(
1570
+ connection_string=sql_utils.construct_sql_uri(
1571
+ database_type=database_connection.database_type,
1572
+ host=database_connection.host,
1573
+ port=database_connection.port,
1574
+ database=database_connection.database,
1575
+ username=database_connection.username,
1576
+ password=decrypt_secret(encrypted_password),
1577
+ ),
1578
+ query=None if database_settings.query_mode == "table" else database_settings.query,
1579
+ table_name=database_settings.table_name,
1580
+ schema_name=database_settings.schema_name,
1581
+ fields=node_database_reader.fields,
1582
+ )
1264
1583
  return sql_source.get_schema()
1265
1584
 
1266
1585
  node = self.get_node(node_database_reader.node_id)
@@ -1270,16 +1589,20 @@ class FlowGraph:
1270
1589
  node.function = _func
1271
1590
  node.setting_input = node_database_reader
1272
1591
  node.node_settings.cache_results = node_database_reader.cache_results
1273
- if node_database_reader.node_id not in set(start_node.node_id for start_node in self._flow_starts):
1274
- self._flow_starts.append(node)
1592
+ self.add_node_to_starting_list(node)
1275
1593
  node.schema_callback = schema_callback
1276
1594
  else:
1277
- node = FlowNode(node_database_reader.node_id, function=_func,
1278
- setting_input=node_database_reader,
1279
- name=node_type, node_type=node_type, parent_uuid=self.uuid,
1280
- schema_callback=schema_callback)
1595
+ node = FlowNode(
1596
+ node_database_reader.node_id,
1597
+ function=_func,
1598
+ setting_input=node_database_reader,
1599
+ name=node_type,
1600
+ node_type=node_type,
1601
+ parent_uuid=self.uuid,
1602
+ schema_callback=schema_callback,
1603
+ )
1281
1604
  self._node_db[node_database_reader.node_id] = node
1282
- self._flow_starts.append(node)
1605
+ self.add_node_to_starting_list(node)
1283
1606
  self._node_ids.append(node_database_reader.node_id)
1284
1607
 
1285
1608
  def add_sql_source(self, external_source_input: input_schema.NodeExternalSource):
@@ -1290,7 +1613,7 @@ class FlowGraph:
1290
1613
  Args:
1291
1614
  external_source_input: The settings for the external SQL source node.
1292
1615
  """
1293
- logger.info('Adding sql source')
1616
+ logger.info("Adding sql source")
1294
1617
  self.add_external_source(external_source_input)
1295
1618
 
1296
1619
  def add_cloud_storage_writer(self, node_cloud_storage_writer: input_schema.NodeCloudStorageWriter) -> None:
@@ -1301,19 +1624,20 @@ class FlowGraph:
1301
1624
  """
1302
1625
 
1303
1626
  node_type = "cloud_storage_writer"
1627
+
1304
1628
  def _func(df: FlowDataEngine):
1305
1629
  df.lazy = True
1306
- execute_remote = self.execution_location != 'local'
1630
+ execute_remote = self.execution_location != "local"
1307
1631
  cloud_connection_settings = get_cloud_connection_settings(
1308
1632
  connection_name=node_cloud_storage_writer.cloud_storage_settings.connection_name,
1309
1633
  user_id=node_cloud_storage_writer.user_id,
1310
- auth_mode=node_cloud_storage_writer.cloud_storage_settings.auth_mode
1634
+ auth_mode=node_cloud_storage_writer.cloud_storage_settings.auth_mode,
1311
1635
  )
1312
1636
  full_cloud_storage_connection = FullCloudStorageConnection(
1313
1637
  storage_type=cloud_connection_settings.storage_type,
1314
1638
  auth_method=cloud_connection_settings.auth_method,
1315
1639
  aws_allow_unsafe_html=cloud_connection_settings.aws_allow_unsafe_html,
1316
- **CloudStorageReader.get_storage_options(cloud_connection_settings)
1640
+ **CloudStorageReader.get_storage_options(cloud_connection_settings),
1317
1641
  )
1318
1642
  if execute_remote:
1319
1643
  settings = get_cloud_storage_write_settings_worker_interface(
@@ -1321,7 +1645,8 @@ class FlowGraph:
1321
1645
  connection=full_cloud_storage_connection,
1322
1646
  lf=df.data_frame,
1323
1647
  flowfile_node_id=node_cloud_storage_writer.node_id,
1324
- flowfile_flow_id=self.flow_id)
1648
+ flowfile_flow_id=self.flow_id,
1649
+ )
1325
1650
  external_database_writer = ExternalCloudWriter(settings, wait_on_completion=False)
1326
1651
  node._fetch_cached_df = external_database_writer
1327
1652
  external_database_writer.get_result()
@@ -1347,7 +1672,7 @@ class FlowGraph:
1347
1672
  node_type=node_type,
1348
1673
  setting_input=node_cloud_storage_writer,
1349
1674
  schema_callback=schema_callback,
1350
- input_node_ids=[node_cloud_storage_writer.depending_on_id]
1675
+ input_node_ids=[node_cloud_storage_writer.depending_on_id],
1351
1676
  )
1352
1677
 
1353
1678
  node = self.get_node(node_cloud_storage_writer.node_id)
@@ -1365,49 +1690,53 @@ class FlowGraph:
1365
1690
  def _func():
1366
1691
  logger.info("Starting to run the schema callback for cloud storage reader")
1367
1692
  self.flow_logger.info("Starting to run the schema callback for cloud storage reader")
1368
- settings = CloudStorageReadSettingsInternal(read_settings=cloud_storage_read_settings,
1369
- connection=get_cloud_connection_settings(
1370
- connection_name=cloud_storage_read_settings.connection_name,
1371
- user_id=node_cloud_storage_reader.user_id,
1372
- auth_mode=cloud_storage_read_settings.auth_mode
1373
- ))
1693
+ settings = CloudStorageReadSettingsInternal(
1694
+ read_settings=cloud_storage_read_settings,
1695
+ connection=get_cloud_connection_settings(
1696
+ connection_name=cloud_storage_read_settings.connection_name,
1697
+ user_id=node_cloud_storage_reader.user_id,
1698
+ auth_mode=cloud_storage_read_settings.auth_mode,
1699
+ ),
1700
+ )
1374
1701
  fl = FlowDataEngine.from_cloud_storage_obj(settings)
1375
1702
  return fl
1376
1703
 
1377
- node = self.add_node_step(node_id=node_cloud_storage_reader.node_id,
1378
- function=_func,
1379
- cache_results=node_cloud_storage_reader.cache_results,
1380
- setting_input=node_cloud_storage_reader,
1381
- node_type=node_type,
1382
- )
1383
- if node_cloud_storage_reader.node_id not in set(start_node.node_id for start_node in self._flow_starts):
1384
- self._flow_starts.append(node)
1704
+ node = self.add_node_step(
1705
+ node_id=node_cloud_storage_reader.node_id,
1706
+ function=_func,
1707
+ cache_results=node_cloud_storage_reader.cache_results,
1708
+ setting_input=node_cloud_storage_reader,
1709
+ node_type=node_type,
1710
+ )
1711
+ self.add_node_to_starting_list(node)
1385
1712
 
1386
- def add_external_source(self,
1387
- external_source_input: input_schema.NodeExternalSource):
1713
+ def add_external_source(self, external_source_input: input_schema.NodeExternalSource):
1388
1714
  """Adds a node for a custom external data source.
1389
1715
 
1390
1716
  Args:
1391
1717
  external_source_input: The settings for the external source node.
1392
1718
  """
1393
1719
 
1394
- node_type = 'external_source'
1720
+ node_type = "external_source"
1395
1721
  external_source_script = getattr(external_sources.custom_external_sources, external_source_input.identifier)
1396
- source_settings = (getattr(input_schema, snake_case_to_camel_case(external_source_input.identifier)).
1397
- model_validate(external_source_input.source_settings))
1398
- if hasattr(external_source_script, 'initial_getter'):
1399
- initial_getter = getattr(external_source_script, 'initial_getter')(source_settings)
1722
+ source_settings = getattr(
1723
+ input_schema, snake_case_to_camel_case(external_source_input.identifier)
1724
+ ).model_validate(external_source_input.source_settings)
1725
+ if hasattr(external_source_script, "initial_getter"):
1726
+ initial_getter = external_source_script.initial_getter(source_settings)
1400
1727
  else:
1401
1728
  initial_getter = None
1402
1729
  data_getter = external_source_script.getter(source_settings)
1403
- external_source = data_source_factory(source_type='custom',
1404
- data_getter=data_getter,
1405
- initial_data_getter=initial_getter,
1406
- orientation=external_source_input.source_settings.orientation,
1407
- schema=None)
1730
+ external_source = data_source_factory(
1731
+ source_type="custom",
1732
+ data_getter=data_getter,
1733
+ initial_data_getter=initial_getter,
1734
+ orientation=external_source_input.source_settings.orientation,
1735
+ schema=None,
1736
+ )
1408
1737
 
1409
1738
  def _func():
1410
- logger.info('Calling external source')
1739
+ logger.info("Calling external source")
1411
1740
  fl = FlowDataEngine.create_from_external_source(external_source=external_source)
1412
1741
  external_source_input.source_settings.fields = [c.get_minimal_field_info() for c in fl.schema]
1413
1742
  return fl
@@ -1419,31 +1748,39 @@ class FlowGraph:
1419
1748
  node.function = _func
1420
1749
  node.setting_input = external_source_input
1421
1750
  node.node_settings.cache_results = external_source_input.cache_results
1422
- if external_source_input.node_id not in set(start_node.node_id for start_node in self._flow_starts):
1423
- self._flow_starts.append(node)
1751
+ self.add_node_to_starting_list(node)
1752
+
1424
1753
  else:
1425
- node = FlowNode(external_source_input.node_id, function=_func,
1426
- setting_input=external_source_input,
1427
- name=node_type, node_type=node_type, parent_uuid=self.uuid)
1754
+ node = FlowNode(
1755
+ external_source_input.node_id,
1756
+ function=_func,
1757
+ setting_input=external_source_input,
1758
+ name=node_type,
1759
+ node_type=node_type,
1760
+ parent_uuid=self.uuid,
1761
+ )
1428
1762
  self._node_db[external_source_input.node_id] = node
1429
- self._flow_starts.append(node)
1763
+ self.add_node_to_starting_list(node)
1430
1764
  self._node_ids.append(external_source_input.node_id)
1431
1765
  if external_source_input.source_settings.fields and len(external_source_input.source_settings.fields) > 0:
1432
- logger.info('Using provided schema in the node')
1766
+ logger.info("Using provided schema in the node")
1433
1767
 
1434
1768
  def schema_callback():
1435
- return [FlowfileColumn.from_input(f.name, f.data_type) for f in
1436
- external_source_input.source_settings.fields]
1769
+ return [
1770
+ FlowfileColumn.from_input(f.name, f.data_type) for f in external_source_input.source_settings.fields
1771
+ ]
1437
1772
 
1438
1773
  node.schema_callback = schema_callback
1439
1774
  else:
1440
- logger.warning('Removing schema')
1775
+ logger.warning("Removing schema")
1441
1776
  node._schema_callback = None
1442
- self.add_node_step(node_id=external_source_input.node_id,
1443
- function=_func,
1444
- input_columns=[],
1445
- node_type=node_type,
1446
- setting_input=external_source_input)
1777
+ self.add_node_step(
1778
+ node_id=external_source_input.node_id,
1779
+ function=_func,
1780
+ input_columns=[],
1781
+ node_type=node_type,
1782
+ setting_input=external_source_input,
1783
+ )
1447
1784
 
1448
1785
  def add_read(self, input_file: input_schema.NodeRead):
1449
1786
  """Adds a node to read data from a local file (e.g., CSV, Parquet, Excel).
@@ -1451,24 +1788,29 @@ class FlowGraph:
1451
1788
  Args:
1452
1789
  input_file: The settings for the read operation.
1453
1790
  """
1454
-
1455
- if input_file.received_file.file_type in ('xlsx', 'excel') and input_file.received_file.sheet_name == '':
1791
+ if (
1792
+ input_file.received_file.file_type in ("xlsx", "excel")
1793
+ and input_file.received_file.table_settings.sheet_name == ""
1794
+ ):
1456
1795
  sheet_name = fastexcel.read_excel(input_file.received_file.path).sheet_names[0]
1457
- input_file.received_file.sheet_name = sheet_name
1796
+ input_file.received_file.table_settings.sheet_name = sheet_name
1458
1797
 
1459
1798
  received_file = input_file.received_file
1460
1799
  input_file.received_file.set_absolute_filepath()
1461
1800
 
1462
1801
  def _func():
1463
1802
  input_file.received_file.set_absolute_filepath()
1464
- if input_file.received_file.file_type == 'parquet':
1803
+ if input_file.received_file.file_type == "parquet":
1465
1804
  input_data = FlowDataEngine.create_from_path(input_file.received_file)
1466
- elif input_file.received_file.file_type == 'csv' and 'utf' in input_file.received_file.encoding:
1805
+ elif (
1806
+ input_file.received_file.file_type == "csv"
1807
+ and "utf" in input_file.received_file.table_settings.encoding
1808
+ ):
1467
1809
  input_data = FlowDataEngine.create_from_path(input_file.received_file)
1468
1810
  else:
1469
- input_data = FlowDataEngine.create_from_path_worker(input_file.received_file,
1470
- node_id=input_file.node_id,
1471
- flow_id=self.flow_id)
1811
+ input_data = FlowDataEngine.create_from_path_worker(
1812
+ input_file.received_file, node_id=input_file.node_id, flow_id=self.flow_id
1813
+ )
1472
1814
  input_data.name = input_file.received_file.name
1473
1815
  return input_data
1474
1816
 
@@ -1476,51 +1818,57 @@ class FlowGraph:
1476
1818
  schema_callback = None
1477
1819
  if node:
1478
1820
  start_hash = node.hash
1479
- node.node_type = 'read'
1480
- node.name = 'read'
1821
+ node.node_type = "read"
1822
+ node.name = "read"
1481
1823
  node.function = _func
1482
1824
  node.setting_input = input_file
1483
- if input_file.node_id not in set(start_node.node_id for start_node in self._flow_starts):
1484
- self._flow_starts.append(node)
1825
+ self.add_node_to_starting_list(node)
1485
1826
 
1486
1827
  if start_hash != node.hash:
1487
- logger.info('Hash changed, updating schema')
1828
+ logger.info("Hash changed, updating schema")
1488
1829
  if len(received_file.fields) > 0:
1489
1830
  # If the file has fields defined, we can use them to create the schema
1490
1831
  def schema_callback():
1491
1832
  return [FlowfileColumn.from_input(f.name, f.data_type) for f in received_file.fields]
1492
1833
 
1493
- elif input_file.received_file.file_type in ('csv', 'json', 'parquet'):
1834
+ elif input_file.received_file.file_type in ("csv", "json", "parquet"):
1494
1835
  # everything that can be scanned by polars
1495
1836
  def schema_callback():
1496
1837
  input_data = FlowDataEngine.create_from_path(input_file.received_file)
1497
1838
  return input_data.schema
1498
1839
 
1499
- elif input_file.received_file.file_type in ('xlsx', 'excel'):
1840
+ elif input_file.received_file.file_type in ("xlsx", "excel"):
1500
1841
  # If the file is an Excel file, we need to use the openpyxl engine to read the schema
1501
- schema_callback = get_xlsx_schema_callback(engine='openpyxl',
1502
- file_path=received_file.file_path,
1503
- sheet_name=received_file.sheet_name,
1504
- start_row=received_file.start_row,
1505
- end_row=received_file.end_row,
1506
- start_column=received_file.start_column,
1507
- end_column=received_file.end_column,
1508
- has_headers=received_file.has_headers)
1842
+ schema_callback = get_xlsx_schema_callback(
1843
+ engine="openpyxl",
1844
+ file_path=received_file.file_path,
1845
+ sheet_name=received_file.table_settings.sheet_name,
1846
+ start_row=received_file.table_settings.start_row,
1847
+ end_row=received_file.table_settings.end_row,
1848
+ start_column=received_file.table_settings.start_column,
1849
+ end_column=received_file.table_settings.end_column,
1850
+ has_headers=received_file.table_settings.has_headers,
1851
+ )
1509
1852
  else:
1510
1853
  schema_callback = None
1511
1854
  else:
1512
- node = FlowNode(input_file.node_id, function=_func,
1513
- setting_input=input_file,
1514
- name='read', node_type='read', parent_uuid=self.uuid)
1855
+ node = FlowNode(
1856
+ input_file.node_id,
1857
+ function=_func,
1858
+ setting_input=input_file,
1859
+ name="read",
1860
+ node_type="read",
1861
+ parent_uuid=self.uuid,
1862
+ )
1515
1863
  self._node_db[input_file.node_id] = node
1516
- self._flow_starts.append(node)
1864
+ self.add_node_to_starting_list(node)
1517
1865
  self._node_ids.append(input_file.node_id)
1518
1866
 
1519
1867
  if schema_callback is not None:
1520
1868
  node.schema_callback = schema_callback
1521
1869
  return self
1522
1870
 
1523
- def add_datasource(self, input_file: Union[input_schema.NodeDatasource, input_schema.NodeManualInput]) -> "FlowGraph":
1871
+ def add_datasource(self, input_file: input_schema.NodeDatasource | input_schema.NodeManualInput) -> "FlowGraph":
1524
1872
  """Adds a data source node to the graph.
1525
1873
 
1526
1874
  This method serves as a factory for creating starting nodes, handling both
@@ -1534,25 +1882,30 @@ class FlowGraph:
1534
1882
  """
1535
1883
  if isinstance(input_file, input_schema.NodeManualInput):
1536
1884
  input_data = FlowDataEngine(input_file.raw_data_format)
1537
- ref = 'manual_input'
1885
+ ref = "manual_input"
1538
1886
  else:
1539
1887
  input_data = FlowDataEngine(path_ref=input_file.file_ref)
1540
- ref = 'datasource'
1888
+ ref = "datasource"
1541
1889
  node = self.get_node(input_file.node_id)
1542
1890
  if node:
1543
1891
  node.node_type = ref
1544
1892
  node.name = ref
1545
1893
  node.function = input_data
1546
1894
  node.setting_input = input_file
1547
- if not input_file.node_id in set(start_node.node_id for start_node in self._flow_starts):
1548
- self._flow_starts.append(node)
1895
+ self.add_node_to_starting_list(node)
1896
+
1549
1897
  else:
1550
1898
  input_data.collect()
1551
- node = FlowNode(input_file.node_id, function=input_data,
1552
- setting_input=input_file,
1553
- name=ref, node_type=ref, parent_uuid=self.uuid)
1899
+ node = FlowNode(
1900
+ input_file.node_id,
1901
+ function=input_data,
1902
+ setting_input=input_file,
1903
+ name=ref,
1904
+ node_type=ref,
1905
+ parent_uuid=self.uuid,
1906
+ )
1554
1907
  self._node_db[input_file.node_id] = node
1555
- self._flow_starts.append(node)
1908
+ self.add_node_to_starting_list(node)
1556
1909
  self._node_ids.append(input_file.node_id)
1557
1910
  return self
1558
1911
 
@@ -1567,7 +1920,7 @@ class FlowGraph:
1567
1920
  self.add_datasource(input_file)
1568
1921
 
1569
1922
  @property
1570
- def nodes(self) -> List[FlowNode]:
1923
+ def nodes(self) -> list[FlowNode]:
1571
1924
  """Gets a list of all FlowNode objects in the graph."""
1572
1925
 
1573
1926
  return list(self._node_db.values())
@@ -1577,7 +1930,7 @@ class FlowGraph:
1577
1930
  """Gets the current execution mode ('Development' or 'Performance')."""
1578
1931
  return self.flow_settings.execution_mode
1579
1932
 
1580
- def get_implicit_starter_nodes(self) -> List[FlowNode]:
1933
+ def get_implicit_starter_nodes(self) -> list[FlowNode]:
1581
1934
  """Finds nodes that can act as starting points but are not explicitly defined as such.
1582
1935
 
1583
1936
  Some nodes, like the Polars Code node, can function without an input. This
@@ -1623,17 +1976,31 @@ class FlowGraph:
1623
1976
  if not flow_node:
1624
1977
  raise Exception("Node not found found")
1625
1978
  skip_nodes, execution_order = compute_execution_plan(
1626
- nodes=self.nodes, flow_starts=self._flow_starts+self.get_implicit_starter_nodes()
1979
+ nodes=self.nodes, flow_starts=self._flow_starts + self.get_implicit_starter_nodes()
1627
1980
  )
1628
1981
  if flow_node.node_id in [skip_node.node_id for skip_node in skip_nodes]:
1629
1982
  raise Exception("Node can not be executed because it does not have it's inputs")
1630
1983
 
1631
- def create_initial_run_information(self, number_of_nodes: int,
1632
- run_type: Literal["fetch_one", "full_run"]):
1984
+ def create_initial_run_information(self, number_of_nodes: int, run_type: Literal["fetch_one", "full_run"]):
1985
+ return RunInformation(
1986
+ flow_id=self.flow_id,
1987
+ start_time=datetime.datetime.now(),
1988
+ end_time=None,
1989
+ success=None,
1990
+ number_of_nodes=number_of_nodes,
1991
+ node_step_result=[],
1992
+ run_type=run_type,
1993
+ )
1994
+
1995
+ def create_empty_run_information(self) -> RunInformation:
1633
1996
  return RunInformation(
1634
- flow_id=self.flow_id, start_time=datetime.datetime.now(), end_time=None,
1635
- success=None, number_of_nodes=number_of_nodes, node_step_result=[],
1636
- run_type=run_type
1997
+ flow_id=self.flow_id,
1998
+ start_time=None,
1999
+ end_time=None,
2000
+ success=None,
2001
+ number_of_nodes=0,
2002
+ node_step_result=[],
2003
+ run_type="init",
1637
2004
  )
1638
2005
 
1639
2006
  def trigger_fetch_node(self, node_id: int) -> RunInformation | None:
@@ -1647,14 +2014,16 @@ class FlowGraph:
1647
2014
  self.latest_run_info = self.create_initial_run_information(1, "fetch_one")
1648
2015
  node_logger = self.flow_logger.get_node_logger(flow_node.node_id)
1649
2016
  node_result = NodeResult(node_id=flow_node.node_id, node_name=flow_node.name)
1650
- logger.info(f'Starting to run: node {flow_node.node_id}, start time: {node_result.start_timestamp}')
2017
+ logger.info(f"Starting to run: node {flow_node.node_id}, start time: {node_result.start_timestamp}")
1651
2018
  try:
1652
2019
  self.latest_run_info.node_step_result.append(node_result)
1653
- flow_node.execute_node(run_location=self.flow_settings.execution_location,
1654
- performance_mode=False,
1655
- node_logger=node_logger,
1656
- optimize_for_downstream=False,
1657
- reset_cache=True)
2020
+ flow_node.execute_node(
2021
+ run_location=self.flow_settings.execution_location,
2022
+ performance_mode=False,
2023
+ node_logger=node_logger,
2024
+ optimize_for_downstream=False,
2025
+ reset_cache=True,
2026
+ )
1658
2027
  node_result.error = str(flow_node.results.errors)
1659
2028
  if self.flow_settings.is_canceled:
1660
2029
  node_result.success = None
@@ -1669,12 +2038,12 @@ class FlowGraph:
1669
2038
  self.flow_settings.is_running = False
1670
2039
  return self.get_run_info()
1671
2040
  except Exception as e:
1672
- node_result.error = 'Node did not run'
2041
+ node_result.error = "Node did not run"
1673
2042
  node_result.success = False
1674
2043
  node_result.end_timestamp = time()
1675
2044
  node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
1676
2045
  node_result.is_running = False
1677
- node_logger.error(f'Error in node {flow_node.node_id}: {e}')
2046
+ node_logger.error(f"Error in node {flow_node.node_id}: {e}")
1678
2047
  finally:
1679
2048
  self.flow_settings.is_running = False
1680
2049
 
@@ -1691,39 +2060,38 @@ class FlowGraph:
1691
2060
  Exception: If the flow is already running.
1692
2061
  """
1693
2062
  if self.flow_settings.is_running:
1694
- raise Exception('Flow is already running')
2063
+ raise Exception("Flow is already running")
1695
2064
  try:
1696
-
1697
2065
  self.flow_settings.is_running = True
1698
2066
  self.flow_settings.is_canceled = False
1699
2067
  self.flow_logger.clear_log_file()
1700
- self.flow_logger.info('Starting to run flowfile flow...')
1701
-
2068
+ self.flow_logger.info("Starting to run flowfile flow...")
1702
2069
  skip_nodes, execution_order = compute_execution_plan(
1703
- nodes=self.nodes,
1704
- flow_starts=self._flow_starts+self.get_implicit_starter_nodes()
2070
+ nodes=self.nodes, flow_starts=self._flow_starts + self.get_implicit_starter_nodes()
1705
2071
  )
1706
2072
 
1707
2073
  self.latest_run_info = self.create_initial_run_information(len(execution_order), "full_run")
1708
2074
 
1709
2075
  skip_node_message(self.flow_logger, skip_nodes)
1710
2076
  execution_order_message(self.flow_logger, execution_order)
1711
- performance_mode = self.flow_settings.execution_mode == 'Performance'
2077
+ performance_mode = self.flow_settings.execution_mode == "Performance"
1712
2078
 
1713
2079
  for node in execution_order:
1714
2080
  node_logger = self.flow_logger.get_node_logger(node.node_id)
1715
2081
  if self.flow_settings.is_canceled:
1716
- self.flow_logger.info('Flow canceled')
2082
+ self.flow_logger.info("Flow canceled")
1717
2083
  break
1718
2084
  if node in skip_nodes:
1719
- node_logger.info(f'Skipping node {node.node_id}')
2085
+ node_logger.info(f"Skipping node {node.node_id}")
1720
2086
  continue
1721
2087
  node_result = NodeResult(node_id=node.node_id, node_name=node.name)
1722
2088
  self.latest_run_info.node_step_result.append(node_result)
1723
- logger.info(f'Starting to run: node {node.node_id}, start time: {node_result.start_timestamp}')
1724
- node.execute_node(run_location=self.flow_settings.execution_location,
1725
- performance_mode=performance_mode,
1726
- node_logger=node_logger)
2089
+ logger.info(f"Starting to run: node {node.node_id}, start time: {node_result.start_timestamp}")
2090
+ node.execute_node(
2091
+ run_location=self.flow_settings.execution_location,
2092
+ performance_mode=performance_mode,
2093
+ node_logger=node_logger,
2094
+ )
1727
2095
  try:
1728
2096
  node_result.error = str(node.results.errors)
1729
2097
  if self.flow_settings.is_canceled:
@@ -1736,28 +2104,29 @@ class FlowGraph:
1736
2104
  node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
1737
2105
  node_result.is_running = False
1738
2106
  except Exception as e:
1739
- node_result.error = 'Node did not run'
2107
+ node_result.error = "Node did not run"
1740
2108
  node_result.success = False
1741
2109
  node_result.end_timestamp = time()
1742
2110
  node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
1743
2111
  node_result.is_running = False
1744
- node_logger.error(f'Error in node {node.node_id}: {e}')
2112
+ node_logger.error(f"Error in node {node.node_id}: {e}")
1745
2113
  if not node_result.success:
1746
2114
  skip_nodes.extend(list(node.get_all_dependent_nodes()))
1747
- node_logger.info(f'Completed node with success: {node_result.success}')
2115
+ node_logger.info(f"Completed node with success: {node_result.success}")
1748
2116
  self.latest_run_info.nodes_completed += 1
1749
- self.flow_logger.info('Flow completed!')
2117
+ self.latest_run_info.end_time = datetime.datetime.now()
2118
+ self.flow_logger.info("Flow completed!")
1750
2119
  self.end_datetime = datetime.datetime.now()
1751
2120
  self.flow_settings.is_running = False
1752
2121
  if self.flow_settings.is_canceled:
1753
- self.flow_logger.info('Flow canceled')
2122
+ self.flow_logger.info("Flow canceled")
1754
2123
  return self.get_run_info()
1755
2124
  except Exception as e:
1756
2125
  raise e
1757
2126
  finally:
1758
2127
  self.flow_settings.is_running = False
1759
2128
 
1760
- def get_run_info(self) -> RunInformation | None:
2129
+ def get_run_info(self) -> RunInformation:
1761
2130
  """Gets a summary of the most recent graph execution.
1762
2131
 
1763
2132
  Returns:
@@ -1765,7 +2134,7 @@ class FlowGraph:
1765
2134
  """
1766
2135
  is_running = self.flow_settings.is_running
1767
2136
  if self.latest_run_info is None:
1768
- return
2137
+ return self.create_empty_run_information()
1769
2138
 
1770
2139
  elif not is_running and self.latest_run_info.success is not None:
1771
2140
  return self.latest_run_info
@@ -1776,7 +2145,7 @@ class FlowGraph:
1776
2145
  return run_info
1777
2146
 
1778
2147
  @property
1779
- def node_connections(self) -> List[Tuple[int, int]]:
2148
+ def node_connections(self) -> list[tuple[int, int]]:
1780
2149
  """Computes and returns a list of all connections in the graph.
1781
2150
 
1782
2151
  Returns:
@@ -1786,8 +2155,9 @@ class FlowGraph:
1786
2155
  for node in self.nodes:
1787
2156
  outgoing_connections = [(node.node_id, ltn.node_id) for ltn in node.leads_to_nodes]
1788
2157
  incoming_connections = [(don.node_id, node.node_id) for don in node.all_inputs]
1789
- node_connections = [c for c in outgoing_connections + incoming_connections if (c[0] is not None
1790
- and c[1] is not None)]
2158
+ node_connections = [
2159
+ c for c in outgoing_connections + incoming_connections if (c[0] is not None and c[1] is not None)
2160
+ ]
1791
2161
  for node_connection in node_connections:
1792
2162
  if node_connection not in connections:
1793
2163
  connections.add(node_connection)
@@ -1806,22 +2176,60 @@ class FlowGraph:
1806
2176
  node = self._node_db[node_id]
1807
2177
  return node.get_node_data(flow_id=self.flow_id, include_example=include_example)
1808
2178
 
2179
+ def get_flowfile_data(self) -> schemas.FlowfileData:
2180
+ start_node_ids = {v.node_id for v in self._flow_starts}
2181
+
2182
+ nodes = []
2183
+ for node in self.nodes:
2184
+ node_info = node.get_node_information()
2185
+ flowfile_node = schemas.FlowfileNode(
2186
+ id=node_info.id,
2187
+ type=node_info.type,
2188
+ is_start_node=node.node_id in start_node_ids,
2189
+ description=node_info.description,
2190
+ x_position=int(node_info.x_position),
2191
+ y_position=int(node_info.y_position),
2192
+ left_input_id=node_info.left_input_id,
2193
+ right_input_id=node_info.right_input_id,
2194
+ input_ids=node_info.input_ids,
2195
+ outputs=node_info.outputs,
2196
+ setting_input=node_info.setting_input,
2197
+ )
2198
+ nodes.append(flowfile_node)
2199
+
2200
+ settings = schemas.FlowfileSettings(
2201
+ description=self.flow_settings.description,
2202
+ execution_mode=self.flow_settings.execution_mode,
2203
+ execution_location=self.flow_settings.execution_location,
2204
+ auto_save=self.flow_settings.auto_save,
2205
+ show_detailed_progress=self.flow_settings.show_detailed_progress,
2206
+ )
2207
+ return schemas.FlowfileData(
2208
+ flowfile_version=__version__,
2209
+ flowfile_id=self.flow_id,
2210
+ flowfile_name=self.__name__,
2211
+ flowfile_settings=settings,
2212
+ nodes=nodes,
2213
+ )
2214
+
1809
2215
  def get_node_storage(self) -> schemas.FlowInformation:
1810
2216
  """Serializes the entire graph's state into a storable format.
1811
2217
 
1812
2218
  Returns:
1813
2219
  A FlowInformation object representing the complete graph.
1814
2220
  """
1815
- node_information = {node.node_id: node.get_node_information() for
1816
- node in self.nodes if node.is_setup and node.is_correct}
2221
+ node_information = {
2222
+ node.node_id: node.get_node_information() for node in self.nodes if node.is_setup and node.is_correct
2223
+ }
1817
2224
 
1818
- return schemas.FlowInformation(flow_id=self.flow_id,
1819
- flow_name=self.__name__,
1820
- flow_settings=self.flow_settings,
1821
- data=node_information,
1822
- node_starts=[v.node_id for v in self._flow_starts],
1823
- node_connections=self.node_connections
1824
- )
2225
+ return schemas.FlowInformation(
2226
+ flow_id=self.flow_id,
2227
+ flow_name=self.__name__,
2228
+ flow_settings=self.flow_settings,
2229
+ data=node_information,
2230
+ node_starts=[v.node_id for v in self._flow_starts],
2231
+ node_connections=self.node_connections,
2232
+ )
1825
2233
 
1826
2234
  def cancel(self):
1827
2235
  """Cancels an ongoing graph execution."""
@@ -1838,19 +2246,67 @@ class FlowGraph:
1838
2246
  for node in self.nodes:
1839
2247
  node.remove_cache()
1840
2248
 
2249
+ def _handle_flow_renaming(self, new_name: str, new_path: Path):
2250
+ """
2251
+ Handle the rename of a flow when it is being saved.
2252
+ """
2253
+ if (
2254
+ self.flow_settings
2255
+ and self.flow_settings.path
2256
+ and Path(self.flow_settings.path).absolute() != new_path.absolute()
2257
+ ):
2258
+ self.__name__ = new_name
2259
+ self.flow_settings.save_location = str(new_path.absolute())
2260
+ self.flow_settings.name = new_name
2261
+ if self.flow_settings and not self.flow_settings.save_location:
2262
+ self.flow_settings.save_location = str(new_path.absolute())
2263
+ self.__name__ = new_name
2264
+ self.flow_settings.name = new_name
2265
+
1841
2266
  def save_flow(self, flow_path: str):
1842
2267
  """Saves the current state of the flow graph to a file.
1843
2268
 
2269
+ Supports multiple formats based on file extension:
2270
+ - .yaml / .yml: New YAML format
2271
+ - .json: JSON format
2272
+
1844
2273
  Args:
1845
2274
  flow_path: The path where the flow file will be saved.
1846
2275
  """
1847
2276
  logger.info("Saving flow to %s", flow_path)
1848
- os.makedirs(os.path.dirname(flow_path), exist_ok=True)
2277
+ path = Path(flow_path)
2278
+ os.makedirs(path.parent, exist_ok=True)
2279
+ suffix = path.suffix.lower()
2280
+ new_flow_name = path.name.replace(suffix, "")
2281
+ self._handle_flow_renaming(new_flow_name, path)
2282
+ self.flow_settings.modified_on = datetime.datetime.now().timestamp()
1849
2283
  try:
1850
- with open(flow_path, 'wb') as f:
1851
- pickle.dump(self.get_node_storage(), f)
2284
+ if suffix == ".flowfile":
2285
+ raise DeprecationWarning(
2286
+ "The .flowfile format is deprecated. Please use .yaml or .json formats.\n\n"
2287
+ "Or stay on v0.4.1 if you still need .flowfile support.\n\n"
2288
+ )
2289
+ elif suffix in (".yaml", ".yml"):
2290
+ flowfile_data = self.get_flowfile_data()
2291
+ data = flowfile_data.model_dump(mode="json")
2292
+ with open(flow_path, "w", encoding="utf-8") as f:
2293
+ yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
2294
+ elif suffix == ".json":
2295
+ flowfile_data = self.get_flowfile_data()
2296
+ data = flowfile_data.model_dump(mode="json")
2297
+ with open(flow_path, "w", encoding="utf-8") as f:
2298
+ json.dump(data, f, indent=2, ensure_ascii=False)
2299
+
2300
+ else:
2301
+ flowfile_data = self.get_flowfile_data()
2302
+ logger.warning(f"Unknown file extension {suffix}. Defaulting to YAML format.")
2303
+ data = flowfile_data.model_dump(mode="json")
2304
+ with open(flow_path, "w", encoding="utf-8") as f:
2305
+ yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
2306
+
1852
2307
  except Exception as e:
1853
2308
  logger.error(f"Error saving flow: {e}")
2309
+ raise
1854
2310
 
1855
2311
  self.flow_settings.path = flow_path
1856
2312
 
@@ -1863,11 +2319,7 @@ class FlowGraph:
1863
2319
  Returns:
1864
2320
  A dictionary representing the graph in Drawflow format.
1865
2321
  """
1866
- result = {
1867
- 'Home': {
1868
- "data": {}
1869
- }
1870
- }
2322
+ result = {"Home": {"data": {}}}
1871
2323
  flow_info: schemas.FlowInformation = self.get_node_storage()
1872
2324
 
1873
2325
  for node_id, node_info in flow_info.data.items():
@@ -1886,7 +2338,7 @@ class FlowGraph:
1886
2338
  "inputs": {},
1887
2339
  "outputs": {},
1888
2340
  "pos_x": pos_x,
1889
- "pos_y": pos_y
2341
+ "pos_y": pos_y,
1890
2342
  }
1891
2343
  except Exception as e:
1892
2344
  logger.error(e)
@@ -1900,24 +2352,27 @@ class FlowGraph:
1900
2352
  leading_to_node = self.get_node(output_node_id)
1901
2353
  input_types = leading_to_node.get_input_type(node_info.id)
1902
2354
  for input_type in input_types:
1903
- if input_type == 'main':
1904
- input_frontend_id = 'input_1'
1905
- elif input_type == 'right':
1906
- input_frontend_id = 'input_2'
1907
- elif input_type == 'left':
1908
- input_frontend_id = 'input_3'
2355
+ if input_type == "main":
2356
+ input_frontend_id = "input_1"
2357
+ elif input_type == "right":
2358
+ input_frontend_id = "input_2"
2359
+ elif input_type == "left":
2360
+ input_frontend_id = "input_3"
1909
2361
  else:
1910
- input_frontend_id = 'input_1'
2362
+ input_frontend_id = "input_1"
1911
2363
  connection = {"node": str(output_node_id), "input": input_frontend_id}
1912
2364
  connections.append(connection)
1913
2365
 
1914
- result["Home"]["data"][str(node_id)]["outputs"]["output_1"] = {
1915
- "connections": connections}
2366
+ result["Home"]["data"][str(node_id)]["outputs"]["output_1"] = {"connections": connections}
1916
2367
  else:
1917
2368
  result["Home"]["data"][str(node_id)]["outputs"] = {"output_1": {"connections": []}}
1918
2369
 
1919
2370
  # Add input to the node based on `depending_on_id` in your backend data
1920
- if node_info.left_input_id is not None or node_info.right_input_id is not None or node_info.input_ids is not None:
2371
+ if (
2372
+ node_info.left_input_id is not None
2373
+ or node_info.right_input_id is not None
2374
+ or node_info.input_ids is not None
2375
+ ):
1921
2376
  main_inputs = node_info.main_input_ids
1922
2377
  result["Home"]["data"][str(node_id)]["inputs"]["input_1"] = {
1923
2378
  "connections": [{"node": str(main_node_id), "input": "output_1"} for main_node_id in main_inputs]
@@ -1938,8 +2393,8 @@ class FlowGraph:
1938
2393
  Returns:
1939
2394
  A VueFlowInput object.
1940
2395
  """
1941
- edges: List[schemas.NodeEdge] = []
1942
- nodes: List[schemas.NodeInput] = []
2396
+ edges: list[schemas.NodeEdge] = []
2397
+ nodes: list[schemas.NodeInput] = []
1943
2398
  for node in self.nodes:
1944
2399
  nodes.append(node.get_node_input())
1945
2400
  edges.extend(node.get_edge_input())
@@ -1951,7 +2406,9 @@ class FlowGraph:
1951
2406
  for node in self.nodes:
1952
2407
  node.reset(True)
1953
2408
 
1954
- def copy_node(self, new_node_settings: input_schema.NodePromise, existing_setting_input: Any, node_type: str) -> None:
2409
+ def copy_node(
2410
+ self, new_node_settings: input_schema.NodePromise, existing_setting_input: Any, node_type: str
2411
+ ) -> None:
1955
2412
  """Creates a copy of an existing node.
1956
2413
 
1957
2414
  Args:
@@ -1964,9 +2421,7 @@ class FlowGraph:
1964
2421
  if isinstance(existing_setting_input, input_schema.NodePromise):
1965
2422
  return
1966
2423
 
1967
- combined_settings = combine_existing_settings_and_new_settings(
1968
- existing_setting_input, new_node_settings
1969
- )
2424
+ combined_settings = combine_existing_settings_and_new_settings(existing_setting_input, new_node_settings)
1970
2425
  getattr(self, f"add_{node_type}")(combined_settings)
1971
2426
 
1972
2427
  def generate_code(self):
@@ -1974,6 +2429,7 @@ class FlowGraph:
1974
2429
  This method exports the flow graph to a Polars-compatible format.
1975
2430
  """
1976
2431
  from flowfile_core.flowfile.code_generator.code_generator import export_flow_to_polars
2432
+
1977
2433
  print(export_flow_to_polars(self))
1978
2434
 
1979
2435
 
@@ -1992,13 +2448,7 @@ def combine_existing_settings_and_new_settings(setting_input: Any, new_settings:
1992
2448
  copied_setting_input = deepcopy(setting_input)
1993
2449
 
1994
2450
  # Update only attributes that exist on new_settings
1995
- fields_to_update = (
1996
- "node_id",
1997
- "pos_x",
1998
- "pos_y",
1999
- "description",
2000
- "flow_id"
2001
- )
2451
+ fields_to_update = ("node_id", "pos_x", "pos_y", "description", "flow_id")
2002
2452
 
2003
2453
  for field in fields_to_update:
2004
2454
  if hasattr(new_settings, field) and getattr(new_settings, field) is not None:
@@ -2014,12 +2464,12 @@ def add_connection(flow: FlowGraph, node_connection: input_schema.NodeConnection
2014
2464
  flow: The FlowGraph instance to modify.
2015
2465
  node_connection: An object defining the source and target of the connection.
2016
2466
  """
2017
- logger.info('adding a connection')
2467
+ logger.info("adding a connection")
2018
2468
  from_node = flow.get_node(node_connection.output_connection.node_id)
2019
2469
  to_node = flow.get_node(node_connection.input_connection.node_id)
2020
- logger.info(f'from_node={from_node}, to_node={to_node}')
2470
+ logger.info(f"from_node={from_node}, to_node={to_node}")
2021
2471
  if not (from_node and to_node):
2022
- raise HTTPException(404, 'Not not available')
2472
+ raise HTTPException(404, "Not not available")
2023
2473
  else:
2024
2474
  to_node.add_node_connection(from_node, node_connection.input_connection.get_node_input_connection_type())
2025
2475