Flowfile 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329) hide show
  1. build_backends/main.py +25 -22
  2. build_backends/main_prd.py +10 -19
  3. flowfile/__init__.py +178 -74
  4. flowfile/__main__.py +10 -7
  5. flowfile/api.py +51 -57
  6. flowfile/web/__init__.py +14 -9
  7. flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
  8. flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
  9. flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
  10. flowfile/web/static/assets/{CloudConnectionManager-0dfba9f2.js → CloudConnectionView-f13f202b.js} +11 -11
  11. flowfile/web/static/assets/{CloudStorageReader-d5b1b6c9.js → CloudStorageReader-0023d4a5.js} +10 -8
  12. flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
  13. flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
  14. flowfile/web/static/assets/{CloudStorageWriter-00d87aad.js → CloudStorageWriter-8e781e11.js} +10 -8
  15. flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
  16. flowfile/web/static/assets/{ColumnSelector-4685e75d.js → ColumnSelector-8ad68ea9.js} +3 -5
  17. flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
  18. flowfile/web/static/assets/{ContextMenu-23e909da.js → ContextMenu-31ee57f0.js} +3 -3
  19. flowfile/web/static/assets/{ContextMenu-70ae0c79.js → ContextMenu-69a74055.js} +3 -3
  20. flowfile/web/static/assets/{ContextMenu-f149cf7c.js → ContextMenu-8e2051c6.js} +3 -3
  21. flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
  22. flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
  23. flowfile/web/static/assets/{CrossJoin-702a3edd.js → CrossJoin-03df6938.js} +12 -10
  24. flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
  25. flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
  26. flowfile/web/static/assets/{CustomNode-b1519993.js → CustomNode-8479239b.js} +36 -24
  27. flowfile/web/static/assets/{DatabaseConnectionSettings-6f3e4ea5.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
  28. flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
  29. flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
  30. flowfile/web/static/assets/{DatabaseReader-d38c7295.js → DatabaseReader-c58b9552.js} +25 -15
  31. flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
  32. flowfile/web/static/assets/{DatabaseManager-cf5ef661.js → DatabaseView-d26a9140.js} +11 -11
  33. flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
  34. flowfile/web/static/assets/{DatabaseWriter-b04ef46a.js → DatabaseWriter-4d05ddc7.js} +17 -10
  35. flowfile/web/static/assets/{designer-8da3ba3a.css → DesignerView-a6d0ee84.css} +614 -546
  36. flowfile/web/static/assets/{designer-9633482a.js → DesignerView-e6f5c0e8.js} +1107 -3170
  37. flowfile/web/static/assets/{documentation-ca400224.js → DocumentationView-2e78ef1b.js} +5 -5
  38. flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
  39. flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
  40. flowfile/web/static/assets/{ExploreData-5fa10ed8.js → ExploreData-7b54caca.js} +18 -9
  41. flowfile/web/static/assets/{ExternalSource-d39af878.js → ExternalSource-3fa399b2.js} +9 -7
  42. flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
  43. flowfile/web/static/assets/Filter-7494ea97.css +48 -0
  44. flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
  45. flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
  46. flowfile/web/static/assets/{Formula-6b04fb1d.js → Formula-aac42b1e.js} +13 -11
  47. flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
  48. flowfile/web/static/assets/{FuzzyMatch-999521f4.js → FuzzyMatch-cd9bbfca.js} +12 -10
  49. flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
  50. flowfile/web/static/assets/{GraphSolver-17dd2198.js → GraphSolver-c7e6780e.js} +13 -11
  51. flowfile/web/static/assets/{GroupBy-6b039e18.js → GroupBy-93c5d22b.js} +9 -7
  52. flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
  53. flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
  54. flowfile/web/static/assets/{Join-24d0f113.js → Join-a19b2de2.js} +13 -11
  55. flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
  56. flowfile/web/static/assets/LoginView-d325d632.css +172 -0
  57. flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
  58. flowfile/web/static/assets/{ManualInput-34639209.js → ManualInput-8d3374b2.js} +170 -116
  59. flowfile/web/static/assets/{MultiSelect-0e8724a3.js → MultiSelect-ad1b6243.js} +2 -2
  60. flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
  61. flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
  62. flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
  63. flowfile/web/static/assets/{NumericInput-3d63a470.js → NumericInput-7100234c.js} +2 -2
  64. flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
  65. flowfile/web/static/assets/{Output-283fe388.css → Output-35e97000.css} +6 -6
  66. flowfile/web/static/assets/{Output-edea9802.js → Output-f5efd2aa.js} +12 -9
  67. flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
  68. flowfile/web/static/assets/{Pivot-61d19301.js → Pivot-d981d23c.js} +11 -9
  69. flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
  70. flowfile/web/static/assets/{PivotValidation-f97fec5b.js → PivotValidation-39386e95.js} +3 -3
  71. flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
  72. flowfile/web/static/assets/{PivotValidation-de9f43fe.js → PivotValidation-63de1f73.js} +3 -3
  73. flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
  74. flowfile/web/static/assets/{PolarsCode-bc3c9984.js → PolarsCode-f9d69217.js} +18 -9
  75. flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
  76. flowfile/web/static/assets/PopOver-d96599db.css +33 -0
  77. flowfile/web/static/assets/{Read-e808b239.css → Read-36e7bd51.css} +12 -12
  78. flowfile/web/static/assets/{Read-64a3f259.js → Read-aec2e377.js} +14 -11
  79. flowfile/web/static/assets/{RecordCount-3d5039be.js → RecordCount-78ed6845.js} +6 -4
  80. flowfile/web/static/assets/{RecordId-597510e0.js → RecordId-2156e890.js} +8 -6
  81. flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
  82. flowfile/web/static/assets/{SQLQueryComponent-df51adbe.js → SQLQueryComponent-48c72f5b.js} +3 -3
  83. flowfile/web/static/assets/{Sample-4be0a507.js → Sample-1352ca74.js} +6 -4
  84. flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
  85. flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
  86. flowfile/web/static/assets/{SecretManager-4839be57.js → SecretsView-17df66ee.js} +35 -36
  87. flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
  88. flowfile/web/static/assets/{Select-9b72f201.js → Select-0aee4c54.js} +9 -7
  89. flowfile/web/static/assets/{SettingsSection-f0f75a42.js → SettingsSection-0784e157.js} +3 -3
  90. flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
  91. flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
  92. flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
  93. flowfile/web/static/assets/{SettingsSection-e1e9c953.js → SettingsSection-cd341bb6.js} +3 -3
  94. flowfile/web/static/assets/{SettingsSection-7ded385d.js → SettingsSection-f2002a6d.js} +3 -3
  95. flowfile/web/static/assets/{SingleSelect-6c777aac.js → SingleSelect-460cc0ea.js} +2 -2
  96. flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
  97. flowfile/web/static/assets/{SliderInput-7cb93e62.js → SliderInput-5d926864.js} +7 -4
  98. flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
  99. flowfile/web/static/assets/{Sort-6cbde21a.js → Sort-3cdc971b.js} +9 -7
  100. flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
  101. flowfile/web/static/assets/{TextInput-d9a40c11.js → TextInput-a2d0bfbd.js} +2 -2
  102. flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-5896c375.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
  103. flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
  104. flowfile/web/static/assets/{TextToRows-c4fcbf4d.js → TextToRows-918945f7.js} +11 -10
  105. flowfile/web/static/assets/{ToggleSwitch-4ef91d19.js → ToggleSwitch-f0ef5196.js} +2 -2
  106. flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
  107. flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
  108. flowfile/web/static/assets/{UnavailableFields-a03f512c.js → UnavailableFields-bdad6144.js} +4 -4
  109. flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
  110. flowfile/web/static/assets/{Union-bfe9b996.js → Union-e8ab8c86.js} +8 -6
  111. flowfile/web/static/assets/{Unique-5d023a27.js → Unique-8cd4f976.js} +13 -10
  112. flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
  113. flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
  114. flowfile/web/static/assets/{Unpivot-91cc5354.js → Unpivot-8da14095.js} +10 -8
  115. flowfile/web/static/assets/{UnpivotValidation-7ee2de44.js → UnpivotValidation-6f7d89ff.js} +3 -3
  116. flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
  117. flowfile/web/static/assets/{VueGraphicWalker-e51b9924.js → VueGraphicWalker-3fb312e1.js} +4 -4
  118. flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
  119. flowfile/web/static/assets/{api-cf1221f0.js → api-24483f0d.js} +1 -1
  120. flowfile/web/static/assets/{api-c1bad5ca.js → api-8b81fa73.js} +1 -1
  121. flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
  122. flowfile/web/static/assets/{dropDown-614b998d.js → dropDown-ac0fda9d.js} +3 -3
  123. flowfile/web/static/assets/{fullEditor-f7971590.js → fullEditor-5497a84a.js} +11 -10
  124. flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
  125. flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
  126. flowfile/web/static/assets/{genericNodeSettings-4fe5f36b.js → genericNodeSettings-99014e1d.js} +5 -5
  127. flowfile/web/static/assets/index-07dda503.js +38 -0
  128. flowfile/web/static/assets/index-3ba44389.js +2696 -0
  129. flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
  130. flowfile/web/static/assets/{index-5429bbf8.js → index-fb6493ae.js} +41626 -40867
  131. flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
  132. flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
  133. flowfile/web/static/assets/{outputCsv-076b85ab.js → outputCsv-8f8ba42d.js} +3 -3
  134. flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
  135. flowfile/web/static/assets/{outputExcel-0fd17dbe.js → outputExcel-393f4fef.js} +3 -3
  136. flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
  137. flowfile/web/static/assets/{outputParquet-b61e0847.js → outputParquet-07c81f65.js} +4 -4
  138. flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
  139. flowfile/web/static/assets/{readCsv-a8bb8b61.js → readCsv-07f6d9ad.js} +3 -3
  140. flowfile/web/static/assets/{readCsv-c767cb37.css → readCsv-3bfac4c3.css} +15 -15
  141. flowfile/web/static/assets/{readExcel-806d2826.css → readExcel-3db6b763.css} +13 -13
  142. flowfile/web/static/assets/{readExcel-67b4aee0.js → readExcel-ed69bc8f.js} +5 -5
  143. flowfile/web/static/assets/{readParquet-48c81530.css → readParquet-c5244ad5.css} +4 -4
  144. flowfile/web/static/assets/{readParquet-92ce1dbc.js → readParquet-e3ed4528.js} +3 -3
  145. flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
  146. flowfile/web/static/assets/{selectDynamic-92e25ee3.js → selectDynamic-80b92899.js} +5 -5
  147. flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
  148. flowfile/web/static/assets/{vue-codemirror.esm-41b0e0d7.js → vue-codemirror.esm-0965f39f.js} +31 -640
  149. flowfile/web/static/assets/{vue-content-loader.es-2c8e608f.js → vue-content-loader.es-c506ad97.js} +1 -1
  150. flowfile/web/static/index.html +2 -2
  151. {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +2 -3
  152. flowfile-0.5.3.dist-info/RECORD +402 -0
  153. flowfile_core/__init__.py +13 -6
  154. flowfile_core/auth/jwt.py +51 -16
  155. flowfile_core/auth/models.py +32 -7
  156. flowfile_core/auth/password.py +89 -0
  157. flowfile_core/auth/secrets.py +8 -6
  158. flowfile_core/configs/__init__.py +9 -7
  159. flowfile_core/configs/flow_logger.py +15 -14
  160. flowfile_core/configs/node_store/__init__.py +72 -4
  161. flowfile_core/configs/node_store/nodes.py +155 -172
  162. flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
  163. flowfile_core/configs/settings.py +28 -15
  164. flowfile_core/database/connection.py +7 -6
  165. flowfile_core/database/init_db.py +96 -2
  166. flowfile_core/database/models.py +3 -1
  167. flowfile_core/fileExplorer/__init__.py +17 -0
  168. flowfile_core/fileExplorer/funcs.py +123 -57
  169. flowfile_core/fileExplorer/utils.py +10 -11
  170. flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
  171. flowfile_core/flowfile/analytics/analytics_processor.py +26 -24
  172. flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
  173. flowfile_core/flowfile/analytics/utils.py +1 -1
  174. flowfile_core/flowfile/code_generator/code_generator.py +358 -244
  175. flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
  176. flowfile_core/flowfile/connection_manager/models.py +1 -1
  177. flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
  178. flowfile_core/flowfile/database_connection_manager/models.py +1 -1
  179. flowfile_core/flowfile/extensions.py +17 -12
  180. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
  181. flowfile_core/flowfile/flow_data_engine/create/funcs.py +115 -83
  182. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +481 -423
  183. flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
  184. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
  185. flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
  186. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
  187. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
  188. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +31 -20
  189. flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
  190. flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
  191. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +14 -15
  192. flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
  193. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
  194. flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
  195. flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
  196. flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
  197. flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
  198. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +190 -127
  199. flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
  200. flowfile_core/flowfile/flow_data_engine/utils.py +99 -67
  201. flowfile_core/flowfile/flow_graph.py +918 -571
  202. flowfile_core/flowfile/flow_graph_utils.py +31 -49
  203. flowfile_core/flowfile/flow_node/flow_node.py +330 -233
  204. flowfile_core/flowfile/flow_node/models.py +53 -41
  205. flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
  206. flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
  207. flowfile_core/flowfile/handler.py +80 -30
  208. flowfile_core/flowfile/manage/compatibility_enhancements.py +209 -126
  209. flowfile_core/flowfile/manage/io_flowfile.py +54 -57
  210. flowfile_core/flowfile/node_designer/__init__.py +15 -13
  211. flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
  212. flowfile_core/flowfile/node_designer/custom_node.py +162 -36
  213. flowfile_core/flowfile/node_designer/ui_components.py +135 -34
  214. flowfile_core/flowfile/schema_callbacks.py +71 -51
  215. flowfile_core/flowfile/setting_generator/__init__.py +0 -1
  216. flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
  217. flowfile_core/flowfile/setting_generator/settings.py +64 -53
  218. flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
  219. flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
  220. flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
  221. flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
  222. flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
  223. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
  224. flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
  225. flowfile_core/flowfile/util/calculate_layout.py +9 -13
  226. flowfile_core/flowfile/util/execution_orderer.py +25 -17
  227. flowfile_core/flowfile/util/node_skipper.py +4 -4
  228. flowfile_core/flowfile/utils.py +19 -21
  229. flowfile_core/main.py +26 -19
  230. flowfile_core/routes/auth.py +284 -11
  231. flowfile_core/routes/cloud_connections.py +25 -25
  232. flowfile_core/routes/logs.py +21 -29
  233. flowfile_core/routes/public.py +3 -3
  234. flowfile_core/routes/routes.py +70 -34
  235. flowfile_core/routes/secrets.py +25 -27
  236. flowfile_core/routes/user_defined_components.py +483 -4
  237. flowfile_core/run_lock.py +0 -1
  238. flowfile_core/schemas/__init__.py +4 -6
  239. flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
  240. flowfile_core/schemas/cloud_storage_schemas.py +59 -53
  241. flowfile_core/schemas/input_schema.py +231 -144
  242. flowfile_core/schemas/output_model.py +49 -34
  243. flowfile_core/schemas/schemas.py +116 -89
  244. flowfile_core/schemas/transform_schema.py +518 -263
  245. flowfile_core/schemas/yaml_types.py +21 -7
  246. flowfile_core/secret_manager/secret_manager.py +17 -13
  247. flowfile_core/types.py +29 -9
  248. flowfile_core/utils/arrow_reader.py +7 -6
  249. flowfile_core/utils/excel_file_manager.py +3 -3
  250. flowfile_core/utils/fileManager.py +7 -7
  251. flowfile_core/utils/fl_executor.py +8 -10
  252. flowfile_core/utils/utils.py +4 -4
  253. flowfile_core/utils/validate_setup.py +5 -4
  254. flowfile_frame/__init__.py +106 -51
  255. flowfile_frame/adapters.py +2 -9
  256. flowfile_frame/adding_expr.py +73 -32
  257. flowfile_frame/cloud_storage/frame_helpers.py +27 -23
  258. flowfile_frame/cloud_storage/secret_manager.py +12 -26
  259. flowfile_frame/config.py +2 -5
  260. flowfile_frame/expr.py +311 -218
  261. flowfile_frame/expr.pyi +160 -159
  262. flowfile_frame/expr_name.py +23 -23
  263. flowfile_frame/flow_frame.py +571 -476
  264. flowfile_frame/flow_frame.pyi +123 -104
  265. flowfile_frame/flow_frame_methods.py +227 -246
  266. flowfile_frame/group_frame.py +50 -20
  267. flowfile_frame/join.py +2 -2
  268. flowfile_frame/lazy.py +129 -87
  269. flowfile_frame/lazy_methods.py +83 -30
  270. flowfile_frame/list_name_space.py +55 -50
  271. flowfile_frame/selectors.py +148 -68
  272. flowfile_frame/series.py +9 -7
  273. flowfile_frame/utils.py +19 -21
  274. flowfile_worker/__init__.py +12 -7
  275. flowfile_worker/configs.py +11 -19
  276. flowfile_worker/create/__init__.py +14 -9
  277. flowfile_worker/create/funcs.py +114 -77
  278. flowfile_worker/create/models.py +46 -43
  279. flowfile_worker/create/pl_types.py +14 -15
  280. flowfile_worker/create/read_excel_tables.py +34 -41
  281. flowfile_worker/create/utils.py +22 -19
  282. flowfile_worker/external_sources/s3_source/main.py +18 -51
  283. flowfile_worker/external_sources/s3_source/models.py +34 -27
  284. flowfile_worker/external_sources/sql_source/main.py +8 -5
  285. flowfile_worker/external_sources/sql_source/models.py +13 -9
  286. flowfile_worker/flow_logger.py +10 -8
  287. flowfile_worker/funcs.py +214 -155
  288. flowfile_worker/main.py +11 -17
  289. flowfile_worker/models.py +35 -28
  290. flowfile_worker/process_manager.py +2 -3
  291. flowfile_worker/routes.py +121 -90
  292. flowfile_worker/secrets.py +9 -6
  293. flowfile_worker/spawner.py +80 -49
  294. flowfile_worker/utils.py +3 -2
  295. shared/__init__.py +2 -7
  296. shared/storage_config.py +25 -13
  297. test_utils/postgres/commands.py +3 -2
  298. test_utils/postgres/fixtures.py +9 -9
  299. test_utils/s3/commands.py +1 -1
  300. test_utils/s3/data_generator.py +3 -4
  301. test_utils/s3/demo_data_generator.py +4 -7
  302. test_utils/s3/fixtures.py +7 -5
  303. tools/migrate/__init__.py +1 -1
  304. tools/migrate/__main__.py +16 -29
  305. tools/migrate/legacy_schemas.py +251 -190
  306. tools/migrate/migrate.py +193 -181
  307. tools/migrate/tests/conftest.py +1 -3
  308. tools/migrate/tests/test_migrate.py +36 -41
  309. tools/migrate/tests/test_migration_e2e.py +28 -29
  310. tools/migrate/tests/test_node_migrations.py +50 -20
  311. flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
  312. flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
  313. flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
  314. flowfile/web/static/assets/Filter-9b6d08db.js +0 -164
  315. flowfile/web/static/assets/Filter-f62091b3.css +0 -20
  316. flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
  317. flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
  318. flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
  319. flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
  320. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
  321. flowfile/web/static/assets/nodeInput-5d0d6b79.js +0 -41
  322. flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
  323. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
  324. flowfile/web/static/assets/secretApi-68435402.js +0 -46
  325. flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
  326. flowfile-0.5.1.dist-info/RECORD +0 -388
  327. {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +0 -0
  328. {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +0 -0
  329. {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,80 +1,103 @@
1
1
  import datetime
2
-
3
- import os
4
- import yaml
5
2
  import json
6
-
7
- import polars as pl
3
+ import os
4
+ from collections.abc import Callable
5
+ from copy import deepcopy
6
+ from functools import partial
7
+ from importlib.metadata import PackageNotFoundError, version
8
8
  from pathlib import Path
9
+ from time import time
10
+ from typing import Any, Literal, Union
11
+ from uuid import uuid1
9
12
 
10
13
  import fastexcel
14
+ import polars as pl
15
+ import yaml
11
16
  from fastapi.exceptions import HTTPException
12
- from time import time
13
- from functools import partial
14
- from typing import List, Dict, Union, Callable, Any, Optional, Tuple, Literal
15
- from uuid import uuid1
16
- from copy import deepcopy
17
17
  from pyarrow.parquet import ParquetFile
18
+
18
19
  from flowfile_core.configs import logger
19
20
  from flowfile_core.configs.flow_logger import FlowLogger
20
- from flowfile_core.flowfile.sources.external_sources.factory import data_source_factory
21
- from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, cast_str_to_polars_type
22
-
21
+ from flowfile_core.configs.node_store import CUSTOM_NODE_STORE
22
+ from flowfile_core.flowfile.analytics.utils import create_graphic_walker_node_from_node_promise
23
+ from flowfile_core.flowfile.database_connection_manager.db_connections import (
24
+ get_local_cloud_connection,
25
+ get_local_database_connection,
26
+ )
23
27
  from flowfile_core.flowfile.flow_data_engine.cloud_storage_reader import CloudStorageReader
24
- from flowfile_core.schemas.transform_schema import FuzzyMatchInputManager
25
- from flowfile_core.utils.arrow_reader import get_read_top_n
26
28
  from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine, execute_polars_code
27
- from flowfile_core.flowfile.flow_data_engine.read_excel_tables import (get_open_xlsx_datatypes,
28
- get_calamine_xlsx_data_types)
29
-
30
- from flowfile_core.flowfile.schema_callbacks import (calculate_fuzzy_match_schema, pre_calculate_pivot_schema)
29
+ from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, cast_str_to_polars_type
30
+ from flowfile_core.flowfile.flow_data_engine.polars_code_parser import polars_code_parser
31
+ from flowfile_core.flowfile.flow_data_engine.read_excel_tables import (
32
+ get_calamine_xlsx_data_types,
33
+ get_open_xlsx_datatypes,
34
+ )
35
+ from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import (
36
+ ExternalCloudWriter,
37
+ ExternalDatabaseFetcher,
38
+ ExternalDatabaseWriter,
39
+ ExternalDfFetcher,
40
+ )
41
+ from flowfile_core.flowfile.flow_node.flow_node import FlowNode
42
+ from flowfile_core.flowfile.graph_tree.graph_tree import (
43
+ add_un_drawn_nodes,
44
+ build_flow_paths,
45
+ build_node_info,
46
+ calculate_depth,
47
+ define_node_connections,
48
+ draw_merged_paths,
49
+ draw_standalone_paths,
50
+ group_nodes_by_depth,
51
+ )
52
+ from flowfile_core.flowfile.node_designer.custom_node import CustomNodeBase
53
+ from flowfile_core.flowfile.schema_callbacks import calculate_fuzzy_match_schema, pre_calculate_pivot_schema
31
54
  from flowfile_core.flowfile.sources import external_sources
55
+ from flowfile_core.flowfile.sources.external_sources.factory import data_source_factory
56
+ from flowfile_core.flowfile.sources.external_sources.sql_source import models as sql_models
57
+ from flowfile_core.flowfile.sources.external_sources.sql_source import utils as sql_utils
58
+ from flowfile_core.flowfile.sources.external_sources.sql_source.sql_source import BaseSqlSource, SqlSource
59
+ from flowfile_core.flowfile.util.calculate_layout import calculate_layered_layout
60
+ from flowfile_core.flowfile.util.execution_orderer import compute_execution_plan
61
+ from flowfile_core.flowfile.utils import snake_case_to_camel_case
32
62
  from flowfile_core.schemas import input_schema, schemas, transform_schema
63
+ from flowfile_core.schemas.cloud_storage_schemas import (
64
+ AuthMethod,
65
+ CloudStorageReadSettingsInternal,
66
+ CloudStorageWriteSettingsInternal,
67
+ FullCloudStorageConnection,
68
+ get_cloud_storage_write_settings_worker_interface,
69
+ )
33
70
  from flowfile_core.schemas.output_model import NodeData, NodeResult, RunInformation
34
- from flowfile_core.schemas.cloud_storage_schemas import (CloudStorageReadSettingsInternal,
35
- CloudStorageWriteSettingsInternal,
36
- FullCloudStorageConnection,
37
- get_cloud_storage_write_settings_worker_interface, AuthMethod)
38
- from flowfile_core.flowfile.utils import snake_case_to_camel_case
39
- from flowfile_core.flowfile.analytics.utils import create_graphic_walker_node_from_node_promise
40
- from flowfile_core.flowfile.flow_node.flow_node import FlowNode
41
- from flowfile_core.flowfile.util.execution_orderer import compute_execution_plan
42
- from flowfile_core.flowfile.graph_tree.graph_tree import (add_un_drawn_nodes, build_flow_paths,
43
- build_node_info, calculate_depth,
44
- define_node_connections, draw_merged_paths,
45
- draw_standalone_paths, group_nodes_by_depth)
46
- from flowfile_core.flowfile.flow_data_engine.polars_code_parser import polars_code_parser
47
- from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import (ExternalDatabaseFetcher,
48
- ExternalDatabaseWriter,
49
- ExternalDfFetcher,
50
- ExternalCloudWriter)
51
- from flowfile_core.secret_manager.secret_manager import get_encrypted_secret, decrypt_secret
52
- from flowfile_core.flowfile.sources.external_sources.sql_source import utils as sql_utils, models as sql_models
53
- from flowfile_core.flowfile.sources.external_sources.sql_source.sql_source import SqlSource, BaseSqlSource
54
- from flowfile_core.flowfile.database_connection_manager.db_connections import (get_local_database_connection,
55
- get_local_cloud_connection)
56
- from flowfile_core.flowfile.util.calculate_layout import calculate_layered_layout
57
- from flowfile_core.flowfile.node_designer.custom_node import CustomNodeBase
58
- from importlib.metadata import version, PackageNotFoundError
71
+ from flowfile_core.schemas.transform_schema import FuzzyMatchInputManager
72
+ from flowfile_core.secret_manager.secret_manager import decrypt_secret, get_encrypted_secret
73
+ from flowfile_core.utils.arrow_reader import get_read_top_n
59
74
 
60
75
  try:
61
76
  __version__ = version("Flowfile")
62
77
  except PackageNotFoundError:
63
- __version__ = "0.0.0-dev"
78
+ __version__ = "0.5.0"
64
79
 
65
80
 
66
81
  def represent_list_json(dumper, data):
67
82
  """Use inline style for short simple lists, block style for complex ones."""
68
83
  if len(data) <= 10 and all(isinstance(item, (int, str, float, bool, type(None))) for item in data):
69
- return dumper.represent_sequence('tag:yaml.org,2002:seq', data, flow_style=True)
70
- return dumper.represent_sequence('tag:yaml.org,2002:seq', data, flow_style=False)
84
+ return dumper.represent_sequence("tag:yaml.org,2002:seq", data, flow_style=True)
85
+ return dumper.represent_sequence("tag:yaml.org,2002:seq", data, flow_style=False)
71
86
 
72
87
 
73
88
  yaml.add_representer(list, represent_list_json)
74
89
 
75
90
 
76
- def get_xlsx_schema(engine: str, file_path: str, sheet_name: str, start_row: int, start_column: int,
77
- end_row: int, end_column: int, has_headers: bool):
91
+ def get_xlsx_schema(
92
+ engine: str,
93
+ file_path: str,
94
+ sheet_name: str,
95
+ start_row: int,
96
+ start_column: int,
97
+ end_row: int,
98
+ end_column: int,
99
+ has_headers: bool,
100
+ ):
78
101
  """Calculates the schema of an XLSX file by reading a sample of rows.
79
102
 
80
103
  Args:
@@ -91,27 +114,29 @@ def get_xlsx_schema(engine: str, file_path: str, sheet_name: str, start_row: int
91
114
  A list of FlowfileColumn objects representing the schema.
92
115
  """
93
116
  try:
94
- logger.info('Starting to calculate the schema')
95
- if engine == 'openpyxl':
117
+ logger.info("Starting to calculate the schema")
118
+ if engine == "openpyxl":
96
119
  max_col = end_column if end_column > 0 else None
97
- return get_open_xlsx_datatypes(file_path=file_path,
98
- sheet_name=sheet_name,
99
- min_row=start_row + 1,
100
- min_col=start_column + 1,
101
- max_row=100,
102
- max_col=max_col, has_headers=has_headers)
103
- elif engine == 'calamine':
104
- return get_calamine_xlsx_data_types(file_path=file_path,
105
- sheet_name=sheet_name,
106
- start_row=start_row,
107
- end_row=end_row)
108
- logger.info('done calculating the schema')
120
+ return get_open_xlsx_datatypes(
121
+ file_path=file_path,
122
+ sheet_name=sheet_name,
123
+ min_row=start_row + 1,
124
+ min_col=start_column + 1,
125
+ max_row=100,
126
+ max_col=max_col,
127
+ has_headers=has_headers,
128
+ )
129
+ elif engine == "calamine":
130
+ return get_calamine_xlsx_data_types(
131
+ file_path=file_path, sheet_name=sheet_name, start_row=start_row, end_row=end_row
132
+ )
133
+ logger.info("done calculating the schema")
109
134
  except Exception as e:
110
135
  logger.error(e)
111
136
  return []
112
137
 
113
138
 
114
- def skip_node_message(flow_logger: FlowLogger, nodes: List[FlowNode]) -> None:
139
+ def skip_node_message(flow_logger: FlowLogger, nodes: list[FlowNode]) -> None:
115
140
  """Logs a warning message listing all nodes that will be skipped during execution.
116
141
 
117
142
  Args:
@@ -120,10 +145,10 @@ def skip_node_message(flow_logger: FlowLogger, nodes: List[FlowNode]) -> None:
120
145
  """
121
146
  if len(nodes) > 0:
122
147
  msg = "\n".join(str(node) for node in nodes)
123
- flow_logger.warning(f'skipping nodes:\n{msg}')
148
+ flow_logger.warning(f"skipping nodes:\n{msg}")
124
149
 
125
150
 
126
- def execution_order_message(flow_logger: FlowLogger, nodes: List[FlowNode]) -> None:
151
+ def execution_order_message(flow_logger: FlowLogger, nodes: list[FlowNode]) -> None:
127
152
  """Logs an informational message showing the determined execution order of nodes.
128
153
 
129
154
  Args:
@@ -131,11 +156,19 @@ def execution_order_message(flow_logger: FlowLogger, nodes: List[FlowNode]) -> N
131
156
  nodes: A list of FlowNode objects in the order they will be executed.
132
157
  """
133
158
  msg = "\n".join(str(node) for node in nodes)
134
- flow_logger.info(f'execution order:\n{msg}')
135
-
136
-
137
- def get_xlsx_schema_callback(engine: str, file_path: str, sheet_name: str, start_row: int, start_column: int,
138
- end_row: int, end_column: int, has_headers: bool):
159
+ flow_logger.info(f"execution order:\n{msg}")
160
+
161
+
162
+ def get_xlsx_schema_callback(
163
+ engine: str,
164
+ file_path: str,
165
+ sheet_name: str,
166
+ start_row: int,
167
+ start_column: int,
168
+ end_row: int,
169
+ end_column: int,
170
+ has_headers: bool,
171
+ ):
139
172
  """Creates a partially applied function for lazy calculation of an XLSX schema.
140
173
 
141
174
  Args:
@@ -151,12 +184,22 @@ def get_xlsx_schema_callback(engine: str, file_path: str, sheet_name: str, start
151
184
  Returns:
152
185
  A callable function that, when called, will execute `get_xlsx_schema`.
153
186
  """
154
- return partial(get_xlsx_schema, engine=engine, file_path=file_path, sheet_name=sheet_name, start_row=start_row,
155
- start_column=start_column, end_row=end_row, end_column=end_column, has_headers=has_headers)
187
+ return partial(
188
+ get_xlsx_schema,
189
+ engine=engine,
190
+ file_path=file_path,
191
+ sheet_name=sheet_name,
192
+ start_row=start_row,
193
+ start_column=start_column,
194
+ end_row=end_row,
195
+ end_column=end_column,
196
+ has_headers=has_headers,
197
+ )
156
198
 
157
199
 
158
- def get_cloud_connection_settings(connection_name: str,
159
- user_id: int, auth_mode: AuthMethod) -> FullCloudStorageConnection:
200
+ def get_cloud_connection_settings(
201
+ connection_name: str, user_id: int, auth_mode: AuthMethod
202
+ ) -> FullCloudStorageConnection:
160
203
  """Retrieves cloud storage connection settings, falling back to environment variables if needed.
161
204
 
162
205
  Args:
@@ -186,32 +229,44 @@ class FlowGraph:
186
229
 
187
230
  It manages nodes, connections, and the execution of the entire flow.
188
231
  """
232
+
189
233
  uuid: str
190
- depends_on: Dict[int, Union[ParquetFile, FlowDataEngine, "FlowGraph", pl.DataFrame,]]
234
+ depends_on: dict[
235
+ int,
236
+ Union[
237
+ ParquetFile,
238
+ FlowDataEngine,
239
+ "FlowGraph",
240
+ pl.DataFrame,
241
+ ],
242
+ ]
191
243
  _flow_id: int
192
244
  _input_data: Union[ParquetFile, FlowDataEngine, "FlowGraph"]
193
- _input_cols: List[str]
194
- _output_cols: List[str]
195
- _node_db: Dict[Union[str, int], FlowNode]
196
- _node_ids: List[Union[str, int]]
197
- _results: Optional[FlowDataEngine] = None
245
+ _input_cols: list[str]
246
+ _output_cols: list[str]
247
+ _node_db: dict[str | int, FlowNode]
248
+ _node_ids: list[str | int]
249
+ _results: FlowDataEngine | None = None
198
250
  cache_results: bool = False
199
- schema: Optional[List[FlowfileColumn]] = None
251
+ schema: list[FlowfileColumn] | None = None
200
252
  has_over_row_function: bool = False
201
- _flow_starts: List[Union[int, str]] = None
202
- latest_run_info: Optional[RunInformation] = None
253
+ _flow_starts: list[int | str] = None
254
+ latest_run_info: RunInformation | None = None
203
255
  start_datetime: datetime = None
204
256
  end_datetime: datetime = None
205
257
  _flow_settings: schemas.FlowSettings = None
206
258
  flow_logger: FlowLogger
207
259
 
208
- def __init__(self,
209
- flow_settings: schemas.FlowSettings | schemas.FlowGraphConfig,
210
- name: str = None, input_cols: List[str] = None,
211
- output_cols: List[str] = None,
212
- path_ref: str = None,
213
- input_flow: Union[ParquetFile, FlowDataEngine, "FlowGraph"] = None,
214
- cache_results: bool = False):
260
+ def __init__(
261
+ self,
262
+ flow_settings: schemas.FlowSettings | schemas.FlowGraphConfig,
263
+ name: str = None,
264
+ input_cols: list[str] = None,
265
+ output_cols: list[str] = None,
266
+ path_ref: str = None,
267
+ input_flow: Union[ParquetFile, FlowDataEngine, "FlowGraph"] = None,
268
+ cache_results: bool = False,
269
+ ):
215
270
  """Initializes a new FlowGraph instance.
216
271
 
217
272
  Args:
@@ -233,7 +288,7 @@ class FlowGraph:
233
288
  self.latest_run_info = None
234
289
  self._flow_id = flow_settings.flow_id
235
290
  self.flow_logger = FlowLogger(flow_settings.flow_id)
236
- self._flow_starts: List[FlowNode] = []
291
+ self._flow_starts: list[FlowNode] = []
237
292
  self._results = None
238
293
  self.schema = None
239
294
  self.has_over_row_function = False
@@ -255,13 +310,21 @@ class FlowGraph:
255
310
 
256
311
  @flow_settings.setter
257
312
  def flow_settings(self, flow_settings: schemas.FlowSettings):
258
- if (
259
- (self._flow_settings.execution_location != flow_settings.execution_location) or
260
- (self._flow_settings.execution_mode != flow_settings.execution_mode)
313
+ if (self._flow_settings.execution_location != flow_settings.execution_location) or (
314
+ self._flow_settings.execution_mode != flow_settings.execution_mode
261
315
  ):
262
316
  self.reset()
263
317
  self._flow_settings = flow_settings
264
318
 
319
+ def add_node_to_starting_list(self, node: FlowNode) -> None:
320
+ """Adds a node to the list of starting nodes for the flow if not already present.
321
+
322
+ Args:
323
+ node: The FlowNode to add as a starting node.
324
+ """
325
+ if node.node_id not in {self_node.node_id for self_node in self._flow_starts}:
326
+ self._flow_starts.append(node)
327
+
265
328
  def add_node_promise(self, node_promise: input_schema.NodePromise):
266
329
  """Adds a placeholder node to the graph that is not yet fully configured.
267
330
 
@@ -270,13 +333,31 @@ class FlowGraph:
270
333
  Args:
271
334
  node_promise: A promise object containing basic node information.
272
335
  """
336
+
273
337
  def placeholder(n: FlowNode = None):
274
338
  if n is None:
275
339
  return FlowDataEngine()
276
340
  return n
277
341
 
278
- self.add_node_step(node_id=node_promise.node_id, node_type=node_promise.node_type, function=placeholder,
279
- setting_input=node_promise)
342
+ self.add_node_step(
343
+ node_id=node_promise.node_id,
344
+ node_type=node_promise.node_type,
345
+ function=placeholder,
346
+ setting_input=node_promise,
347
+ )
348
+ if node_promise.is_user_defined:
349
+ node_needs_settings: bool
350
+ custom_node = CUSTOM_NODE_STORE.get(node_promise.node_type)
351
+ if custom_node is None:
352
+ raise Exception(f"Custom node type '{node_promise.node_type}' not found in registry.")
353
+ settings_schema = custom_node.model_fields["settings_schema"].default
354
+ node_needs_settings = settings_schema is not None and not settings_schema.is_empty()
355
+ if not node_needs_settings:
356
+ user_defined_node_settings = input_schema.UserDefinedNode(settings={}, **node_promise.model_dump())
357
+ initialized_model = custom_node()
358
+ self.add_user_defined_node(
359
+ custom_node=initialized_model, user_defined_node_settings=user_defined_node_settings
360
+ )
280
361
 
281
362
  def apply_layout(self, y_spacing: int = 150, x_spacing: int = 200, initial_y: int = 100):
282
363
  """Calculates and applies a layered layout to all nodes in the graph.
@@ -304,20 +385,24 @@ class FlowGraph:
304
385
  updated_count = 0
305
386
  for node_id, (pos_x, pos_y) in new_positions.items():
306
387
  node = self.get_node(node_id)
307
- if node and hasattr(node, 'setting_input'):
388
+ if node and hasattr(node, "setting_input"):
308
389
  setting = node.setting_input
309
- if hasattr(setting, 'pos_x') and hasattr(setting, 'pos_y'):
390
+ if hasattr(setting, "pos_x") and hasattr(setting, "pos_y"):
310
391
  setting.pos_x = pos_x
311
392
  setting.pos_y = pos_y
312
393
  updated_count += 1
313
394
  else:
314
- self.flow_logger.warning(f"Node {node_id} setting_input ({type(setting)}) lacks pos_x/pos_y attributes.")
395
+ self.flow_logger.warning(
396
+ f"Node {node_id} setting_input ({type(setting)}) lacks pos_x/pos_y attributes."
397
+ )
315
398
  elif node:
316
399
  self.flow_logger.warning(f"Node {node_id} lacks setting_input attribute.")
317
400
  # else: Node not found, already warned by calculate_layered_layout
318
401
 
319
402
  end_time = time()
320
- self.flow_logger.info(f"Layout applied to {updated_count}/{len(self.nodes)} nodes in {end_time - start_time:.2f} seconds.")
403
+ self.flow_logger.info(
404
+ f"Layout applied to {updated_count}/{len(self.nodes)} nodes in {end_time - start_time:.2f} seconds."
405
+ )
321
406
 
322
407
  except Exception as e:
323
408
  self.flow_logger.error(f"Error applying layout: {e}")
@@ -337,13 +422,13 @@ class FlowGraph:
337
422
  """
338
423
  self._flow_id = new_id
339
424
  for node in self.nodes:
340
- if hasattr(node.setting_input, 'flow_id'):
425
+ if hasattr(node.setting_input, "flow_id"):
341
426
  node.setting_input.flow_id = new_id
342
427
  self.flow_settings.flow_id = new_id
343
428
 
344
429
  def __repr__(self):
345
430
  """Provides the official string representation of the FlowGraph instance."""
346
- settings_str = " -" + '\n -'.join(f"{k}: {v}" for k, v in self.flow_settings)
431
+ settings_str = " -" + "\n -".join(f"{k}: {v}" for k, v in self.flow_settings)
347
432
  return f"FlowGraph(\nNodes: {self._node_db}\n\nSettings:\n{settings_str}"
348
433
 
349
434
  def print_tree(self):
@@ -361,7 +446,7 @@ class FlowGraph:
361
446
 
362
447
  # Group nodes by depth
363
448
  depth_groups, max_depth = group_nodes_by_depth(node_info)
364
-
449
+
365
450
  # Sort nodes within each depth group
366
451
  for depth in depth_groups:
367
452
  depth_groups[depth].sort()
@@ -371,7 +456,7 @@ class FlowGraph:
371
456
 
372
457
  # Track which nodes connect to what
373
458
  merge_points = define_node_connections(node_info)
374
-
459
+
375
460
  # Build the flow paths
376
461
 
377
462
  # Find the maximum label length for each depth level
@@ -380,15 +465,15 @@ class FlowGraph:
380
465
  if depth in depth_groups:
381
466
  max_len = max(len(node_info[nid].label) for nid in depth_groups[depth])
382
467
  max_label_length[depth] = max_len
383
-
468
+
384
469
  # Draw the paths
385
470
  drawn_nodes = set()
386
471
  merge_drawn = set()
387
-
472
+
388
473
  # Group paths by their merge points
389
474
  paths_by_merge = {}
390
475
  standalone_paths = []
391
-
476
+
392
477
  # Build flow paths
393
478
  paths = build_flow_paths(node_info, self._flow_starts, merge_points)
394
479
 
@@ -410,22 +495,22 @@ class FlowGraph:
410
495
 
411
496
  # Add undrawn nodes
412
497
  add_un_drawn_nodes(drawn_nodes, node_info, lines)
413
-
498
+
414
499
  try:
415
500
  skip_nodes, ordered_nodes = compute_execution_plan(
416
- nodes=self.nodes,
417
- flow_starts=self._flow_starts+self.get_implicit_starter_nodes())
501
+ nodes=self.nodes, flow_starts=self._flow_starts + self.get_implicit_starter_nodes()
502
+ )
418
503
  if ordered_nodes:
419
504
  for i, node in enumerate(ordered_nodes, 1):
420
505
  lines.append(f" {i:3d}. {node_info[node.node_id].label}")
421
506
  except Exception as e:
422
507
  lines.append(f" Could not determine execution order: {e}")
423
-
508
+
424
509
  # Print everything
425
510
  output = "\n".join(lines)
426
-
511
+
427
512
  print(output)
428
-
513
+
429
514
  def get_nodes_overview(self):
430
515
  """Gets a list of dictionary representations for all nodes in the graph."""
431
516
  output = []
@@ -433,7 +518,7 @@ class FlowGraph:
433
518
  output.append(v.get_repr())
434
519
  return output
435
520
 
436
- def remove_from_output_cols(self, columns: List[str]):
521
+ def remove_from_output_cols(self, columns: list[str]):
437
522
  """Removes specified columns from the list of expected output columns.
438
523
 
439
524
  Args:
@@ -442,7 +527,7 @@ class FlowGraph:
442
527
  cols = set(columns)
443
528
  self._output_cols = [c for c in self._output_cols if c not in cols]
444
529
 
445
- def get_node(self, node_id: Union[int, str] = None) -> FlowNode | None:
530
+ def get_node(self, node_id: int | str = None) -> FlowNode | None:
446
531
  """Retrieves a node from the graph by its ID.
447
532
 
448
533
  Args:
@@ -456,24 +541,43 @@ class FlowGraph:
456
541
  node = self._node_db.get(node_id)
457
542
  if node is not None:
458
543
  return node
459
-
460
- def add_user_defined_node(self, *,
461
- custom_node: CustomNodeBase,
462
- user_defined_node_settings: input_schema.UserDefinedNode
463
- ):
464
-
465
- def _func(*fdes: FlowDataEngine) -> FlowDataEngine | None:
466
- output = custom_node.process(*(fde.data_frame for fde in fdes))
467
- if isinstance(output, pl.LazyFrame | pl.DataFrame):
544
+
545
+ def add_user_defined_node(
546
+ self, *, custom_node: CustomNodeBase, user_defined_node_settings: input_schema.UserDefinedNode
547
+ ):
548
+ """Adds a user-defined custom node to the graph.
549
+
550
+ Args:
551
+ custom_node: The custom node instance to add.
552
+ user_defined_node_settings: The settings for the user-defined node.
553
+ """
554
+
555
+ def _func(*flow_data_engine: FlowDataEngine) -> FlowDataEngine | None:
556
+ user_id = user_defined_node_settings.user_id
557
+ if user_id is not None:
558
+ custom_node.set_execution_context(user_id)
559
+ if custom_node.settings_schema:
560
+ custom_node.settings_schema.set_secret_context(user_id, custom_node.accessed_secrets)
561
+
562
+ output = custom_node.process(*(fde.data_frame for fde in flow_data_engine))
563
+
564
+ accessed_secrets = custom_node.get_accessed_secrets()
565
+ if accessed_secrets:
566
+ logger.info(f"Node '{user_defined_node_settings.node_id}' accessed secrets: {accessed_secrets}")
567
+ if isinstance(output, (pl.LazyFrame, pl.DataFrame)):
468
568
  return FlowDataEngine(output)
469
569
  return None
470
-
471
- self.add_node_step(node_id=user_defined_node_settings.node_id,
472
- function=_func,
473
- setting_input=user_defined_node_settings,
474
- input_node_ids=user_defined_node_settings.depending_on_ids,
475
- node_type=custom_node.item,
476
- )
570
+
571
+ self.add_node_step(
572
+ node_id=user_defined_node_settings.node_id,
573
+ function=_func,
574
+ setting_input=user_defined_node_settings,
575
+ input_node_ids=user_defined_node_settings.depending_on_ids,
576
+ node_type=custom_node.item,
577
+ )
578
+ if custom_node.number_of_inputs == 0:
579
+ node = self.get_node(user_defined_node_settings.node_id)
580
+ self.add_node_to_starting_list(node)
477
581
 
478
582
  def add_pivot(self, pivot_settings: input_schema.NodePivot):
479
583
  """Adds a pivot node to the graph.
@@ -485,11 +589,13 @@ class FlowGraph:
485
589
  def _func(fl: FlowDataEngine):
486
590
  return fl.do_pivot(pivot_settings.pivot_input, self.flow_logger.get_node_logger(pivot_settings.node_id))
487
591
 
488
- self.add_node_step(node_id=pivot_settings.node_id,
489
- function=_func,
490
- node_type='pivot',
491
- setting_input=pivot_settings,
492
- input_node_ids=[pivot_settings.depending_on_id])
592
+ self.add_node_step(
593
+ node_id=pivot_settings.node_id,
594
+ function=_func,
595
+ node_type="pivot",
596
+ setting_input=pivot_settings,
597
+ input_node_ids=[pivot_settings.depending_on_id],
598
+ )
493
599
 
494
600
  node = self.get_node(pivot_settings.node_id)
495
601
 
@@ -498,6 +604,7 @@ class FlowGraph:
498
604
  input_data.lazy = True # ensure the dataset is lazy
499
605
  input_lf = input_data.data_frame # get the lazy frame
500
606
  return pre_calculate_pivot_schema(input_data.schema, pivot_settings.pivot_input, input_lf=input_lf)
607
+
501
608
  node.schema_callback = schema_callback
502
609
 
503
610
  def add_unpivot(self, unpivot_settings: input_schema.NodeUnpivot):
@@ -510,11 +617,13 @@ class FlowGraph:
510
617
  def _func(fl: FlowDataEngine) -> FlowDataEngine:
511
618
  return fl.unpivot(unpivot_settings.unpivot_input)
512
619
 
513
- self.add_node_step(node_id=unpivot_settings.node_id,
514
- function=_func,
515
- node_type='unpivot',
516
- setting_input=unpivot_settings,
517
- input_node_ids=[unpivot_settings.depending_on_id])
620
+ self.add_node_step(
621
+ node_id=unpivot_settings.node_id,
622
+ function=_func,
623
+ node_type="unpivot",
624
+ setting_input=unpivot_settings,
625
+ input_node_ids=[unpivot_settings.depending_on_id],
626
+ )
518
627
 
519
628
  def add_union(self, union_settings: input_schema.NodeUnion):
520
629
  """Adds a union node to combine multiple data streams.
@@ -524,14 +633,16 @@ class FlowGraph:
524
633
  """
525
634
 
526
635
  def _func(*flowfile_tables: FlowDataEngine):
527
- dfs: List[pl.LazyFrame] | List[pl.DataFrame] = [flt.data_frame for flt in flowfile_tables]
528
- return FlowDataEngine(pl.concat(dfs, how='diagonal_relaxed'))
636
+ dfs: list[pl.LazyFrame] | list[pl.DataFrame] = [flt.data_frame for flt in flowfile_tables]
637
+ return FlowDataEngine(pl.concat(dfs, how="diagonal_relaxed"))
529
638
 
530
- self.add_node_step(node_id=union_settings.node_id,
531
- function=_func,
532
- node_type=f'union',
533
- setting_input=union_settings,
534
- input_node_ids=union_settings.depending_on_ids)
639
+ self.add_node_step(
640
+ node_id=union_settings.node_id,
641
+ function=_func,
642
+ node_type="union",
643
+ setting_input=union_settings,
644
+ input_node_ids=union_settings.depending_on_ids,
645
+ )
535
646
 
536
647
  def add_initial_node_analysis(self, node_promise: input_schema.NodePromise):
537
648
  """Adds a data exploration/analysis node based on a node promise.
@@ -559,13 +670,14 @@ class FlowGraph:
559
670
  flowfile_table = flowfile_table.get_sample(sample_size, random=True)
560
671
  external_sampler = ExternalDfFetcher(
561
672
  lf=flowfile_table.data_frame,
562
- file_ref="__gf_walker"+node.hash,
673
+ file_ref="__gf_walker" + node.hash,
563
674
  wait_on_completion=True,
564
675
  node_id=node.node_id,
565
676
  flow_id=self.flow_id,
566
677
  )
567
- node.results.analysis_data_generator = get_read_top_n(external_sampler.status.file_ref,
568
- n=min(sample_size, number_of_records))
678
+ node.results.analysis_data_generator = get_read_top_n(
679
+ external_sampler.status.file_ref, n=min(sample_size, number_of_records)
680
+ )
569
681
  return flowfile_table
570
682
 
571
683
  def schema_callback():
@@ -574,11 +686,15 @@ class FlowGraph:
574
686
  input_node = node.all_inputs[0]
575
687
  return input_node.schema
576
688
  else:
577
- return [FlowfileColumn.from_input('col_1', 'na')]
689
+ return [FlowfileColumn.from_input("col_1", "na")]
578
690
 
579
- self.add_node_step(node_id=node_analysis.node_id, node_type='explore_data',
580
- function=analysis_preparation,
581
- setting_input=node_analysis, schema_callback=schema_callback)
691
+ self.add_node_step(
692
+ node_id=node_analysis.node_id,
693
+ node_type="explore_data",
694
+ function=analysis_preparation,
695
+ setting_input=node_analysis,
696
+ schema_callback=schema_callback,
697
+ )
582
698
  node = self.get_node(node_analysis.node_id)
583
699
 
584
700
  def add_group_by(self, group_by_settings: input_schema.NodeGroupBy):
@@ -591,19 +707,20 @@ class FlowGraph:
591
707
  def _func(fl: FlowDataEngine) -> FlowDataEngine:
592
708
  return fl.do_group_by(group_by_settings.groupby_input, False)
593
709
 
594
- self.add_node_step(node_id=group_by_settings.node_id,
595
- function=_func,
596
- node_type=f'group_by',
597
- setting_input=group_by_settings,
598
- input_node_ids=[group_by_settings.depending_on_id])
710
+ self.add_node_step(
711
+ node_id=group_by_settings.node_id,
712
+ function=_func,
713
+ node_type="group_by",
714
+ setting_input=group_by_settings,
715
+ input_node_ids=[group_by_settings.depending_on_id],
716
+ )
599
717
 
600
718
  node = self.get_node(group_by_settings.node_id)
601
719
 
602
720
  def schema_callback():
603
-
604
721
  output_columns = [(c.old_name, c.new_name, c.output_type) for c in group_by_settings.groupby_input.agg_cols]
605
722
  depends_on = node.node_inputs.main_inputs[0]
606
- input_schema_dict: Dict[str, str] = {s.name: s.data_type for s in depends_on.schema}
723
+ input_schema_dict: dict[str, str] = {s.name: s.data_type for s in depends_on.schema}
607
724
  output_schema = []
608
725
  for old_name, new_name, data_type in output_columns:
609
726
  data_type = input_schema_dict[old_name] if data_type is None else data_type
@@ -618,38 +735,148 @@ class FlowGraph:
618
735
  Args:
619
736
  filter_settings: The settings for the filter operation.
620
737
  """
738
+ from flowfile_core.schemas.transform_schema import FilterOperator
739
+
740
+ def _build_basic_filter_expression(
741
+ basic_filter: transform_schema.BasicFilter, field_data_type: str | None = None
742
+ ) -> str:
743
+ """Build a filter expression string from a BasicFilter object.
744
+
745
+ Uses the Flowfile expression language that is compatible with polars_expr_transformer.
746
+
747
+ Args:
748
+ basic_filter: The basic filter configuration.
749
+ field_data_type: The data type of the field (optional, for smart quoting).
750
+
751
+ Returns:
752
+ A filter expression string compatible with polars_expr_transformer.
753
+ """
754
+ field = f"[{basic_filter.field}]"
755
+ value = basic_filter.value
756
+ value2 = basic_filter.value2
757
+
758
+ is_numeric_value = value.replace(".", "", 1).replace("-", "", 1).isnumeric() if value else False
759
+ should_quote = field_data_type == "str" or not is_numeric_value
760
+
761
+ try:
762
+ operator = basic_filter.get_operator()
763
+ except (ValueError, AttributeError):
764
+ operator = FilterOperator.from_symbol(str(basic_filter.operator))
765
+
766
+ if operator == FilterOperator.EQUALS:
767
+ if should_quote:
768
+ return f'{field}="{value}"'
769
+ return f"{field}={value}"
770
+
771
+ elif operator == FilterOperator.NOT_EQUALS:
772
+ if should_quote:
773
+ return f'{field}!="{value}"'
774
+ return f"{field}!={value}"
775
+
776
+ elif operator == FilterOperator.GREATER_THAN:
777
+ if should_quote:
778
+ return f'{field}>"{value}"'
779
+ return f"{field}>{value}"
780
+
781
+ elif operator == FilterOperator.GREATER_THAN_OR_EQUALS:
782
+ if should_quote:
783
+ return f'{field}>="{value}"'
784
+ return f"{field}>={value}"
785
+
786
+ elif operator == FilterOperator.LESS_THAN:
787
+ if should_quote:
788
+ return f'{field}<"{value}"'
789
+ return f"{field}<{value}"
790
+
791
+ elif operator == FilterOperator.LESS_THAN_OR_EQUALS:
792
+ if should_quote:
793
+ return f'{field}<="{value}"'
794
+ return f"{field}<={value}"
795
+
796
+ elif operator == FilterOperator.CONTAINS:
797
+ return f'contains({field}, "{value}")'
798
+
799
+ elif operator == FilterOperator.NOT_CONTAINS:
800
+ return f'contains({field}, "{value}") = false'
801
+
802
+ elif operator == FilterOperator.STARTS_WITH:
803
+ return f'left({field}, {len(value)}) = "{value}"'
804
+
805
+ elif operator == FilterOperator.ENDS_WITH:
806
+ return f'right({field}, {len(value)}) = "{value}"'
807
+
808
+ elif operator == FilterOperator.IS_NULL:
809
+ return f"is_empty({field})"
810
+
811
+ elif operator == FilterOperator.IS_NOT_NULL:
812
+ return f"is_not_empty({field})"
813
+
814
+ elif operator == FilterOperator.IN:
815
+ values = [v.strip() for v in value.split(",")]
816
+ if len(values) == 1:
817
+ if should_quote:
818
+ return f'{field}="{values[0]}"'
819
+ return f"{field}={values[0]}"
820
+ if should_quote:
821
+ conditions = [f'({field}="{v}")' for v in values]
822
+ else:
823
+ conditions = [f"({field}={v})" for v in values]
824
+ return " | ".join(conditions)
825
+
826
+ elif operator == FilterOperator.NOT_IN:
827
+ values = [v.strip() for v in value.split(",")]
828
+ if len(values) == 1:
829
+ if should_quote:
830
+ return f'{field}!="{values[0]}"'
831
+ return f"{field}!={values[0]}"
832
+ if should_quote:
833
+ conditions = [f'({field}!="{v}")' for v in values]
834
+ else:
835
+ conditions = [f"({field}!={v})" for v in values]
836
+ return " & ".join(conditions)
621
837
 
622
- is_advanced = filter_settings.filter_input.filter_type == 'advanced'
623
- if is_advanced:
624
- predicate = filter_settings.filter_input.advanced_filter
625
- else:
626
- _basic_filter = filter_settings.filter_input.basic_filter
627
- filter_settings.filter_input.advanced_filter = (f'[{_basic_filter.field}]{_basic_filter.filter_type}"'
628
- f'{_basic_filter.filter_value}"')
838
+ elif operator == FilterOperator.BETWEEN:
839
+ if value2 is None:
840
+ raise ValueError("BETWEEN operator requires value2")
841
+ if should_quote:
842
+ return f'({field}>="{value}") & ({field}<="{value2}")'
843
+ return f"({field}>={value}) & ({field}<={value2})"
844
+
845
+ else:
846
+ # Fallback for unknown operators - use legacy format
847
+ if should_quote:
848
+ return f'{field}{operator.to_symbol()}"{value}"'
849
+ return f"{field}{operator.to_symbol()}{value}"
629
850
 
630
851
  def _func(fl: FlowDataEngine):
631
- is_advanced = filter_settings.filter_input.filter_type == 'advanced'
852
+ is_advanced = filter_settings.filter_input.is_advanced()
853
+
632
854
  if is_advanced:
855
+ predicate = filter_settings.filter_input.advanced_filter
633
856
  return fl.do_filter(predicate)
634
857
  else:
635
858
  basic_filter = filter_settings.filter_input.basic_filter
636
- if basic_filter.filter_value.isnumeric():
859
+ if basic_filter is None:
860
+ logger.warning("Basic filter is None, returning unfiltered data")
861
+ return fl
862
+
863
+ try:
637
864
  field_data_type = fl.get_schema_column(basic_filter.field).generic_datatype()
638
- if field_data_type == 'str':
639
- _f = f'[{basic_filter.field}]{basic_filter.filter_type}"{basic_filter.filter_value}"'
640
- else:
641
- _f = f'[{basic_filter.field}]{basic_filter.filter_type}{basic_filter.filter_value}'
642
- else:
643
- _f = f'[{basic_filter.field}]{basic_filter.filter_type}"{basic_filter.filter_value}"'
644
- filter_settings.filter_input.advanced_filter = _f
645
- return fl.do_filter(_f)
865
+ except Exception:
866
+ field_data_type = None
646
867
 
647
- self.add_node_step(filter_settings.node_id, _func,
648
- node_type='filter',
649
- renew_schema=False,
650
- setting_input=filter_settings,
651
- input_node_ids=[filter_settings.depending_on_id]
652
- )
868
+ expression = _build_basic_filter_expression(basic_filter, field_data_type)
869
+ filter_settings.filter_input.advanced_filter = expression
870
+ return fl.do_filter(expression)
871
+
872
+ self.add_node_step(
873
+ filter_settings.node_id,
874
+ _func,
875
+ node_type="filter",
876
+ renew_schema=False,
877
+ setting_input=filter_settings,
878
+ input_node_ids=[filter_settings.depending_on_id],
879
+ )
653
880
 
654
881
  def add_record_count(self, node_number_of_records: input_schema.NodeRecordCount):
655
882
  """Adds a filter node to the graph.
@@ -661,11 +888,13 @@ class FlowGraph:
661
888
  def _func(fl: FlowDataEngine) -> FlowDataEngine:
662
889
  return fl.get_record_count()
663
890
 
664
- self.add_node_step(node_id=node_number_of_records.node_id,
665
- function=_func,
666
- node_type='record_count',
667
- setting_input=node_number_of_records,
668
- input_node_ids=[node_number_of_records.depending_on_id])
891
+ self.add_node_step(
892
+ node_id=node_number_of_records.node_id,
893
+ function=_func,
894
+ node_type="record_count",
895
+ setting_input=node_number_of_records,
896
+ input_node_ids=[node_number_of_records.depending_on_id],
897
+ )
669
898
 
670
899
  def add_polars_code(self, node_polars_code: input_schema.NodePolarsCode):
671
900
  """Adds a node that executes custom Polars code.
@@ -676,11 +905,14 @@ class FlowGraph:
676
905
 
677
906
  def _func(*flowfile_tables: FlowDataEngine) -> FlowDataEngine:
678
907
  return execute_polars_code(*flowfile_tables, code=node_polars_code.polars_code_input.polars_code)
679
- self.add_node_step(node_id=node_polars_code.node_id,
680
- function=_func,
681
- node_type='polars_code',
682
- setting_input=node_polars_code,
683
- input_node_ids=node_polars_code.depending_on_ids)
908
+
909
+ self.add_node_step(
910
+ node_id=node_polars_code.node_id,
911
+ function=_func,
912
+ node_type="polars_code",
913
+ setting_input=node_polars_code,
914
+ input_node_ids=node_polars_code.depending_on_ids,
915
+ )
684
916
 
685
917
  try:
686
918
  polars_code_parser.validate_code(node_polars_code.polars_code_input.polars_code)
@@ -688,9 +920,7 @@ class FlowGraph:
688
920
  node = self.get_node(node_id=node_polars_code.node_id)
689
921
  node.results.errors = str(e)
690
922
 
691
- def add_dependency_on_polars_lazy_frame(self,
692
- lazy_frame: pl.LazyFrame,
693
- node_id: int):
923
+ def add_dependency_on_polars_lazy_frame(self, lazy_frame: pl.LazyFrame, node_id: int):
694
924
  """Adds a special node that directly injects a Polars LazyFrame into the graph.
695
925
 
696
926
  Note: This is intended for backend use and will not work in the UI editor.
@@ -699,13 +929,16 @@ class FlowGraph:
699
929
  lazy_frame: The Polars LazyFrame to inject.
700
930
  node_id: The ID for the new node.
701
931
  """
932
+
702
933
  def _func():
703
934
  return FlowDataEngine(lazy_frame)
704
- node_promise = input_schema.NodePromise(flow_id=self.flow_id,
705
- node_id=node_id, node_type="polars_lazy_frame",
706
- is_setup=True)
707
- self.add_node_step(node_id=node_promise.node_id, node_type=node_promise.node_type, function=_func,
708
- setting_input=node_promise)
935
+
936
+ node_promise = input_schema.NodePromise(
937
+ flow_id=self.flow_id, node_id=node_id, node_type="polars_lazy_frame", is_setup=True
938
+ )
939
+ self.add_node_step(
940
+ node_id=node_promise.node_id, node_type=node_promise.node_type, function=_func, setting_input=node_promise
941
+ )
709
942
 
710
943
  def add_unique(self, unique_settings: input_schema.NodeUnique):
711
944
  """Adds a node to find and remove duplicate rows.
@@ -717,12 +950,14 @@ class FlowGraph:
717
950
  def _func(fl: FlowDataEngine) -> FlowDataEngine:
718
951
  return fl.make_unique(unique_settings.unique_input)
719
952
 
720
- self.add_node_step(node_id=unique_settings.node_id,
721
- function=_func,
722
- input_columns=[],
723
- node_type='unique',
724
- setting_input=unique_settings,
725
- input_node_ids=[unique_settings.depending_on_id])
953
+ self.add_node_step(
954
+ node_id=unique_settings.node_id,
955
+ function=_func,
956
+ input_columns=[],
957
+ node_type="unique",
958
+ setting_input=unique_settings,
959
+ input_node_ids=[unique_settings.depending_on_id],
960
+ )
726
961
 
727
962
  def add_graph_solver(self, graph_solver_settings: input_schema.NodeGraphSolver):
728
963
  """Adds a node that solves graph-like problems within the data.
@@ -735,14 +970,17 @@ class FlowGraph:
735
970
  graph_solver_settings: The settings object defining the graph inputs
736
971
  and the specific algorithm to apply.
737
972
  """
973
+
738
974
  def _func(fl: FlowDataEngine) -> FlowDataEngine:
739
975
  return fl.solve_graph(graph_solver_settings.graph_solver_input)
740
976
 
741
- self.add_node_step(node_id=graph_solver_settings.node_id,
742
- function=_func,
743
- node_type='graph_solver',
744
- setting_input=graph_solver_settings,
745
- input_node_ids=[graph_solver_settings.depending_on_id])
977
+ self.add_node_step(
978
+ node_id=graph_solver_settings.node_id,
979
+ function=_func,
980
+ node_type="graph_solver",
981
+ setting_input=graph_solver_settings,
982
+ input_node_ids=[graph_solver_settings.depending_on_id],
983
+ )
746
984
 
747
985
  def add_formula(self, function_settings: input_schema.NodeFormula):
748
986
  """Adds a node that applies a formula to create or modify a column.
@@ -757,24 +995,28 @@ class FlowGraph:
757
995
  else:
758
996
  output_type = None
759
997
  if output_type not in (None, transform_schema.AUTO_DATA_TYPE):
760
- new_col = [FlowfileColumn.from_input(column_name=function_settings.function.field.name,
761
- data_type=str(output_type))]
998
+ new_col = [
999
+ FlowfileColumn.from_input(column_name=function_settings.function.field.name, data_type=str(output_type))
1000
+ ]
762
1001
  else:
763
- new_col = [FlowfileColumn.from_input(function_settings.function.field.name, 'String')]
1002
+ new_col = [FlowfileColumn.from_input(function_settings.function.field.name, "String")]
764
1003
 
765
1004
  def _func(fl: FlowDataEngine):
766
- return fl.apply_sql_formula(func=function_settings.function.function,
767
- col_name=function_settings.function.field.name,
768
- output_data_type=output_type)
769
-
770
- self.add_node_step(function_settings.node_id, _func,
771
- output_schema=new_col,
772
- node_type='formula',
773
- renew_schema=False,
774
- setting_input=function_settings,
775
- input_node_ids=[function_settings.depending_on_id]
776
- )
777
- # TODO: Add validation here
1005
+ return fl.apply_sql_formula(
1006
+ func=function_settings.function.function,
1007
+ col_name=function_settings.function.field.name,
1008
+ output_data_type=output_type,
1009
+ )
1010
+
1011
+ self.add_node_step(
1012
+ function_settings.node_id,
1013
+ _func,
1014
+ output_schema=new_col,
1015
+ node_type="formula",
1016
+ renew_schema=False,
1017
+ setting_input=function_settings,
1018
+ input_node_ids=[function_settings.depending_on_id],
1019
+ )
778
1020
  if error != "":
779
1021
  node = self.get_node(function_settings.node_id)
780
1022
  node.results.errors = error
@@ -791,22 +1033,27 @@ class FlowGraph:
791
1033
  Returns:
792
1034
  The `FlowGraph` instance for method chaining.
793
1035
  """
1036
+
794
1037
  def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
795
1038
  for left_select in cross_join_settings.cross_join_input.left_select.renames:
796
1039
  left_select.is_available = True if left_select.old_name in main.schema else False
797
1040
  for right_select in cross_join_settings.cross_join_input.right_select.renames:
798
1041
  right_select.is_available = True if right_select.old_name in right.schema else False
799
- return main.do_cross_join(cross_join_input=cross_join_settings.cross_join_input,
800
- auto_generate_selection=cross_join_settings.auto_generate_selection,
801
- verify_integrity=False,
802
- other=right)
803
-
804
- self.add_node_step(node_id=cross_join_settings.node_id,
805
- function=_func,
806
- input_columns=[],
807
- node_type='cross_join',
808
- setting_input=cross_join_settings,
809
- input_node_ids=cross_join_settings.depending_on_ids)
1042
+ return main.do_cross_join(
1043
+ cross_join_input=cross_join_settings.cross_join_input,
1044
+ auto_generate_selection=cross_join_settings.auto_generate_selection,
1045
+ verify_integrity=False,
1046
+ other=right,
1047
+ )
1048
+
1049
+ self.add_node_step(
1050
+ node_id=cross_join_settings.node_id,
1051
+ function=_func,
1052
+ input_columns=[],
1053
+ node_type="cross_join",
1054
+ setting_input=cross_join_settings,
1055
+ input_node_ids=cross_join_settings.depending_on_ids,
1056
+ )
810
1057
  return self
811
1058
 
812
1059
  def add_join(self, join_settings: input_schema.NodeJoin) -> "FlowGraph":
@@ -818,22 +1065,27 @@ class FlowGraph:
818
1065
  Returns:
819
1066
  The `FlowGraph` instance for method chaining.
820
1067
  """
1068
+
821
1069
  def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
822
1070
  for left_select in join_settings.join_input.left_select.renames:
823
1071
  left_select.is_available = True if left_select.old_name in main.schema else False
824
1072
  for right_select in join_settings.join_input.right_select.renames:
825
1073
  right_select.is_available = True if right_select.old_name in right.schema else False
826
- return main.join(join_input=join_settings.join_input,
827
- auto_generate_selection=join_settings.auto_generate_selection,
828
- verify_integrity=False,
829
- other=right)
830
-
831
- self.add_node_step(node_id=join_settings.node_id,
832
- function=_func,
833
- input_columns=[],
834
- node_type='join',
835
- setting_input=join_settings,
836
- input_node_ids=join_settings.depending_on_ids)
1074
+ return main.join(
1075
+ join_input=join_settings.join_input,
1076
+ auto_generate_selection=join_settings.auto_generate_selection,
1077
+ verify_integrity=False,
1078
+ other=right,
1079
+ )
1080
+
1081
+ self.add_node_step(
1082
+ node_id=join_settings.node_id,
1083
+ function=_func,
1084
+ input_columns=[],
1085
+ node_type="join",
1086
+ setting_input=join_settings,
1087
+ input_node_ids=join_settings.depending_on_ids,
1088
+ )
837
1089
  return self
838
1090
 
839
1091
  def add_fuzzy_match(self, fuzzy_settings: input_schema.NodeFuzzyMatch) -> "FlowGraph":
@@ -849,31 +1101,43 @@ class FlowGraph:
849
1101
  def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
850
1102
  node = self.get_node(node_id=fuzzy_settings.node_id)
851
1103
  if self.execution_location == "local":
852
- return main.fuzzy_join(fuzzy_match_input=deepcopy(fuzzy_settings.join_input),
853
- other=right,
854
- node_logger=self.flow_logger.get_node_logger(fuzzy_settings.node_id))
1104
+ return main.fuzzy_join(
1105
+ fuzzy_match_input=deepcopy(fuzzy_settings.join_input),
1106
+ other=right,
1107
+ node_logger=self.flow_logger.get_node_logger(fuzzy_settings.node_id),
1108
+ )
855
1109
 
856
- f = main.start_fuzzy_join(fuzzy_match_input=deepcopy(fuzzy_settings.join_input), other=right, file_ref=node.hash,
857
- flow_id=self.flow_id, node_id=fuzzy_settings.node_id)
1110
+ f = main.start_fuzzy_join(
1111
+ fuzzy_match_input=deepcopy(fuzzy_settings.join_input),
1112
+ other=right,
1113
+ file_ref=node.hash,
1114
+ flow_id=self.flow_id,
1115
+ node_id=fuzzy_settings.node_id,
1116
+ )
858
1117
  logger.info("Started the fuzzy match action")
859
1118
  node._fetch_cached_df = f # Add to the node so it can be cancelled and fetch later if needed
860
1119
  return FlowDataEngine(f.get_result())
861
1120
 
862
1121
  def schema_callback():
863
- fm_input_copy = FuzzyMatchInputManager(fuzzy_settings.join_input) # Deepcopy create an unique object per func
1122
+ fm_input_copy = FuzzyMatchInputManager(
1123
+ fuzzy_settings.join_input
1124
+ ) # Deepcopy create an unique object per func
864
1125
  node = self.get_node(node_id=fuzzy_settings.node_id)
865
- return calculate_fuzzy_match_schema(fm_input_copy,
866
- left_schema=node.node_inputs.main_inputs[0].schema,
867
- right_schema=node.node_inputs.right_input.schema
868
- )
869
-
870
- self.add_node_step(node_id=fuzzy_settings.node_id,
871
- function=_func,
872
- input_columns=[],
873
- node_type='fuzzy_match',
874
- setting_input=fuzzy_settings,
875
- input_node_ids=fuzzy_settings.depending_on_ids,
876
- schema_callback=schema_callback)
1126
+ return calculate_fuzzy_match_schema(
1127
+ fm_input_copy,
1128
+ left_schema=node.node_inputs.main_inputs[0].schema,
1129
+ right_schema=node.node_inputs.right_input.schema,
1130
+ )
1131
+
1132
+ self.add_node_step(
1133
+ node_id=fuzzy_settings.node_id,
1134
+ function=_func,
1135
+ input_columns=[],
1136
+ node_type="fuzzy_match",
1137
+ setting_input=fuzzy_settings,
1138
+ input_node_ids=fuzzy_settings.depending_on_ids,
1139
+ schema_callback=schema_callback,
1140
+ )
877
1141
 
878
1142
  return self
879
1143
 
@@ -890,14 +1154,17 @@ class FlowGraph:
890
1154
  Returns:
891
1155
  The `FlowGraph` instance for method chaining.
892
1156
  """
1157
+
893
1158
  def _func(table: FlowDataEngine) -> FlowDataEngine:
894
1159
  return table.split(node_text_to_rows.text_to_rows_input)
895
1160
 
896
- self.add_node_step(node_id=node_text_to_rows.node_id,
897
- function=_func,
898
- node_type='text_to_rows',
899
- setting_input=node_text_to_rows,
900
- input_node_ids=[node_text_to_rows.depending_on_id])
1161
+ self.add_node_step(
1162
+ node_id=node_text_to_rows.node_id,
1163
+ function=_func,
1164
+ node_type="text_to_rows",
1165
+ setting_input=node_text_to_rows,
1166
+ input_node_ids=[node_text_to_rows.depending_on_id],
1167
+ )
901
1168
  return self
902
1169
 
903
1170
  def add_sort(self, sort_settings: input_schema.NodeSort) -> "FlowGraph":
@@ -913,11 +1180,13 @@ class FlowGraph:
913
1180
  def _func(table: FlowDataEngine) -> FlowDataEngine:
914
1181
  return table.do_sort(sort_settings.sort_input)
915
1182
 
916
- self.add_node_step(node_id=sort_settings.node_id,
917
- function=_func,
918
- node_type='sort',
919
- setting_input=sort_settings,
920
- input_node_ids=[sort_settings.depending_on_id])
1183
+ self.add_node_step(
1184
+ node_id=sort_settings.node_id,
1185
+ function=_func,
1186
+ node_type="sort",
1187
+ setting_input=sort_settings,
1188
+ input_node_ids=[sort_settings.depending_on_id],
1189
+ )
921
1190
  return self
922
1191
 
923
1192
  def add_sample(self, sample_settings: input_schema.NodeSample) -> "FlowGraph":
@@ -929,15 +1198,17 @@ class FlowGraph:
929
1198
  Returns:
930
1199
  The `FlowGraph` instance for method chaining.
931
1200
  """
1201
+
932
1202
  def _func(table: FlowDataEngine) -> FlowDataEngine:
933
1203
  return table.get_sample(sample_settings.sample_size)
934
1204
 
935
- self.add_node_step(node_id=sample_settings.node_id,
936
- function=_func,
937
- node_type='sample',
938
- setting_input=sample_settings,
939
- input_node_ids=[sample_settings.depending_on_id]
940
- )
1205
+ self.add_node_step(
1206
+ node_id=sample_settings.node_id,
1207
+ function=_func,
1208
+ node_type="sample",
1209
+ setting_input=sample_settings,
1210
+ input_node_ids=[sample_settings.depending_on_id],
1211
+ )
941
1212
  return self
942
1213
 
943
1214
  def add_record_id(self, record_id_settings: input_schema.NodeRecordId) -> "FlowGraph":
@@ -954,12 +1225,13 @@ class FlowGraph:
954
1225
  def _func(table: FlowDataEngine) -> FlowDataEngine:
955
1226
  return table.add_record_id(record_id_settings.record_id_input)
956
1227
 
957
- self.add_node_step(node_id=record_id_settings.node_id,
958
- function=_func,
959
- node_type='record_id',
960
- setting_input=record_id_settings,
961
- input_node_ids=[record_id_settings.depending_on_id]
962
- )
1228
+ self.add_node_step(
1229
+ node_id=record_id_settings.node_id,
1230
+ function=_func,
1231
+ node_type="record_id",
1232
+ setting_input=record_id_settings,
1233
+ input_node_ids=[record_id_settings.depending_on_id],
1234
+ )
963
1235
  return self
964
1236
 
965
1237
  def add_select(self, select_settings: input_schema.NodeSelect) -> "FlowGraph":
@@ -991,16 +1263,19 @@ class FlowGraph:
991
1263
  for i in ids_to_remove:
992
1264
  v = select_cols.pop(i)
993
1265
  del v
994
- return table.do_select(select_inputs=transform_schema.SelectInputs(select_cols),
995
- keep_missing=select_settings.keep_missing)
996
-
997
- self.add_node_step(node_id=select_settings.node_id,
998
- function=_func,
999
- input_columns=[],
1000
- node_type='select',
1001
- drop_columns=list(drop_cols),
1002
- setting_input=select_settings,
1003
- input_node_ids=[select_settings.depending_on_id])
1266
+ return table.do_select(
1267
+ select_inputs=transform_schema.SelectInputs(select_cols), keep_missing=select_settings.keep_missing
1268
+ )
1269
+
1270
+ self.add_node_step(
1271
+ node_id=select_settings.node_id,
1272
+ function=_func,
1273
+ input_columns=[],
1274
+ node_type="select",
1275
+ drop_columns=list(drop_cols),
1276
+ setting_input=select_settings,
1277
+ input_node_ids=[select_settings.depending_on_id],
1278
+ )
1004
1279
  return self
1005
1280
 
1006
1281
  @property
@@ -1008,7 +1283,7 @@ class FlowGraph:
1008
1283
  """Checks if the graph has any nodes."""
1009
1284
  return len(self._node_ids) > 0
1010
1285
 
1011
- def delete_node(self, node_id: Union[int, str]):
1286
+ def delete_node(self, node_id: int | str):
1012
1287
  """Deletes a node from the graph and updates all its connections.
1013
1288
 
1014
1289
  Args:
@@ -1023,7 +1298,7 @@ class FlowGraph:
1023
1298
  if node:
1024
1299
  logger.info(f"Found node: {node_id}, processing deletion")
1025
1300
 
1026
- lead_to_steps: List[FlowNode] = node.leads_to_nodes
1301
+ lead_to_steps: list[FlowNode] = node.leads_to_nodes
1027
1302
  logger.debug(f"Node {node_id} leads to {len(lead_to_steps)} other nodes")
1028
1303
 
1029
1304
  if len(lead_to_steps) > 0:
@@ -1032,7 +1307,7 @@ class FlowGraph:
1032
1307
  lead_to_step.delete_input_node(node_id, complete=True)
1033
1308
 
1034
1309
  if not node.is_start:
1035
- depends_on: List[FlowNode] = node.node_inputs.get_all_inputs()
1310
+ depends_on: list[FlowNode] = node.node_inputs.get_all_inputs()
1036
1311
  logger.debug(f"Node {node_id} depends on {len(depends_on)} other nodes")
1037
1312
 
1038
1313
  for depend_on in depends_on:
@@ -1052,18 +1327,20 @@ class FlowGraph:
1052
1327
  """Checks if the graph has an initial input data source."""
1053
1328
  return self._input_data is not None
1054
1329
 
1055
- def add_node_step(self,
1056
- node_id: Union[int, str],
1057
- function: Callable,
1058
- input_columns: List[str] = None,
1059
- output_schema: List[FlowfileColumn] = None,
1060
- node_type: str = None,
1061
- drop_columns: List[str] = None,
1062
- renew_schema: bool = True,
1063
- setting_input: Any = None,
1064
- cache_results: bool = None,
1065
- schema_callback: Callable = None,
1066
- input_node_ids: List[int] = None) -> FlowNode:
1330
+ def add_node_step(
1331
+ self,
1332
+ node_id: int | str,
1333
+ function: Callable,
1334
+ input_columns: list[str] = None,
1335
+ output_schema: list[FlowfileColumn] = None,
1336
+ node_type: str = None,
1337
+ drop_columns: list[str] = None,
1338
+ renew_schema: bool = True,
1339
+ setting_input: Any = None,
1340
+ cache_results: bool = None,
1341
+ schema_callback: Callable = None,
1342
+ input_node_ids: list[int] = None,
1343
+ ) -> FlowNode:
1067
1344
  """The core method for adding or updating a node in the graph.
1068
1345
 
1069
1346
  Args:
@@ -1096,29 +1373,33 @@ class FlowGraph:
1096
1373
  if isinstance(input_columns, str):
1097
1374
  input_columns = [input_columns]
1098
1375
  if (
1099
- input_nodes is not None or
1100
- function.__name__ in ('placeholder', 'analysis_preparation') or
1101
- node_type in ("cloud_storage_reader", "polars_lazy_frame", "input_data")
1376
+ input_nodes is not None
1377
+ or function.__name__ in ("placeholder", "analysis_preparation")
1378
+ or node_type in ("cloud_storage_reader", "polars_lazy_frame", "input_data")
1102
1379
  ):
1103
1380
  if not existing_node:
1104
- node = FlowNode(node_id=node_id,
1105
- function=function,
1106
- output_schema=output_schema,
1107
- input_columns=input_columns,
1108
- drop_columns=drop_columns,
1109
- renew_schema=renew_schema,
1110
- setting_input=setting_input,
1111
- node_type=node_type,
1112
- name=function.__name__,
1113
- schema_callback=schema_callback,
1114
- parent_uuid=self.uuid)
1381
+ node = FlowNode(
1382
+ node_id=node_id,
1383
+ function=function,
1384
+ output_schema=output_schema,
1385
+ input_columns=input_columns,
1386
+ drop_columns=drop_columns,
1387
+ renew_schema=renew_schema,
1388
+ setting_input=setting_input,
1389
+ node_type=node_type,
1390
+ name=function.__name__,
1391
+ schema_callback=schema_callback,
1392
+ parent_uuid=self.uuid,
1393
+ )
1115
1394
  else:
1116
- existing_node.update_node(function=function,
1117
- output_schema=output_schema,
1118
- input_columns=input_columns,
1119
- drop_columns=drop_columns,
1120
- setting_input=setting_input,
1121
- schema_callback=schema_callback)
1395
+ existing_node.update_node(
1396
+ function=function,
1397
+ output_schema=output_schema,
1398
+ input_columns=input_columns,
1399
+ drop_columns=drop_columns,
1400
+ setting_input=setting_input,
1401
+ schema_callback=schema_callback,
1402
+ )
1122
1403
  node = existing_node
1123
1404
  else:
1124
1405
  raise Exception("No data initialized")
@@ -1126,7 +1407,7 @@ class FlowGraph:
1126
1407
  self._node_ids.append(node_id)
1127
1408
  return node
1128
1409
 
1129
- def add_include_cols(self, include_columns: List[str]):
1410
+ def add_include_cols(self, include_columns: list[str]):
1130
1411
  """Adds columns to both the input and output column lists.
1131
1412
 
1132
1413
  Args:
@@ -1147,23 +1428,30 @@ class FlowGraph:
1147
1428
  """
1148
1429
 
1149
1430
  def _func(df: FlowDataEngine):
1150
- execute_remote = self.execution_location != 'local'
1151
- df.output(output_fs=output_file.output_settings, flow_id=self.flow_id, node_id=output_file.node_id,
1152
- execute_remote=execute_remote)
1431
+ execute_remote = self.execution_location != "local"
1432
+ df.output(
1433
+ output_fs=output_file.output_settings,
1434
+ flow_id=self.flow_id,
1435
+ node_id=output_file.node_id,
1436
+ execute_remote=execute_remote,
1437
+ )
1153
1438
  return df
1154
1439
 
1155
1440
  def schema_callback():
1156
1441
  input_node: FlowNode = self.get_node(output_file.node_id).node_inputs.main_inputs[0]
1157
1442
 
1158
1443
  return input_node.schema
1159
- input_node_id = getattr(output_file, "depending_on_id") if hasattr(output_file, 'depending_on_id') else None
1160
- self.add_node_step(node_id=output_file.node_id,
1161
- function=_func,
1162
- input_columns=[],
1163
- node_type='output',
1164
- setting_input=output_file,
1165
- schema_callback=schema_callback,
1166
- input_node_ids=[input_node_id])
1444
+
1445
+ input_node_id = output_file.depending_on_id if hasattr(output_file, "depending_on_id") else None
1446
+ self.add_node_step(
1447
+ node_id=output_file.node_id,
1448
+ function=_func,
1449
+ input_columns=[],
1450
+ node_type="output",
1451
+ setting_input=output_file,
1452
+ schema_callback=schema_callback,
1453
+ input_node_ids=[input_node_id],
1454
+ )
1167
1455
 
1168
1456
  def add_database_writer(self, node_database_writer: input_schema.NodeDatabaseWriter):
1169
1457
  """Adds a node to write data to a database.
@@ -1172,18 +1460,20 @@ class FlowGraph:
1172
1460
  node_database_writer: The settings for the database writer node.
1173
1461
  """
1174
1462
 
1175
- node_type = 'database_writer'
1463
+ node_type = "database_writer"
1176
1464
  database_settings: input_schema.DatabaseWriteSettings = node_database_writer.database_write_settings
1177
- database_connection: Optional[input_schema.DatabaseConnection | input_schema.FullDatabaseConnection]
1178
- if database_settings.connection_mode == 'inline':
1465
+ database_connection: input_schema.DatabaseConnection | input_schema.FullDatabaseConnection | None
1466
+ if database_settings.connection_mode == "inline":
1179
1467
  database_connection: input_schema.DatabaseConnection = database_settings.database_connection
1180
- encrypted_password = get_encrypted_secret(current_user_id=node_database_writer.user_id,
1181
- secret_name=database_connection.password_ref)
1468
+ encrypted_password = get_encrypted_secret(
1469
+ current_user_id=node_database_writer.user_id, secret_name=database_connection.password_ref
1470
+ )
1182
1471
  if encrypted_password is None:
1183
1472
  raise HTTPException(status_code=400, detail="Password not found")
1184
1473
  else:
1185
- database_reference_settings = get_local_database_connection(database_settings.database_connection_name,
1186
- node_database_writer.user_id)
1474
+ database_reference_settings = get_local_database_connection(
1475
+ database_settings.database_connection_name, node_database_writer.user_id
1476
+ )
1187
1477
  encrypted_password = database_reference_settings.password.get_secret_value()
1188
1478
 
1189
1479
  def _func(df: FlowDataEngine):
@@ -1192,14 +1482,20 @@ class FlowGraph:
1192
1482
  sql_models.DatabaseExternalWriteSettings.create_from_from_node_database_writer(
1193
1483
  node_database_writer=node_database_writer,
1194
1484
  password=encrypted_password,
1195
- table_name=(database_settings.schema_name+'.'+database_settings.table_name
1196
- if database_settings.schema_name else database_settings.table_name),
1197
- database_reference_settings=(database_reference_settings if database_settings.connection_mode == 'reference'
1198
- else None),
1199
- lf=df.data_frame
1485
+ table_name=(
1486
+ database_settings.schema_name + "." + database_settings.table_name
1487
+ if database_settings.schema_name
1488
+ else database_settings.table_name
1489
+ ),
1490
+ database_reference_settings=(
1491
+ database_reference_settings if database_settings.connection_mode == "reference" else None
1492
+ ),
1493
+ lf=df.data_frame,
1200
1494
  )
1201
1495
  )
1202
- external_database_writer = ExternalDatabaseWriter(database_external_write_settings, wait_on_completion=False)
1496
+ external_database_writer = ExternalDatabaseWriter(
1497
+ database_external_write_settings, wait_on_completion=False
1498
+ )
1203
1499
  node._fetch_cached_df = external_database_writer
1204
1500
  external_database_writer.get_result()
1205
1501
  return df
@@ -1226,56 +1522,64 @@ class FlowGraph:
1226
1522
  """
1227
1523
 
1228
1524
  logger.info("Adding database reader")
1229
- node_type = 'database_reader'
1525
+ node_type = "database_reader"
1230
1526
  database_settings: input_schema.DatabaseSettings = node_database_reader.database_settings
1231
- database_connection: Optional[input_schema.DatabaseConnection | input_schema.FullDatabaseConnection]
1232
- if database_settings.connection_mode == 'inline':
1527
+ database_connection: input_schema.DatabaseConnection | input_schema.FullDatabaseConnection | None
1528
+ if database_settings.connection_mode == "inline":
1233
1529
  database_connection: input_schema.DatabaseConnection = database_settings.database_connection
1234
- encrypted_password = get_encrypted_secret(current_user_id=node_database_reader.user_id,
1235
- secret_name=database_connection.password_ref)
1530
+ encrypted_password = get_encrypted_secret(
1531
+ current_user_id=node_database_reader.user_id, secret_name=database_connection.password_ref
1532
+ )
1236
1533
  if encrypted_password is None:
1237
1534
  raise HTTPException(status_code=400, detail="Password not found")
1238
1535
  else:
1239
- database_reference_settings = get_local_database_connection(database_settings.database_connection_name,
1240
- node_database_reader.user_id)
1536
+ database_reference_settings = get_local_database_connection(
1537
+ database_settings.database_connection_name, node_database_reader.user_id
1538
+ )
1241
1539
  database_connection = database_reference_settings
1242
1540
  encrypted_password = database_reference_settings.password.get_secret_value()
1243
1541
 
1244
1542
  def _func():
1245
- sql_source = BaseSqlSource(query=None if database_settings.query_mode == 'table' else database_settings.query,
1246
- table_name=database_settings.table_name,
1247
- schema_name=database_settings.schema_name,
1248
- fields=node_database_reader.fields,
1249
- )
1543
+ sql_source = BaseSqlSource(
1544
+ query=None if database_settings.query_mode == "table" else database_settings.query,
1545
+ table_name=database_settings.table_name,
1546
+ schema_name=database_settings.schema_name,
1547
+ fields=node_database_reader.fields,
1548
+ )
1250
1549
  database_external_read_settings = (
1251
1550
  sql_models.DatabaseExternalReadSettings.create_from_from_node_database_reader(
1252
1551
  node_database_reader=node_database_reader,
1253
1552
  password=encrypted_password,
1254
1553
  query=sql_source.query,
1255
- database_reference_settings=(database_reference_settings if database_settings.connection_mode == 'reference'
1256
- else None),
1554
+ database_reference_settings=(
1555
+ database_reference_settings if database_settings.connection_mode == "reference" else None
1556
+ ),
1257
1557
  )
1258
1558
  )
1259
1559
 
1260
- external_database_fetcher = ExternalDatabaseFetcher(database_external_read_settings, wait_on_completion=False)
1560
+ external_database_fetcher = ExternalDatabaseFetcher(
1561
+ database_external_read_settings, wait_on_completion=False
1562
+ )
1261
1563
  node._fetch_cached_df = external_database_fetcher
1262
1564
  fl = FlowDataEngine(external_database_fetcher.get_result())
1263
1565
  node_database_reader.fields = [c.get_minimal_field_info() for c in fl.schema]
1264
1566
  return fl
1265
1567
 
1266
1568
  def schema_callback():
1267
- sql_source = SqlSource(connection_string=
1268
- sql_utils.construct_sql_uri(database_type=database_connection.database_type,
1269
- host=database_connection.host,
1270
- port=database_connection.port,
1271
- database=database_connection.database,
1272
- username=database_connection.username,
1273
- password=decrypt_secret(encrypted_password)),
1274
- query=None if database_settings.query_mode == 'table' else database_settings.query,
1275
- table_name=database_settings.table_name,
1276
- schema_name=database_settings.schema_name,
1277
- fields=node_database_reader.fields,
1278
- )
1569
+ sql_source = SqlSource(
1570
+ connection_string=sql_utils.construct_sql_uri(
1571
+ database_type=database_connection.database_type,
1572
+ host=database_connection.host,
1573
+ port=database_connection.port,
1574
+ database=database_connection.database,
1575
+ username=database_connection.username,
1576
+ password=decrypt_secret(encrypted_password),
1577
+ ),
1578
+ query=None if database_settings.query_mode == "table" else database_settings.query,
1579
+ table_name=database_settings.table_name,
1580
+ schema_name=database_settings.schema_name,
1581
+ fields=node_database_reader.fields,
1582
+ )
1279
1583
  return sql_source.get_schema()
1280
1584
 
1281
1585
  node = self.get_node(node_database_reader.node_id)
@@ -1285,16 +1589,20 @@ class FlowGraph:
1285
1589
  node.function = _func
1286
1590
  node.setting_input = node_database_reader
1287
1591
  node.node_settings.cache_results = node_database_reader.cache_results
1288
- if node_database_reader.node_id not in set(start_node.node_id for start_node in self._flow_starts):
1289
- self._flow_starts.append(node)
1592
+ self.add_node_to_starting_list(node)
1290
1593
  node.schema_callback = schema_callback
1291
1594
  else:
1292
- node = FlowNode(node_database_reader.node_id, function=_func,
1293
- setting_input=node_database_reader,
1294
- name=node_type, node_type=node_type, parent_uuid=self.uuid,
1295
- schema_callback=schema_callback)
1595
+ node = FlowNode(
1596
+ node_database_reader.node_id,
1597
+ function=_func,
1598
+ setting_input=node_database_reader,
1599
+ name=node_type,
1600
+ node_type=node_type,
1601
+ parent_uuid=self.uuid,
1602
+ schema_callback=schema_callback,
1603
+ )
1296
1604
  self._node_db[node_database_reader.node_id] = node
1297
- self._flow_starts.append(node)
1605
+ self.add_node_to_starting_list(node)
1298
1606
  self._node_ids.append(node_database_reader.node_id)
1299
1607
 
1300
1608
  def add_sql_source(self, external_source_input: input_schema.NodeExternalSource):
@@ -1305,7 +1613,7 @@ class FlowGraph:
1305
1613
  Args:
1306
1614
  external_source_input: The settings for the external SQL source node.
1307
1615
  """
1308
- logger.info('Adding sql source')
1616
+ logger.info("Adding sql source")
1309
1617
  self.add_external_source(external_source_input)
1310
1618
 
1311
1619
  def add_cloud_storage_writer(self, node_cloud_storage_writer: input_schema.NodeCloudStorageWriter) -> None:
@@ -1316,19 +1624,20 @@ class FlowGraph:
1316
1624
  """
1317
1625
 
1318
1626
  node_type = "cloud_storage_writer"
1627
+
1319
1628
  def _func(df: FlowDataEngine):
1320
1629
  df.lazy = True
1321
- execute_remote = self.execution_location != 'local'
1630
+ execute_remote = self.execution_location != "local"
1322
1631
  cloud_connection_settings = get_cloud_connection_settings(
1323
1632
  connection_name=node_cloud_storage_writer.cloud_storage_settings.connection_name,
1324
1633
  user_id=node_cloud_storage_writer.user_id,
1325
- auth_mode=node_cloud_storage_writer.cloud_storage_settings.auth_mode
1634
+ auth_mode=node_cloud_storage_writer.cloud_storage_settings.auth_mode,
1326
1635
  )
1327
1636
  full_cloud_storage_connection = FullCloudStorageConnection(
1328
1637
  storage_type=cloud_connection_settings.storage_type,
1329
1638
  auth_method=cloud_connection_settings.auth_method,
1330
1639
  aws_allow_unsafe_html=cloud_connection_settings.aws_allow_unsafe_html,
1331
- **CloudStorageReader.get_storage_options(cloud_connection_settings)
1640
+ **CloudStorageReader.get_storage_options(cloud_connection_settings),
1332
1641
  )
1333
1642
  if execute_remote:
1334
1643
  settings = get_cloud_storage_write_settings_worker_interface(
@@ -1336,7 +1645,8 @@ class FlowGraph:
1336
1645
  connection=full_cloud_storage_connection,
1337
1646
  lf=df.data_frame,
1338
1647
  flowfile_node_id=node_cloud_storage_writer.node_id,
1339
- flowfile_flow_id=self.flow_id)
1648
+ flowfile_flow_id=self.flow_id,
1649
+ )
1340
1650
  external_database_writer = ExternalCloudWriter(settings, wait_on_completion=False)
1341
1651
  node._fetch_cached_df = external_database_writer
1342
1652
  external_database_writer.get_result()
@@ -1362,7 +1672,7 @@ class FlowGraph:
1362
1672
  node_type=node_type,
1363
1673
  setting_input=node_cloud_storage_writer,
1364
1674
  schema_callback=schema_callback,
1365
- input_node_ids=[node_cloud_storage_writer.depending_on_id]
1675
+ input_node_ids=[node_cloud_storage_writer.depending_on_id],
1366
1676
  )
1367
1677
 
1368
1678
  node = self.get_node(node_cloud_storage_writer.node_id)
@@ -1380,49 +1690,53 @@ class FlowGraph:
1380
1690
  def _func():
1381
1691
  logger.info("Starting to run the schema callback for cloud storage reader")
1382
1692
  self.flow_logger.info("Starting to run the schema callback for cloud storage reader")
1383
- settings = CloudStorageReadSettingsInternal(read_settings=cloud_storage_read_settings,
1384
- connection=get_cloud_connection_settings(
1385
- connection_name=cloud_storage_read_settings.connection_name,
1386
- user_id=node_cloud_storage_reader.user_id,
1387
- auth_mode=cloud_storage_read_settings.auth_mode
1388
- ))
1693
+ settings = CloudStorageReadSettingsInternal(
1694
+ read_settings=cloud_storage_read_settings,
1695
+ connection=get_cloud_connection_settings(
1696
+ connection_name=cloud_storage_read_settings.connection_name,
1697
+ user_id=node_cloud_storage_reader.user_id,
1698
+ auth_mode=cloud_storage_read_settings.auth_mode,
1699
+ ),
1700
+ )
1389
1701
  fl = FlowDataEngine.from_cloud_storage_obj(settings)
1390
1702
  return fl
1391
1703
 
1392
- node = self.add_node_step(node_id=node_cloud_storage_reader.node_id,
1393
- function=_func,
1394
- cache_results=node_cloud_storage_reader.cache_results,
1395
- setting_input=node_cloud_storage_reader,
1396
- node_type=node_type,
1397
- )
1398
- if node_cloud_storage_reader.node_id not in set(start_node.node_id for start_node in self._flow_starts):
1399
- self._flow_starts.append(node)
1704
+ node = self.add_node_step(
1705
+ node_id=node_cloud_storage_reader.node_id,
1706
+ function=_func,
1707
+ cache_results=node_cloud_storage_reader.cache_results,
1708
+ setting_input=node_cloud_storage_reader,
1709
+ node_type=node_type,
1710
+ )
1711
+ self.add_node_to_starting_list(node)
1400
1712
 
1401
- def add_external_source(self,
1402
- external_source_input: input_schema.NodeExternalSource):
1713
+ def add_external_source(self, external_source_input: input_schema.NodeExternalSource):
1403
1714
  """Adds a node for a custom external data source.
1404
1715
 
1405
1716
  Args:
1406
1717
  external_source_input: The settings for the external source node.
1407
1718
  """
1408
1719
 
1409
- node_type = 'external_source'
1720
+ node_type = "external_source"
1410
1721
  external_source_script = getattr(external_sources.custom_external_sources, external_source_input.identifier)
1411
- source_settings = (getattr(input_schema, snake_case_to_camel_case(external_source_input.identifier)).
1412
- model_validate(external_source_input.source_settings))
1413
- if hasattr(external_source_script, 'initial_getter'):
1414
- initial_getter = getattr(external_source_script, 'initial_getter')(source_settings)
1722
+ source_settings = getattr(
1723
+ input_schema, snake_case_to_camel_case(external_source_input.identifier)
1724
+ ).model_validate(external_source_input.source_settings)
1725
+ if hasattr(external_source_script, "initial_getter"):
1726
+ initial_getter = external_source_script.initial_getter(source_settings)
1415
1727
  else:
1416
1728
  initial_getter = None
1417
1729
  data_getter = external_source_script.getter(source_settings)
1418
- external_source = data_source_factory(source_type='custom',
1419
- data_getter=data_getter,
1420
- initial_data_getter=initial_getter,
1421
- orientation=external_source_input.source_settings.orientation,
1422
- schema=None)
1730
+ external_source = data_source_factory(
1731
+ source_type="custom",
1732
+ data_getter=data_getter,
1733
+ initial_data_getter=initial_getter,
1734
+ orientation=external_source_input.source_settings.orientation,
1735
+ schema=None,
1736
+ )
1423
1737
 
1424
1738
  def _func():
1425
- logger.info('Calling external source')
1739
+ logger.info("Calling external source")
1426
1740
  fl = FlowDataEngine.create_from_external_source(external_source=external_source)
1427
1741
  external_source_input.source_settings.fields = [c.get_minimal_field_info() for c in fl.schema]
1428
1742
  return fl
@@ -1434,31 +1748,39 @@ class FlowGraph:
1434
1748
  node.function = _func
1435
1749
  node.setting_input = external_source_input
1436
1750
  node.node_settings.cache_results = external_source_input.cache_results
1437
- if external_source_input.node_id not in set(start_node.node_id for start_node in self._flow_starts):
1438
- self._flow_starts.append(node)
1751
+ self.add_node_to_starting_list(node)
1752
+
1439
1753
  else:
1440
- node = FlowNode(external_source_input.node_id, function=_func,
1441
- setting_input=external_source_input,
1442
- name=node_type, node_type=node_type, parent_uuid=self.uuid)
1754
+ node = FlowNode(
1755
+ external_source_input.node_id,
1756
+ function=_func,
1757
+ setting_input=external_source_input,
1758
+ name=node_type,
1759
+ node_type=node_type,
1760
+ parent_uuid=self.uuid,
1761
+ )
1443
1762
  self._node_db[external_source_input.node_id] = node
1444
- self._flow_starts.append(node)
1763
+ self.add_node_to_starting_list(node)
1445
1764
  self._node_ids.append(external_source_input.node_id)
1446
1765
  if external_source_input.source_settings.fields and len(external_source_input.source_settings.fields) > 0:
1447
- logger.info('Using provided schema in the node')
1766
+ logger.info("Using provided schema in the node")
1448
1767
 
1449
1768
  def schema_callback():
1450
- return [FlowfileColumn.from_input(f.name, f.data_type) for f in
1451
- external_source_input.source_settings.fields]
1769
+ return [
1770
+ FlowfileColumn.from_input(f.name, f.data_type) for f in external_source_input.source_settings.fields
1771
+ ]
1452
1772
 
1453
1773
  node.schema_callback = schema_callback
1454
1774
  else:
1455
- logger.warning('Removing schema')
1775
+ logger.warning("Removing schema")
1456
1776
  node._schema_callback = None
1457
- self.add_node_step(node_id=external_source_input.node_id,
1458
- function=_func,
1459
- input_columns=[],
1460
- node_type=node_type,
1461
- setting_input=external_source_input)
1777
+ self.add_node_step(
1778
+ node_id=external_source_input.node_id,
1779
+ function=_func,
1780
+ input_columns=[],
1781
+ node_type=node_type,
1782
+ setting_input=external_source_input,
1783
+ )
1462
1784
 
1463
1785
  def add_read(self, input_file: input_schema.NodeRead):
1464
1786
  """Adds a node to read data from a local file (e.g., CSV, Parquet, Excel).
@@ -1466,8 +1788,10 @@ class FlowGraph:
1466
1788
  Args:
1467
1789
  input_file: The settings for the read operation.
1468
1790
  """
1469
- if (input_file.received_file.file_type in ('xlsx', 'excel') and
1470
- input_file.received_file.table_settings.sheet_name == ''):
1791
+ if (
1792
+ input_file.received_file.file_type in ("xlsx", "excel")
1793
+ and input_file.received_file.table_settings.sheet_name == ""
1794
+ ):
1471
1795
  sheet_name = fastexcel.read_excel(input_file.received_file.path).sheet_names[0]
1472
1796
  input_file.received_file.table_settings.sheet_name = sheet_name
1473
1797
 
@@ -1476,14 +1800,17 @@ class FlowGraph:
1476
1800
 
1477
1801
  def _func():
1478
1802
  input_file.received_file.set_absolute_filepath()
1479
- if input_file.received_file.file_type == 'parquet':
1803
+ if input_file.received_file.file_type == "parquet":
1480
1804
  input_data = FlowDataEngine.create_from_path(input_file.received_file)
1481
- elif input_file.received_file.file_type == 'csv' and 'utf' in input_file.received_file.table_settings.encoding:
1805
+ elif (
1806
+ input_file.received_file.file_type == "csv"
1807
+ and "utf" in input_file.received_file.table_settings.encoding
1808
+ ):
1482
1809
  input_data = FlowDataEngine.create_from_path(input_file.received_file)
1483
1810
  else:
1484
- input_data = FlowDataEngine.create_from_path_worker(input_file.received_file,
1485
- node_id=input_file.node_id,
1486
- flow_id=self.flow_id)
1811
+ input_data = FlowDataEngine.create_from_path_worker(
1812
+ input_file.received_file, node_id=input_file.node_id, flow_id=self.flow_id
1813
+ )
1487
1814
  input_data.name = input_file.received_file.name
1488
1815
  return input_data
1489
1816
 
@@ -1491,51 +1818,57 @@ class FlowGraph:
1491
1818
  schema_callback = None
1492
1819
  if node:
1493
1820
  start_hash = node.hash
1494
- node.node_type = 'read'
1495
- node.name = 'read'
1821
+ node.node_type = "read"
1822
+ node.name = "read"
1496
1823
  node.function = _func
1497
1824
  node.setting_input = input_file
1498
- if input_file.node_id not in set(start_node.node_id for start_node in self._flow_starts):
1499
- self._flow_starts.append(node)
1825
+ self.add_node_to_starting_list(node)
1500
1826
 
1501
1827
  if start_hash != node.hash:
1502
- logger.info('Hash changed, updating schema')
1828
+ logger.info("Hash changed, updating schema")
1503
1829
  if len(received_file.fields) > 0:
1504
1830
  # If the file has fields defined, we can use them to create the schema
1505
1831
  def schema_callback():
1506
1832
  return [FlowfileColumn.from_input(f.name, f.data_type) for f in received_file.fields]
1507
1833
 
1508
- elif input_file.received_file.file_type in ('csv', 'json', 'parquet'):
1834
+ elif input_file.received_file.file_type in ("csv", "json", "parquet"):
1509
1835
  # everything that can be scanned by polars
1510
1836
  def schema_callback():
1511
1837
  input_data = FlowDataEngine.create_from_path(input_file.received_file)
1512
1838
  return input_data.schema
1513
1839
 
1514
- elif input_file.received_file.file_type in ('xlsx', 'excel'):
1840
+ elif input_file.received_file.file_type in ("xlsx", "excel"):
1515
1841
  # If the file is an Excel file, we need to use the openpyxl engine to read the schema
1516
- schema_callback = get_xlsx_schema_callback(engine='openpyxl',
1517
- file_path=received_file.file_path,
1518
- sheet_name=received_file.table_settings.sheet_name,
1519
- start_row=received_file.table_settings.start_row,
1520
- end_row=received_file.table_settings.end_row,
1521
- start_column=received_file.table_settings.start_column,
1522
- end_column=received_file.table_settings.end_column,
1523
- has_headers=received_file.table_settings.has_headers)
1842
+ schema_callback = get_xlsx_schema_callback(
1843
+ engine="openpyxl",
1844
+ file_path=received_file.file_path,
1845
+ sheet_name=received_file.table_settings.sheet_name,
1846
+ start_row=received_file.table_settings.start_row,
1847
+ end_row=received_file.table_settings.end_row,
1848
+ start_column=received_file.table_settings.start_column,
1849
+ end_column=received_file.table_settings.end_column,
1850
+ has_headers=received_file.table_settings.has_headers,
1851
+ )
1524
1852
  else:
1525
1853
  schema_callback = None
1526
1854
  else:
1527
- node = FlowNode(input_file.node_id, function=_func,
1528
- setting_input=input_file,
1529
- name='read', node_type='read', parent_uuid=self.uuid)
1855
+ node = FlowNode(
1856
+ input_file.node_id,
1857
+ function=_func,
1858
+ setting_input=input_file,
1859
+ name="read",
1860
+ node_type="read",
1861
+ parent_uuid=self.uuid,
1862
+ )
1530
1863
  self._node_db[input_file.node_id] = node
1531
- self._flow_starts.append(node)
1864
+ self.add_node_to_starting_list(node)
1532
1865
  self._node_ids.append(input_file.node_id)
1533
1866
 
1534
1867
  if schema_callback is not None:
1535
1868
  node.schema_callback = schema_callback
1536
1869
  return self
1537
1870
 
1538
- def add_datasource(self, input_file: Union[input_schema.NodeDatasource, input_schema.NodeManualInput]) -> "FlowGraph":
1871
+ def add_datasource(self, input_file: input_schema.NodeDatasource | input_schema.NodeManualInput) -> "FlowGraph":
1539
1872
  """Adds a data source node to the graph.
1540
1873
 
1541
1874
  This method serves as a factory for creating starting nodes, handling both
@@ -1549,25 +1882,30 @@ class FlowGraph:
1549
1882
  """
1550
1883
  if isinstance(input_file, input_schema.NodeManualInput):
1551
1884
  input_data = FlowDataEngine(input_file.raw_data_format)
1552
- ref = 'manual_input'
1885
+ ref = "manual_input"
1553
1886
  else:
1554
1887
  input_data = FlowDataEngine(path_ref=input_file.file_ref)
1555
- ref = 'datasource'
1888
+ ref = "datasource"
1556
1889
  node = self.get_node(input_file.node_id)
1557
1890
  if node:
1558
1891
  node.node_type = ref
1559
1892
  node.name = ref
1560
1893
  node.function = input_data
1561
1894
  node.setting_input = input_file
1562
- if not input_file.node_id in set(start_node.node_id for start_node in self._flow_starts):
1563
- self._flow_starts.append(node)
1895
+ self.add_node_to_starting_list(node)
1896
+
1564
1897
  else:
1565
1898
  input_data.collect()
1566
- node = FlowNode(input_file.node_id, function=input_data,
1567
- setting_input=input_file,
1568
- name=ref, node_type=ref, parent_uuid=self.uuid)
1899
+ node = FlowNode(
1900
+ input_file.node_id,
1901
+ function=input_data,
1902
+ setting_input=input_file,
1903
+ name=ref,
1904
+ node_type=ref,
1905
+ parent_uuid=self.uuid,
1906
+ )
1569
1907
  self._node_db[input_file.node_id] = node
1570
- self._flow_starts.append(node)
1908
+ self.add_node_to_starting_list(node)
1571
1909
  self._node_ids.append(input_file.node_id)
1572
1910
  return self
1573
1911
 
@@ -1582,7 +1920,7 @@ class FlowGraph:
1582
1920
  self.add_datasource(input_file)
1583
1921
 
1584
1922
  @property
1585
- def nodes(self) -> List[FlowNode]:
1923
+ def nodes(self) -> list[FlowNode]:
1586
1924
  """Gets a list of all FlowNode objects in the graph."""
1587
1925
 
1588
1926
  return list(self._node_db.values())
@@ -1592,7 +1930,7 @@ class FlowGraph:
1592
1930
  """Gets the current execution mode ('Development' or 'Performance')."""
1593
1931
  return self.flow_settings.execution_mode
1594
1932
 
1595
- def get_implicit_starter_nodes(self) -> List[FlowNode]:
1933
+ def get_implicit_starter_nodes(self) -> list[FlowNode]:
1596
1934
  """Finds nodes that can act as starting points but are not explicitly defined as such.
1597
1935
 
1598
1936
  Some nodes, like the Polars Code node, can function without an input. This
@@ -1638,24 +1976,31 @@ class FlowGraph:
1638
1976
  if not flow_node:
1639
1977
  raise Exception("Node not found found")
1640
1978
  skip_nodes, execution_order = compute_execution_plan(
1641
- nodes=self.nodes, flow_starts=self._flow_starts+self.get_implicit_starter_nodes()
1979
+ nodes=self.nodes, flow_starts=self._flow_starts + self.get_implicit_starter_nodes()
1642
1980
  )
1643
1981
  if flow_node.node_id in [skip_node.node_id for skip_node in skip_nodes]:
1644
1982
  raise Exception("Node can not be executed because it does not have it's inputs")
1645
1983
 
1646
- def create_initial_run_information(self, number_of_nodes: int,
1647
- run_type: Literal["fetch_one", "full_run"]):
1984
+ def create_initial_run_information(self, number_of_nodes: int, run_type: Literal["fetch_one", "full_run"]):
1648
1985
  return RunInformation(
1649
- flow_id=self.flow_id, start_time=datetime.datetime.now(), end_time=None,
1650
- success=None, number_of_nodes=number_of_nodes, node_step_result=[],
1651
- run_type=run_type
1986
+ flow_id=self.flow_id,
1987
+ start_time=datetime.datetime.now(),
1988
+ end_time=None,
1989
+ success=None,
1990
+ number_of_nodes=number_of_nodes,
1991
+ node_step_result=[],
1992
+ run_type=run_type,
1652
1993
  )
1653
1994
 
1654
1995
  def create_empty_run_information(self) -> RunInformation:
1655
1996
  return RunInformation(
1656
- flow_id=self.flow_id, start_time=None, end_time=None,
1657
- success=None, number_of_nodes=0, node_step_result=[],
1658
- run_type="init"
1997
+ flow_id=self.flow_id,
1998
+ start_time=None,
1999
+ end_time=None,
2000
+ success=None,
2001
+ number_of_nodes=0,
2002
+ node_step_result=[],
2003
+ run_type="init",
1659
2004
  )
1660
2005
 
1661
2006
  def trigger_fetch_node(self, node_id: int) -> RunInformation | None:
@@ -1669,14 +2014,16 @@ class FlowGraph:
1669
2014
  self.latest_run_info = self.create_initial_run_information(1, "fetch_one")
1670
2015
  node_logger = self.flow_logger.get_node_logger(flow_node.node_id)
1671
2016
  node_result = NodeResult(node_id=flow_node.node_id, node_name=flow_node.name)
1672
- logger.info(f'Starting to run: node {flow_node.node_id}, start time: {node_result.start_timestamp}')
2017
+ logger.info(f"Starting to run: node {flow_node.node_id}, start time: {node_result.start_timestamp}")
1673
2018
  try:
1674
2019
  self.latest_run_info.node_step_result.append(node_result)
1675
- flow_node.execute_node(run_location=self.flow_settings.execution_location,
1676
- performance_mode=False,
1677
- node_logger=node_logger,
1678
- optimize_for_downstream=False,
1679
- reset_cache=True)
2020
+ flow_node.execute_node(
2021
+ run_location=self.flow_settings.execution_location,
2022
+ performance_mode=False,
2023
+ node_logger=node_logger,
2024
+ optimize_for_downstream=False,
2025
+ reset_cache=True,
2026
+ )
1680
2027
  node_result.error = str(flow_node.results.errors)
1681
2028
  if self.flow_settings.is_canceled:
1682
2029
  node_result.success = None
@@ -1691,12 +2038,12 @@ class FlowGraph:
1691
2038
  self.flow_settings.is_running = False
1692
2039
  return self.get_run_info()
1693
2040
  except Exception as e:
1694
- node_result.error = 'Node did not run'
2041
+ node_result.error = "Node did not run"
1695
2042
  node_result.success = False
1696
2043
  node_result.end_timestamp = time()
1697
2044
  node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
1698
2045
  node_result.is_running = False
1699
- node_logger.error(f'Error in node {flow_node.node_id}: {e}')
2046
+ node_logger.error(f"Error in node {flow_node.node_id}: {e}")
1700
2047
  finally:
1701
2048
  self.flow_settings.is_running = False
1702
2049
 
@@ -1713,39 +2060,38 @@ class FlowGraph:
1713
2060
  Exception: If the flow is already running.
1714
2061
  """
1715
2062
  if self.flow_settings.is_running:
1716
- raise Exception('Flow is already running')
2063
+ raise Exception("Flow is already running")
1717
2064
  try:
1718
-
1719
2065
  self.flow_settings.is_running = True
1720
2066
  self.flow_settings.is_canceled = False
1721
2067
  self.flow_logger.clear_log_file()
1722
- self.flow_logger.info('Starting to run flowfile flow...')
1723
-
2068
+ self.flow_logger.info("Starting to run flowfile flow...")
1724
2069
  skip_nodes, execution_order = compute_execution_plan(
1725
- nodes=self.nodes,
1726
- flow_starts=self._flow_starts+self.get_implicit_starter_nodes()
2070
+ nodes=self.nodes, flow_starts=self._flow_starts + self.get_implicit_starter_nodes()
1727
2071
  )
1728
2072
 
1729
2073
  self.latest_run_info = self.create_initial_run_information(len(execution_order), "full_run")
1730
2074
 
1731
2075
  skip_node_message(self.flow_logger, skip_nodes)
1732
2076
  execution_order_message(self.flow_logger, execution_order)
1733
- performance_mode = self.flow_settings.execution_mode == 'Performance'
2077
+ performance_mode = self.flow_settings.execution_mode == "Performance"
1734
2078
 
1735
2079
  for node in execution_order:
1736
2080
  node_logger = self.flow_logger.get_node_logger(node.node_id)
1737
2081
  if self.flow_settings.is_canceled:
1738
- self.flow_logger.info('Flow canceled')
2082
+ self.flow_logger.info("Flow canceled")
1739
2083
  break
1740
2084
  if node in skip_nodes:
1741
- node_logger.info(f'Skipping node {node.node_id}')
2085
+ node_logger.info(f"Skipping node {node.node_id}")
1742
2086
  continue
1743
2087
  node_result = NodeResult(node_id=node.node_id, node_name=node.name)
1744
2088
  self.latest_run_info.node_step_result.append(node_result)
1745
- logger.info(f'Starting to run: node {node.node_id}, start time: {node_result.start_timestamp}')
1746
- node.execute_node(run_location=self.flow_settings.execution_location,
1747
- performance_mode=performance_mode,
1748
- node_logger=node_logger)
2089
+ logger.info(f"Starting to run: node {node.node_id}, start time: {node_result.start_timestamp}")
2090
+ node.execute_node(
2091
+ run_location=self.flow_settings.execution_location,
2092
+ performance_mode=performance_mode,
2093
+ node_logger=node_logger,
2094
+ )
1749
2095
  try:
1750
2096
  node_result.error = str(node.results.errors)
1751
2097
  if self.flow_settings.is_canceled:
@@ -1758,22 +2104,22 @@ class FlowGraph:
1758
2104
  node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
1759
2105
  node_result.is_running = False
1760
2106
  except Exception as e:
1761
- node_result.error = 'Node did not run'
2107
+ node_result.error = "Node did not run"
1762
2108
  node_result.success = False
1763
2109
  node_result.end_timestamp = time()
1764
2110
  node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
1765
2111
  node_result.is_running = False
1766
- node_logger.error(f'Error in node {node.node_id}: {e}')
2112
+ node_logger.error(f"Error in node {node.node_id}: {e}")
1767
2113
  if not node_result.success:
1768
2114
  skip_nodes.extend(list(node.get_all_dependent_nodes()))
1769
- node_logger.info(f'Completed node with success: {node_result.success}')
2115
+ node_logger.info(f"Completed node with success: {node_result.success}")
1770
2116
  self.latest_run_info.nodes_completed += 1
1771
2117
  self.latest_run_info.end_time = datetime.datetime.now()
1772
- self.flow_logger.info('Flow completed!')
2118
+ self.flow_logger.info("Flow completed!")
1773
2119
  self.end_datetime = datetime.datetime.now()
1774
2120
  self.flow_settings.is_running = False
1775
2121
  if self.flow_settings.is_canceled:
1776
- self.flow_logger.info('Flow canceled')
2122
+ self.flow_logger.info("Flow canceled")
1777
2123
  return self.get_run_info()
1778
2124
  except Exception as e:
1779
2125
  raise e
@@ -1799,7 +2145,7 @@ class FlowGraph:
1799
2145
  return run_info
1800
2146
 
1801
2147
  @property
1802
- def node_connections(self) -> List[Tuple[int, int]]:
2148
+ def node_connections(self) -> list[tuple[int, int]]:
1803
2149
  """Computes and returns a list of all connections in the graph.
1804
2150
 
1805
2151
  Returns:
@@ -1809,8 +2155,9 @@ class FlowGraph:
1809
2155
  for node in self.nodes:
1810
2156
  outgoing_connections = [(node.node_id, ltn.node_id) for ltn in node.leads_to_nodes]
1811
2157
  incoming_connections = [(don.node_id, node.node_id) for don in node.all_inputs]
1812
- node_connections = [c for c in outgoing_connections + incoming_connections if (c[0] is not None
1813
- and c[1] is not None)]
2158
+ node_connections = [
2159
+ c for c in outgoing_connections + incoming_connections if (c[0] is not None and c[1] is not None)
2160
+ ]
1814
2161
  for node_connection in node_connections:
1815
2162
  if node_connection not in connections:
1816
2163
  connections.add(node_connection)
@@ -1871,16 +2218,18 @@ class FlowGraph:
1871
2218
  Returns:
1872
2219
  A FlowInformation object representing the complete graph.
1873
2220
  """
1874
- node_information = {node.node_id: node.get_node_information() for
1875
- node in self.nodes if node.is_setup and node.is_correct}
2221
+ node_information = {
2222
+ node.node_id: node.get_node_information() for node in self.nodes if node.is_setup and node.is_correct
2223
+ }
1876
2224
 
1877
- return schemas.FlowInformation(flow_id=self.flow_id,
1878
- flow_name=self.__name__,
1879
- flow_settings=self.flow_settings,
1880
- data=node_information,
1881
- node_starts=[v.node_id for v in self._flow_starts],
1882
- node_connections=self.node_connections
1883
- )
2225
+ return schemas.FlowInformation(
2226
+ flow_id=self.flow_id,
2227
+ flow_name=self.__name__,
2228
+ flow_settings=self.flow_settings,
2229
+ data=node_information,
2230
+ node_starts=[v.node_id for v in self._flow_starts],
2231
+ node_connections=self.node_connections,
2232
+ )
1884
2233
 
1885
2234
  def cancel(self):
1886
2235
  """Cancels an ongoing graph execution."""
@@ -1901,7 +2250,11 @@ class FlowGraph:
1901
2250
  """
1902
2251
  Handle the rename of a flow when it is being saved.
1903
2252
  """
1904
- if self.flow_settings and self.flow_settings.path and Path(self.flow_settings.path).absolute() != new_path.absolute():
2253
+ if (
2254
+ self.flow_settings
2255
+ and self.flow_settings.path
2256
+ and Path(self.flow_settings.path).absolute() != new_path.absolute()
2257
+ ):
1905
2258
  self.__name__ = new_name
1906
2259
  self.flow_settings.save_location = str(new_path.absolute())
1907
2260
  self.flow_settings.name = new_name
@@ -1928,27 +2281,27 @@ class FlowGraph:
1928
2281
  self._handle_flow_renaming(new_flow_name, path)
1929
2282
  self.flow_settings.modified_on = datetime.datetime.now().timestamp()
1930
2283
  try:
1931
- if suffix == '.flowfile':
2284
+ if suffix == ".flowfile":
1932
2285
  raise DeprecationWarning(
1933
- f"The .flowfile format is deprecated. Please use .yaml or .json formats.\n\n"
2286
+ "The .flowfile format is deprecated. Please use .yaml or .json formats.\n\n"
1934
2287
  "Or stay on v0.4.1 if you still need .flowfile support.\n\n"
1935
2288
  )
1936
- elif suffix in ('.yaml', '.yml'):
2289
+ elif suffix in (".yaml", ".yml"):
1937
2290
  flowfile_data = self.get_flowfile_data()
1938
- data = flowfile_data.model_dump(mode='json')
1939
- with open(flow_path, 'w', encoding='utf-8') as f:
2291
+ data = flowfile_data.model_dump(mode="json")
2292
+ with open(flow_path, "w", encoding="utf-8") as f:
1940
2293
  yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
1941
- elif suffix == '.json':
2294
+ elif suffix == ".json":
1942
2295
  flowfile_data = self.get_flowfile_data()
1943
- data = flowfile_data.model_dump(mode='json')
1944
- with open(flow_path, 'w', encoding='utf-8') as f:
2296
+ data = flowfile_data.model_dump(mode="json")
2297
+ with open(flow_path, "w", encoding="utf-8") as f:
1945
2298
  json.dump(data, f, indent=2, ensure_ascii=False)
1946
2299
 
1947
2300
  else:
1948
2301
  flowfile_data = self.get_flowfile_data()
1949
2302
  logger.warning(f"Unknown file extension {suffix}. Defaulting to YAML format.")
1950
- data = flowfile_data.model_dump(mode='json')
1951
- with open(flow_path, 'w', encoding='utf-8') as f:
2303
+ data = flowfile_data.model_dump(mode="json")
2304
+ with open(flow_path, "w", encoding="utf-8") as f:
1952
2305
  yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
1953
2306
 
1954
2307
  except Exception as e:
@@ -1966,11 +2319,7 @@ class FlowGraph:
1966
2319
  Returns:
1967
2320
  A dictionary representing the graph in Drawflow format.
1968
2321
  """
1969
- result = {
1970
- 'Home': {
1971
- "data": {}
1972
- }
1973
- }
2322
+ result = {"Home": {"data": {}}}
1974
2323
  flow_info: schemas.FlowInformation = self.get_node_storage()
1975
2324
 
1976
2325
  for node_id, node_info in flow_info.data.items():
@@ -1989,7 +2338,7 @@ class FlowGraph:
1989
2338
  "inputs": {},
1990
2339
  "outputs": {},
1991
2340
  "pos_x": pos_x,
1992
- "pos_y": pos_y
2341
+ "pos_y": pos_y,
1993
2342
  }
1994
2343
  except Exception as e:
1995
2344
  logger.error(e)
@@ -2003,24 +2352,27 @@ class FlowGraph:
2003
2352
  leading_to_node = self.get_node(output_node_id)
2004
2353
  input_types = leading_to_node.get_input_type(node_info.id)
2005
2354
  for input_type in input_types:
2006
- if input_type == 'main':
2007
- input_frontend_id = 'input_1'
2008
- elif input_type == 'right':
2009
- input_frontend_id = 'input_2'
2010
- elif input_type == 'left':
2011
- input_frontend_id = 'input_3'
2355
+ if input_type == "main":
2356
+ input_frontend_id = "input_1"
2357
+ elif input_type == "right":
2358
+ input_frontend_id = "input_2"
2359
+ elif input_type == "left":
2360
+ input_frontend_id = "input_3"
2012
2361
  else:
2013
- input_frontend_id = 'input_1'
2362
+ input_frontend_id = "input_1"
2014
2363
  connection = {"node": str(output_node_id), "input": input_frontend_id}
2015
2364
  connections.append(connection)
2016
2365
 
2017
- result["Home"]["data"][str(node_id)]["outputs"]["output_1"] = {
2018
- "connections": connections}
2366
+ result["Home"]["data"][str(node_id)]["outputs"]["output_1"] = {"connections": connections}
2019
2367
  else:
2020
2368
  result["Home"]["data"][str(node_id)]["outputs"] = {"output_1": {"connections": []}}
2021
2369
 
2022
2370
  # Add input to the node based on `depending_on_id` in your backend data
2023
- if node_info.left_input_id is not None or node_info.right_input_id is not None or node_info.input_ids is not None:
2371
+ if (
2372
+ node_info.left_input_id is not None
2373
+ or node_info.right_input_id is not None
2374
+ or node_info.input_ids is not None
2375
+ ):
2024
2376
  main_inputs = node_info.main_input_ids
2025
2377
  result["Home"]["data"][str(node_id)]["inputs"]["input_1"] = {
2026
2378
  "connections": [{"node": str(main_node_id), "input": "output_1"} for main_node_id in main_inputs]
@@ -2041,8 +2393,8 @@ class FlowGraph:
2041
2393
  Returns:
2042
2394
  A VueFlowInput object.
2043
2395
  """
2044
- edges: List[schemas.NodeEdge] = []
2045
- nodes: List[schemas.NodeInput] = []
2396
+ edges: list[schemas.NodeEdge] = []
2397
+ nodes: list[schemas.NodeInput] = []
2046
2398
  for node in self.nodes:
2047
2399
  nodes.append(node.get_node_input())
2048
2400
  edges.extend(node.get_edge_input())
@@ -2054,7 +2406,9 @@ class FlowGraph:
2054
2406
  for node in self.nodes:
2055
2407
  node.reset(True)
2056
2408
 
2057
- def copy_node(self, new_node_settings: input_schema.NodePromise, existing_setting_input: Any, node_type: str) -> None:
2409
+ def copy_node(
2410
+ self, new_node_settings: input_schema.NodePromise, existing_setting_input: Any, node_type: str
2411
+ ) -> None:
2058
2412
  """Creates a copy of an existing node.
2059
2413
 
2060
2414
  Args:
@@ -2067,9 +2421,7 @@ class FlowGraph:
2067
2421
  if isinstance(existing_setting_input, input_schema.NodePromise):
2068
2422
  return
2069
2423
 
2070
- combined_settings = combine_existing_settings_and_new_settings(
2071
- existing_setting_input, new_node_settings
2072
- )
2424
+ combined_settings = combine_existing_settings_and_new_settings(existing_setting_input, new_node_settings)
2073
2425
  getattr(self, f"add_{node_type}")(combined_settings)
2074
2426
 
2075
2427
  def generate_code(self):
@@ -2077,6 +2429,7 @@ class FlowGraph:
2077
2429
  This method exports the flow graph to a Polars-compatible format.
2078
2430
  """
2079
2431
  from flowfile_core.flowfile.code_generator.code_generator import export_flow_to_polars
2432
+
2080
2433
  print(export_flow_to_polars(self))
2081
2434
 
2082
2435
 
@@ -2095,13 +2448,7 @@ def combine_existing_settings_and_new_settings(setting_input: Any, new_settings:
2095
2448
  copied_setting_input = deepcopy(setting_input)
2096
2449
 
2097
2450
  # Update only attributes that exist on new_settings
2098
- fields_to_update = (
2099
- "node_id",
2100
- "pos_x",
2101
- "pos_y",
2102
- "description",
2103
- "flow_id"
2104
- )
2451
+ fields_to_update = ("node_id", "pos_x", "pos_y", "description", "flow_id")
2105
2452
 
2106
2453
  for field in fields_to_update:
2107
2454
  if hasattr(new_settings, field) and getattr(new_settings, field) is not None:
@@ -2117,12 +2464,12 @@ def add_connection(flow: FlowGraph, node_connection: input_schema.NodeConnection
2117
2464
  flow: The FlowGraph instance to modify.
2118
2465
  node_connection: An object defining the source and target of the connection.
2119
2466
  """
2120
- logger.info('adding a connection')
2467
+ logger.info("adding a connection")
2121
2468
  from_node = flow.get_node(node_connection.output_connection.node_id)
2122
2469
  to_node = flow.get_node(node_connection.input_connection.node_id)
2123
- logger.info(f'from_node={from_node}, to_node={to_node}')
2470
+ logger.info(f"from_node={from_node}, to_node={to_node}")
2124
2471
  if not (from_node and to_node):
2125
- raise HTTPException(404, 'Not not available')
2472
+ raise HTTPException(404, "Not not available")
2126
2473
  else:
2127
2474
  to_node.add_node_connection(from_node, node_connection.input_connection.get_node_input_connection_type())
2128
2475