Flowfile 0.5.1__py3-none-any.whl → 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (346):
  1. build_backends/main.py +25 -22
  2. build_backends/main_prd.py +10 -19
  3. flowfile/__init__.py +194 -74
  4. flowfile/__main__.py +10 -7
  5. flowfile/api.py +51 -57
  6. flowfile/web/__init__.py +14 -9
  7. flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
  8. flowfile/web/static/assets/AdminView-f9847d67.js +713 -0
  9. flowfile/web/static/assets/CloudConnectionView-cf85f943.css +72 -0
  10. flowfile/web/static/assets/{CloudConnectionManager-0dfba9f2.js → CloudConnectionView-faace55b.js} +11 -11
  11. flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
  12. flowfile/web/static/assets/{CloudStorageReader-d5b1b6c9.js → CloudStorageReader-d86ecaa7.js} +10 -8
  13. flowfile/web/static/assets/{CloudStorageWriter-00d87aad.js → CloudStorageWriter-0f4d9a44.js} +10 -8
  14. flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
  15. flowfile/web/static/assets/ColumnActionInput-c44b7aee.css +159 -0
  16. flowfile/web/static/assets/ColumnActionInput-f4189ae0.js +330 -0
  17. flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
  18. flowfile/web/static/assets/{ColumnSelector-4685e75d.js → ColumnSelector-e66b33da.js} +3 -5
  19. flowfile/web/static/assets/ContextMenu-49463352.js +9 -0
  20. flowfile/web/static/assets/ContextMenu-dd5f3f25.js +9 -0
  21. flowfile/web/static/assets/ContextMenu-f709b884.js +9 -0
  22. flowfile/web/static/assets/ContextMenu.vue_vue_type_script_setup_true_lang-a1bd6314.js +59 -0
  23. flowfile/web/static/assets/{CrossJoin-702a3edd.js → CrossJoin-24694b8f.js} +12 -10
  24. flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
  25. flowfile/web/static/assets/{CustomNode-b1519993.js → CustomNode-569d45ff.js} +43 -24
  26. flowfile/web/static/assets/CustomNode-edb9b939.css +42 -0
  27. flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-c20a1e16.css} +23 -21
  28. flowfile/web/static/assets/{DatabaseConnectionSettings-6f3e4ea5.js → DatabaseConnectionSettings-cfc08938.js} +5 -4
  29. flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-5bf8c75b.css} +41 -46
  30. flowfile/web/static/assets/{DatabaseReader-d38c7295.js → DatabaseReader-701feabb.js} +25 -15
  31. flowfile/web/static/assets/{DatabaseManager-cf5ef661.js → DatabaseView-0482e5b5.js} +11 -11
  32. flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
  33. flowfile/web/static/assets/{DatabaseWriter-b04ef46a.js → DatabaseWriter-16721989.js} +17 -10
  34. flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-bdcf2c8b.css} +29 -27
  35. flowfile/web/static/assets/{designer-8da3ba3a.css → DesignerView-49abb835.css} +783 -663
  36. flowfile/web/static/assets/{designer-9633482a.js → DesignerView-f64749fb.js} +1292 -3253
  37. flowfile/web/static/assets/{documentation-ca400224.js → DocumentationView-61bd2990.js} +5 -5
  38. flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-9ea6e871.css} +9 -9
  39. flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
  40. flowfile/web/static/assets/{ExploreData-5fa10ed8.js → ExploreData-e2735b13.js} +18 -9
  41. flowfile/web/static/assets/{ExternalSource-d39af878.js → ExternalSource-2535c3b2.js} +9 -7
  42. flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-7ac7373f.css} +20 -20
  43. flowfile/web/static/assets/Filter-2cdbc93c.js +287 -0
  44. flowfile/web/static/assets/Filter-7494ea97.css +48 -0
  45. flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
  46. flowfile/web/static/assets/{Formula-6b04fb1d.js → Formula-fcda3c2c.js} +13 -11
  47. flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
  48. flowfile/web/static/assets/{FuzzyMatch-999521f4.js → FuzzyMatch-f8d3b7d3.js} +12 -10
  49. flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-4b4d7db9.css} +5 -5
  50. flowfile/web/static/assets/{GraphSolver-17dd2198.js → GraphSolver-72eaa695.js} +14 -12
  51. flowfile/web/static/assets/GroupBy-5792782d.css +9 -0
  52. flowfile/web/static/assets/{GroupBy-6b039e18.js → GroupBy-8aa0598b.js} +9 -7
  53. flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
  54. flowfile/web/static/assets/{Join-24d0f113.js → Join-e40f0ffa.js} +13 -11
  55. flowfile/web/static/assets/LoginView-5111c9ae.js +134 -0
  56. flowfile/web/static/assets/LoginView-d325d632.css +172 -0
  57. flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
  58. flowfile/web/static/assets/{ManualInput-34639209.js → ManualInput-9b6f3224.js} +170 -116
  59. flowfile/web/static/assets/{MultiSelect-0e8724a3.js → MultiSelect-ef28e19e.js} +2 -2
  60. flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js → MultiSelect.vue_vue_type_script_setup_true_lang-83b3bbfd.js} +1 -1
  61. flowfile/web/static/assets/NodeDesigner-94cd4dd3.css +1429 -0
  62. flowfile/web/static/assets/NodeDesigner-d2b7ee2b.js +2712 -0
  63. flowfile/web/static/assets/{NumericInput-3d63a470.js → NumericInput-1d789794.js} +2 -2
  64. flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js → NumericInput.vue_vue_type_script_setup_true_lang-7775f83e.js} +5 -2
  65. flowfile/web/static/assets/Output-692dd25d.css +37 -0
  66. flowfile/web/static/assets/{Output-edea9802.js → Output-cefef801.js} +14 -10
  67. flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
  68. flowfile/web/static/assets/{Pivot-61d19301.js → Pivot-bab1b75b.js} +12 -10
  69. flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
  70. flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
  71. flowfile/web/static/assets/{PivotValidation-f97fec5b.js → PivotValidation-e7941f91.js} +3 -3
  72. flowfile/web/static/assets/{PivotValidation-de9f43fe.js → PivotValidation-fba09336.js} +3 -3
  73. flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
  74. flowfile/web/static/assets/{PolarsCode-bc3c9984.js → PolarsCode-740e40fa.js} +18 -9
  75. flowfile/web/static/assets/PopOver-862d7e28.js +939 -0
  76. flowfile/web/static/assets/PopOver-d96599db.css +33 -0
  77. flowfile/web/static/assets/{Read-64a3f259.js → Read-225cc63f.js} +16 -12
  78. flowfile/web/static/assets/{Read-e808b239.css → Read-90f366bc.css} +15 -15
  79. flowfile/web/static/assets/{RecordCount-3d5039be.js → RecordCount-ffc71eca.js} +6 -4
  80. flowfile/web/static/assets/{RecordId-597510e0.js → RecordId-a70bb8df.js} +9 -7
  81. flowfile/web/static/assets/{SQLQueryComponent-df51adbe.js → SQLQueryComponent-15a421f5.js} +3 -3
  82. flowfile/web/static/assets/SQLQueryComponent-edb90b98.css +29 -0
  83. flowfile/web/static/assets/{Sample-4be0a507.js → Sample-6c26afc7.js} +6 -4
  84. flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
  85. flowfile/web/static/assets/SecretSelector-ceed9496.js +113 -0
  86. flowfile/web/static/assets/{SecretManager-4839be57.js → SecretsView-214d255a.js} +35 -36
  87. flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
  88. flowfile/web/static/assets/{Select-9b72f201.js → Select-8fc29999.js} +9 -7
  89. flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
  90. flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
  91. flowfile/web/static/assets/{SettingsSection-7ded385d.js → SettingsSection-3f70e4c3.js} +3 -3
  92. flowfile/web/static/assets/{SettingsSection-f0f75a42.js → SettingsSection-83090218.js} +3 -3
  93. flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
  94. flowfile/web/static/assets/{SettingsSection-e1e9c953.js → SettingsSection-9f0d1725.js} +3 -3
  95. flowfile/web/static/assets/SetupView-3fa0aa03.js +160 -0
  96. flowfile/web/static/assets/SetupView-e2da3442.css +230 -0
  97. flowfile/web/static/assets/{SingleSelect-6c777aac.js → SingleSelect-a4a568cb.js} +2 -2
  98. flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js → SingleSelect.vue_vue_type_script_setup_true_lang-c8ebdd33.js} +1 -1
  99. flowfile/web/static/assets/{SliderInput-7cb93e62.js → SliderInput-be533e71.js} +7 -4
  100. flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
  101. flowfile/web/static/assets/{Sort-6cbde21a.js → Sort-154dad81.js} +9 -7
  102. flowfile/web/static/assets/Sort-4abb7fae.css +9 -0
  103. flowfile/web/static/assets/{TextInput-d9a40c11.js → TextInput-454e2bda.js} +2 -2
  104. flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-5896c375.js → TextInput.vue_vue_type_script_setup_true_lang-e86510d0.js} +5 -2
  105. flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
  106. flowfile/web/static/assets/{TextToRows-c4fcbf4d.js → TextToRows-ea73433d.js} +11 -10
  107. flowfile/web/static/assets/{ToggleSwitch-4ef91d19.js → ToggleSwitch-9d7b30f1.js} +2 -2
  108. flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-00f2580e.js} +1 -1
  109. flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-394a1f78.css} +14 -14
  110. flowfile/web/static/assets/{UnavailableFields-a03f512c.js → UnavailableFields-b72a2c72.js} +4 -4
  111. flowfile/web/static/assets/{Union-bfe9b996.js → Union-1e44f263.js} +8 -6
  112. flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
  113. flowfile/web/static/assets/Unique-2b705521.css +3 -0
  114. flowfile/web/static/assets/{Unique-5d023a27.js → Unique-a3bc6d0a.js} +13 -10
  115. flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-b6ad6427.css} +7 -7
  116. flowfile/web/static/assets/{Unpivot-91cc5354.js → Unpivot-e27935fc.js} +11 -9
  117. flowfile/web/static/assets/{UnpivotValidation-7ee2de44.js → UnpivotValidation-72497680.js} +3 -3
  118. flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
  119. flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
  120. flowfile/web/static/assets/{VueGraphicWalker-e51b9924.js → VueGraphicWalker-d9ab70a3.js} +4 -4
  121. flowfile/web/static/assets/{api-cf1221f0.js → api-a2102880.js} +1 -1
  122. flowfile/web/static/assets/{api-c1bad5ca.js → api-f75042b0.js} +1 -1
  123. flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-1d6acbd9.css} +41 -41
  124. flowfile/web/static/assets/{dropDown-614b998d.js → dropDown-2798a109.js} +3 -3
  125. flowfile/web/static/assets/{fullEditor-f7971590.js → fullEditor-cf7d7d93.js} +11 -10
  126. flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-fe9f7e18.css} +77 -65
  127. flowfile/web/static/assets/{genericNodeSettings-4fe5f36b.js → genericNodeSettings-14eac1c3.js} +5 -5
  128. flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
  129. flowfile/web/static/assets/{index-5429bbf8.js → index-387a6f18.js} +41806 -40958
  130. flowfile/web/static/assets/index-6b367bb5.js +38 -0
  131. flowfile/web/static/assets/{index-50508d4d.css → index-e96ab018.css} +2184 -569
  132. flowfile/web/static/assets/index-f0a6e5a5.js +2696 -0
  133. flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
  134. flowfile/web/static/assets/nodeInput-ed2ae8d7.js +2 -0
  135. flowfile/web/static/assets/{outputCsv-076b85ab.js → outputCsv-3c1757e8.js} +3 -3
  136. flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
  137. flowfile/web/static/assets/{outputExcel-0fd17dbe.js → outputExcel-686e1f48.js} +3 -3
  138. flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
  139. flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
  140. flowfile/web/static/assets/{outputParquet-b61e0847.js → outputParquet-df28faa7.js} +4 -4
  141. flowfile/web/static/assets/{readCsv-c767cb37.css → readCsv-3bfac4c3.css} +15 -15
  142. flowfile/web/static/assets/{readCsv-a8bb8b61.js → readCsv-e37eee21.js} +3 -3
  143. flowfile/web/static/assets/{readExcel-806d2826.css → readExcel-3db6b763.css} +13 -13
  144. flowfile/web/static/assets/{readExcel-67b4aee0.js → readExcel-a13f14bb.js} +5 -5
  145. flowfile/web/static/assets/{readParquet-92ce1dbc.js → readParquet-344cf746.js} +3 -3
  146. flowfile/web/static/assets/{readParquet-48c81530.css → readParquet-c5244ad5.css} +4 -4
  147. flowfile/web/static/assets/secrets.api-ae198c5c.js +65 -0
  148. flowfile/web/static/assets/{selectDynamic-92e25ee3.js → selectDynamic-6b4b0767.js} +5 -5
  149. flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
  150. flowfile/web/static/assets/{vue-codemirror.esm-41b0e0d7.js → vue-codemirror.esm-31ba0e0b.js} +31 -640
  151. flowfile/web/static/assets/{vue-content-loader.es-2c8e608f.js → vue-content-loader.es-4469c8ff.js} +1 -1
  152. flowfile/web/static/index.html +2 -2
  153. {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/METADATA +3 -4
  154. flowfile-0.5.4.dist-info/RECORD +407 -0
  155. flowfile_core/__init__.py +13 -6
  156. flowfile_core/auth/jwt.py +51 -16
  157. flowfile_core/auth/models.py +32 -7
  158. flowfile_core/auth/password.py +89 -0
  159. flowfile_core/auth/secrets.py +64 -19
  160. flowfile_core/configs/__init__.py +9 -7
  161. flowfile_core/configs/flow_logger.py +15 -14
  162. flowfile_core/configs/node_store/__init__.py +72 -4
  163. flowfile_core/configs/node_store/nodes.py +155 -172
  164. flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
  165. flowfile_core/configs/settings.py +28 -15
  166. flowfile_core/database/connection.py +7 -6
  167. flowfile_core/database/init_db.py +96 -2
  168. flowfile_core/database/models.py +3 -1
  169. flowfile_core/fileExplorer/__init__.py +17 -0
  170. flowfile_core/fileExplorer/funcs.py +145 -57
  171. flowfile_core/fileExplorer/utils.py +10 -11
  172. flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
  173. flowfile_core/flowfile/analytics/analytics_processor.py +26 -24
  174. flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
  175. flowfile_core/flowfile/analytics/utils.py +1 -1
  176. flowfile_core/flowfile/code_generator/__init__.py +11 -0
  177. flowfile_core/flowfile/code_generator/code_generator.py +706 -247
  178. flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
  179. flowfile_core/flowfile/connection_manager/models.py +1 -1
  180. flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
  181. flowfile_core/flowfile/database_connection_manager/models.py +1 -1
  182. flowfile_core/flowfile/extensions.py +17 -12
  183. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
  184. flowfile_core/flowfile/flow_data_engine/create/funcs.py +115 -83
  185. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +493 -423
  186. flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
  187. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
  188. flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
  189. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
  190. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
  191. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +31 -20
  192. flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
  193. flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
  194. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +14 -15
  195. flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
  196. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
  197. flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
  198. flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
  199. flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
  200. flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
  201. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +190 -127
  202. flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
  203. flowfile_core/flowfile/flow_data_engine/utils.py +99 -67
  204. flowfile_core/flowfile/flow_graph.py +920 -571
  205. flowfile_core/flowfile/flow_graph_utils.py +31 -49
  206. flowfile_core/flowfile/flow_node/flow_node.py +379 -258
  207. flowfile_core/flowfile/flow_node/models.py +53 -41
  208. flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
  209. flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
  210. flowfile_core/flowfile/handler.py +80 -30
  211. flowfile_core/flowfile/manage/compatibility_enhancements.py +209 -126
  212. flowfile_core/flowfile/manage/io_flowfile.py +54 -57
  213. flowfile_core/flowfile/node_designer/__init__.py +19 -13
  214. flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
  215. flowfile_core/flowfile/node_designer/custom_node.py +162 -36
  216. flowfile_core/flowfile/node_designer/ui_components.py +278 -34
  217. flowfile_core/flowfile/schema_callbacks.py +71 -51
  218. flowfile_core/flowfile/setting_generator/__init__.py +0 -1
  219. flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
  220. flowfile_core/flowfile/setting_generator/settings.py +64 -53
  221. flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
  222. flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
  223. flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
  224. flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
  225. flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
  226. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
  227. flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
  228. flowfile_core/flowfile/util/calculate_layout.py +9 -13
  229. flowfile_core/flowfile/util/execution_orderer.py +25 -17
  230. flowfile_core/flowfile/util/node_skipper.py +4 -4
  231. flowfile_core/flowfile/utils.py +19 -21
  232. flowfile_core/main.py +26 -19
  233. flowfile_core/routes/auth.py +284 -11
  234. flowfile_core/routes/cloud_connections.py +25 -25
  235. flowfile_core/routes/logs.py +21 -29
  236. flowfile_core/routes/public.py +46 -4
  237. flowfile_core/routes/routes.py +70 -34
  238. flowfile_core/routes/secrets.py +25 -27
  239. flowfile_core/routes/user_defined_components.py +483 -4
  240. flowfile_core/run_lock.py +0 -1
  241. flowfile_core/schemas/__init__.py +4 -6
  242. flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
  243. flowfile_core/schemas/cloud_storage_schemas.py +96 -66
  244. flowfile_core/schemas/input_schema.py +231 -144
  245. flowfile_core/schemas/output_model.py +49 -34
  246. flowfile_core/schemas/schemas.py +116 -89
  247. flowfile_core/schemas/transform_schema.py +518 -263
  248. flowfile_core/schemas/yaml_types.py +21 -7
  249. flowfile_core/secret_manager/secret_manager.py +123 -18
  250. flowfile_core/types.py +29 -9
  251. flowfile_core/utils/arrow_reader.py +7 -6
  252. flowfile_core/utils/excel_file_manager.py +3 -3
  253. flowfile_core/utils/fileManager.py +7 -7
  254. flowfile_core/utils/fl_executor.py +8 -10
  255. flowfile_core/utils/utils.py +4 -4
  256. flowfile_core/utils/validate_setup.py +5 -4
  257. flowfile_frame/__init__.py +117 -51
  258. flowfile_frame/adapters.py +2 -9
  259. flowfile_frame/adding_expr.py +73 -32
  260. flowfile_frame/cloud_storage/frame_helpers.py +27 -23
  261. flowfile_frame/cloud_storage/secret_manager.py +12 -26
  262. flowfile_frame/config.py +2 -5
  263. flowfile_frame/database/__init__.py +36 -0
  264. flowfile_frame/database/connection_manager.py +205 -0
  265. flowfile_frame/database/frame_helpers.py +249 -0
  266. flowfile_frame/expr.py +311 -218
  267. flowfile_frame/expr.pyi +160 -159
  268. flowfile_frame/expr_name.py +23 -23
  269. flowfile_frame/flow_frame.py +571 -476
  270. flowfile_frame/flow_frame.pyi +123 -104
  271. flowfile_frame/flow_frame_methods.py +227 -246
  272. flowfile_frame/group_frame.py +50 -20
  273. flowfile_frame/join.py +2 -2
  274. flowfile_frame/lazy.py +129 -87
  275. flowfile_frame/lazy_methods.py +83 -30
  276. flowfile_frame/list_name_space.py +55 -50
  277. flowfile_frame/selectors.py +148 -68
  278. flowfile_frame/series.py +9 -7
  279. flowfile_frame/utils.py +19 -21
  280. flowfile_worker/__init__.py +12 -7
  281. flowfile_worker/configs.py +41 -33
  282. flowfile_worker/create/__init__.py +14 -9
  283. flowfile_worker/create/funcs.py +114 -77
  284. flowfile_worker/create/models.py +46 -43
  285. flowfile_worker/create/pl_types.py +14 -15
  286. flowfile_worker/create/read_excel_tables.py +34 -41
  287. flowfile_worker/create/utils.py +22 -19
  288. flowfile_worker/external_sources/s3_source/main.py +18 -51
  289. flowfile_worker/external_sources/s3_source/models.py +34 -27
  290. flowfile_worker/external_sources/sql_source/main.py +8 -5
  291. flowfile_worker/external_sources/sql_source/models.py +13 -9
  292. flowfile_worker/flow_logger.py +10 -8
  293. flowfile_worker/funcs.py +214 -155
  294. flowfile_worker/main.py +11 -17
  295. flowfile_worker/models.py +35 -28
  296. flowfile_worker/process_manager.py +2 -3
  297. flowfile_worker/routes.py +121 -90
  298. flowfile_worker/secrets.py +114 -21
  299. flowfile_worker/spawner.py +89 -54
  300. flowfile_worker/utils.py +3 -2
  301. shared/__init__.py +2 -7
  302. shared/storage_config.py +25 -13
  303. test_utils/postgres/commands.py +3 -2
  304. test_utils/postgres/fixtures.py +9 -9
  305. test_utils/s3/commands.py +1 -1
  306. test_utils/s3/data_generator.py +3 -4
  307. test_utils/s3/demo_data_generator.py +4 -7
  308. test_utils/s3/fixtures.py +7 -5
  309. tools/migrate/__init__.py +1 -1
  310. tools/migrate/__main__.py +16 -29
  311. tools/migrate/legacy_schemas.py +251 -190
  312. tools/migrate/migrate.py +193 -181
  313. tools/migrate/tests/conftest.py +1 -3
  314. tools/migrate/tests/test_migrate.py +36 -41
  315. tools/migrate/tests/test_migration_e2e.py +28 -29
  316. tools/migrate/tests/test_node_migrations.py +50 -20
  317. flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
  318. flowfile/web/static/assets/ContextMenu-23e909da.js +0 -41
  319. flowfile/web/static/assets/ContextMenu-4c74eef1.css +0 -26
  320. flowfile/web/static/assets/ContextMenu-63cfa99b.css +0 -26
  321. flowfile/web/static/assets/ContextMenu-70ae0c79.js +0 -41
  322. flowfile/web/static/assets/ContextMenu-c13f91d0.css +0 -26
  323. flowfile/web/static/assets/ContextMenu-f149cf7c.js +0 -41
  324. flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
  325. flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
  326. flowfile/web/static/assets/Filter-9b6d08db.js +0 -164
  327. flowfile/web/static/assets/Filter-f62091b3.css +0 -20
  328. flowfile/web/static/assets/GroupBy-b9505323.css +0 -51
  329. flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
  330. flowfile/web/static/assets/Output-283fe388.css +0 -37
  331. flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
  332. flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
  333. flowfile/web/static/assets/SQLQueryComponent-36cef432.css +0 -27
  334. flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
  335. flowfile/web/static/assets/Sort-3643d625.css +0 -51
  336. flowfile/web/static/assets/Unique-f9fb0809.css +0 -51
  337. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
  338. flowfile/web/static/assets/nodeInput-5d0d6b79.js +0 -41
  339. flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
  340. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
  341. flowfile/web/static/assets/secretApi-68435402.js +0 -46
  342. flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
  343. flowfile-0.5.1.dist-info/RECORD +0 -388
  344. {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/WHEEL +0 -0
  345. {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/entry_points.txt +0 -0
  346. {flowfile-0.5.1.dist-info → flowfile-0.5.4.dist-info}/licenses/LICENSE +0 -0
@@ -1,80 +1,103 @@
1
1
  import datetime
2
-
3
- import os
4
- import yaml
5
2
  import json
6
-
7
- import polars as pl
3
+ import os
4
+ from collections.abc import Callable
5
+ from copy import deepcopy
6
+ from functools import partial
7
+ from importlib.metadata import PackageNotFoundError, version
8
8
  from pathlib import Path
9
+ from time import time
10
+ from typing import Any, Literal, Union
11
+ from uuid import uuid1
9
12
 
10
13
  import fastexcel
14
+ import polars as pl
15
+ import yaml
11
16
  from fastapi.exceptions import HTTPException
12
- from time import time
13
- from functools import partial
14
- from typing import List, Dict, Union, Callable, Any, Optional, Tuple, Literal
15
- from uuid import uuid1
16
- from copy import deepcopy
17
17
  from pyarrow.parquet import ParquetFile
18
+
18
19
  from flowfile_core.configs import logger
19
20
  from flowfile_core.configs.flow_logger import FlowLogger
20
- from flowfile_core.flowfile.sources.external_sources.factory import data_source_factory
21
- from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, cast_str_to_polars_type
22
-
21
+ from flowfile_core.configs.node_store import CUSTOM_NODE_STORE
22
+ from flowfile_core.flowfile.analytics.utils import create_graphic_walker_node_from_node_promise
23
+ from flowfile_core.flowfile.database_connection_manager.db_connections import (
24
+ get_local_cloud_connection,
25
+ get_local_database_connection,
26
+ )
23
27
  from flowfile_core.flowfile.flow_data_engine.cloud_storage_reader import CloudStorageReader
24
- from flowfile_core.schemas.transform_schema import FuzzyMatchInputManager
25
- from flowfile_core.utils.arrow_reader import get_read_top_n
26
28
  from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine, execute_polars_code
27
- from flowfile_core.flowfile.flow_data_engine.read_excel_tables import (get_open_xlsx_datatypes,
28
- get_calamine_xlsx_data_types)
29
-
30
- from flowfile_core.flowfile.schema_callbacks import (calculate_fuzzy_match_schema, pre_calculate_pivot_schema)
29
+ from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, cast_str_to_polars_type
30
+ from flowfile_core.flowfile.flow_data_engine.polars_code_parser import polars_code_parser
31
+ from flowfile_core.flowfile.flow_data_engine.read_excel_tables import (
32
+ get_calamine_xlsx_data_types,
33
+ get_open_xlsx_datatypes,
34
+ )
35
+ from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import (
36
+ ExternalCloudWriter,
37
+ ExternalDatabaseFetcher,
38
+ ExternalDatabaseWriter,
39
+ ExternalDfFetcher,
40
+ )
41
+ from flowfile_core.flowfile.flow_node.flow_node import FlowNode
42
+ from flowfile_core.flowfile.graph_tree.graph_tree import (
43
+ add_un_drawn_nodes,
44
+ build_flow_paths,
45
+ build_node_info,
46
+ calculate_depth,
47
+ define_node_connections,
48
+ draw_merged_paths,
49
+ draw_standalone_paths,
50
+ group_nodes_by_depth,
51
+ )
52
+ from flowfile_core.flowfile.node_designer.custom_node import CustomNodeBase
53
+ from flowfile_core.flowfile.schema_callbacks import calculate_fuzzy_match_schema, pre_calculate_pivot_schema
31
54
  from flowfile_core.flowfile.sources import external_sources
55
+ from flowfile_core.flowfile.sources.external_sources.factory import data_source_factory
56
+ from flowfile_core.flowfile.sources.external_sources.sql_source import models as sql_models
57
+ from flowfile_core.flowfile.sources.external_sources.sql_source import utils as sql_utils
58
+ from flowfile_core.flowfile.sources.external_sources.sql_source.sql_source import BaseSqlSource, SqlSource
59
+ from flowfile_core.flowfile.util.calculate_layout import calculate_layered_layout
60
+ from flowfile_core.flowfile.util.execution_orderer import compute_execution_plan
61
+ from flowfile_core.flowfile.utils import snake_case_to_camel_case
32
62
  from flowfile_core.schemas import input_schema, schemas, transform_schema
63
+ from flowfile_core.schemas.cloud_storage_schemas import (
64
+ AuthMethod,
65
+ CloudStorageReadSettingsInternal,
66
+ CloudStorageWriteSettingsInternal,
67
+ FullCloudStorageConnection,
68
+ get_cloud_storage_write_settings_worker_interface,
69
+ )
33
70
  from flowfile_core.schemas.output_model import NodeData, NodeResult, RunInformation
34
- from flowfile_core.schemas.cloud_storage_schemas import (CloudStorageReadSettingsInternal,
35
- CloudStorageWriteSettingsInternal,
36
- FullCloudStorageConnection,
37
- get_cloud_storage_write_settings_worker_interface, AuthMethod)
38
- from flowfile_core.flowfile.utils import snake_case_to_camel_case
39
- from flowfile_core.flowfile.analytics.utils import create_graphic_walker_node_from_node_promise
40
- from flowfile_core.flowfile.flow_node.flow_node import FlowNode
41
- from flowfile_core.flowfile.util.execution_orderer import compute_execution_plan
42
- from flowfile_core.flowfile.graph_tree.graph_tree import (add_un_drawn_nodes, build_flow_paths,
43
- build_node_info, calculate_depth,
44
- define_node_connections, draw_merged_paths,
45
- draw_standalone_paths, group_nodes_by_depth)
46
- from flowfile_core.flowfile.flow_data_engine.polars_code_parser import polars_code_parser
47
- from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import (ExternalDatabaseFetcher,
48
- ExternalDatabaseWriter,
49
- ExternalDfFetcher,
50
- ExternalCloudWriter)
51
- from flowfile_core.secret_manager.secret_manager import get_encrypted_secret, decrypt_secret
52
- from flowfile_core.flowfile.sources.external_sources.sql_source import utils as sql_utils, models as sql_models
53
- from flowfile_core.flowfile.sources.external_sources.sql_source.sql_source import SqlSource, BaseSqlSource
54
- from flowfile_core.flowfile.database_connection_manager.db_connections import (get_local_database_connection,
55
- get_local_cloud_connection)
56
- from flowfile_core.flowfile.util.calculate_layout import calculate_layered_layout
57
- from flowfile_core.flowfile.node_designer.custom_node import CustomNodeBase
58
- from importlib.metadata import version, PackageNotFoundError
71
+ from flowfile_core.schemas.transform_schema import FuzzyMatchInputManager
72
+ from flowfile_core.secret_manager.secret_manager import decrypt_secret, get_encrypted_secret
73
+ from flowfile_core.utils.arrow_reader import get_read_top_n
59
74
 
60
75
  try:
61
76
  __version__ = version("Flowfile")
62
77
  except PackageNotFoundError:
63
- __version__ = "0.0.0-dev"
78
+ __version__ = "0.5.0"
64
79
 
65
80
 
66
81
  def represent_list_json(dumper, data):
67
82
  """Use inline style for short simple lists, block style for complex ones."""
68
83
  if len(data) <= 10 and all(isinstance(item, (int, str, float, bool, type(None))) for item in data):
69
- return dumper.represent_sequence('tag:yaml.org,2002:seq', data, flow_style=True)
70
- return dumper.represent_sequence('tag:yaml.org,2002:seq', data, flow_style=False)
84
+ return dumper.represent_sequence("tag:yaml.org,2002:seq", data, flow_style=True)
85
+ return dumper.represent_sequence("tag:yaml.org,2002:seq", data, flow_style=False)
71
86
 
72
87
 
73
88
  yaml.add_representer(list, represent_list_json)
74
89
 
75
90
 
76
- def get_xlsx_schema(engine: str, file_path: str, sheet_name: str, start_row: int, start_column: int,
77
- end_row: int, end_column: int, has_headers: bool):
91
+ def get_xlsx_schema(
92
+ engine: str,
93
+ file_path: str,
94
+ sheet_name: str,
95
+ start_row: int,
96
+ start_column: int,
97
+ end_row: int,
98
+ end_column: int,
99
+ has_headers: bool,
100
+ ):
78
101
  """Calculates the schema of an XLSX file by reading a sample of rows.
79
102
 
80
103
  Args:
@@ -91,27 +114,29 @@ def get_xlsx_schema(engine: str, file_path: str, sheet_name: str, start_row: int
91
114
  A list of FlowfileColumn objects representing the schema.
92
115
  """
93
116
  try:
94
- logger.info('Starting to calculate the schema')
95
- if engine == 'openpyxl':
117
+ logger.info("Starting to calculate the schema")
118
+ if engine == "openpyxl":
96
119
  max_col = end_column if end_column > 0 else None
97
- return get_open_xlsx_datatypes(file_path=file_path,
98
- sheet_name=sheet_name,
99
- min_row=start_row + 1,
100
- min_col=start_column + 1,
101
- max_row=100,
102
- max_col=max_col, has_headers=has_headers)
103
- elif engine == 'calamine':
104
- return get_calamine_xlsx_data_types(file_path=file_path,
105
- sheet_name=sheet_name,
106
- start_row=start_row,
107
- end_row=end_row)
108
- logger.info('done calculating the schema')
120
+ return get_open_xlsx_datatypes(
121
+ file_path=file_path,
122
+ sheet_name=sheet_name,
123
+ min_row=start_row + 1,
124
+ min_col=start_column + 1,
125
+ max_row=100,
126
+ max_col=max_col,
127
+ has_headers=has_headers,
128
+ )
129
+ elif engine == "calamine":
130
+ return get_calamine_xlsx_data_types(
131
+ file_path=file_path, sheet_name=sheet_name, start_row=start_row, end_row=end_row
132
+ )
133
+ logger.info("done calculating the schema")
109
134
  except Exception as e:
110
135
  logger.error(e)
111
136
  return []
112
137
 
113
138
 
114
- def skip_node_message(flow_logger: FlowLogger, nodes: List[FlowNode]) -> None:
139
+ def skip_node_message(flow_logger: FlowLogger, nodes: list[FlowNode]) -> None:
115
140
  """Logs a warning message listing all nodes that will be skipped during execution.
116
141
 
117
142
  Args:
@@ -120,10 +145,10 @@ def skip_node_message(flow_logger: FlowLogger, nodes: List[FlowNode]) -> None:
120
145
  """
121
146
  if len(nodes) > 0:
122
147
  msg = "\n".join(str(node) for node in nodes)
123
- flow_logger.warning(f'skipping nodes:\n{msg}')
148
+ flow_logger.warning(f"skipping nodes:\n{msg}")
124
149
 
125
150
 
126
- def execution_order_message(flow_logger: FlowLogger, nodes: List[FlowNode]) -> None:
151
+ def execution_order_message(flow_logger: FlowLogger, nodes: list[FlowNode]) -> None:
127
152
  """Logs an informational message showing the determined execution order of nodes.
128
153
 
129
154
  Args:
@@ -131,11 +156,19 @@ def execution_order_message(flow_logger: FlowLogger, nodes: List[FlowNode]) -> N
131
156
  nodes: A list of FlowNode objects in the order they will be executed.
132
157
  """
133
158
  msg = "\n".join(str(node) for node in nodes)
134
- flow_logger.info(f'execution order:\n{msg}')
135
-
136
-
137
- def get_xlsx_schema_callback(engine: str, file_path: str, sheet_name: str, start_row: int, start_column: int,
138
- end_row: int, end_column: int, has_headers: bool):
159
+ flow_logger.info(f"execution order:\n{msg}")
160
+
161
+
162
+ def get_xlsx_schema_callback(
163
+ engine: str,
164
+ file_path: str,
165
+ sheet_name: str,
166
+ start_row: int,
167
+ start_column: int,
168
+ end_row: int,
169
+ end_column: int,
170
+ has_headers: bool,
171
+ ):
139
172
  """Creates a partially applied function for lazy calculation of an XLSX schema.
140
173
 
141
174
  Args:
@@ -151,12 +184,22 @@ def get_xlsx_schema_callback(engine: str, file_path: str, sheet_name: str, start
151
184
  Returns:
152
185
  A callable function that, when called, will execute `get_xlsx_schema`.
153
186
  """
154
- return partial(get_xlsx_schema, engine=engine, file_path=file_path, sheet_name=sheet_name, start_row=start_row,
155
- start_column=start_column, end_row=end_row, end_column=end_column, has_headers=has_headers)
187
+ return partial(
188
+ get_xlsx_schema,
189
+ engine=engine,
190
+ file_path=file_path,
191
+ sheet_name=sheet_name,
192
+ start_row=start_row,
193
+ start_column=start_column,
194
+ end_row=end_row,
195
+ end_column=end_column,
196
+ has_headers=has_headers,
197
+ )
156
198
 
157
199
 
158
- def get_cloud_connection_settings(connection_name: str,
159
- user_id: int, auth_mode: AuthMethod) -> FullCloudStorageConnection:
200
+ def get_cloud_connection_settings(
201
+ connection_name: str, user_id: int, auth_mode: AuthMethod
202
+ ) -> FullCloudStorageConnection:
160
203
  """Retrieves cloud storage connection settings, falling back to environment variables if needed.
161
204
 
162
205
  Args:
@@ -186,32 +229,44 @@ class FlowGraph:
186
229
 
187
230
  It manages nodes, connections, and the execution of the entire flow.
188
231
  """
232
+
189
233
  uuid: str
190
- depends_on: Dict[int, Union[ParquetFile, FlowDataEngine, "FlowGraph", pl.DataFrame,]]
234
+ depends_on: dict[
235
+ int,
236
+ Union[
237
+ ParquetFile,
238
+ FlowDataEngine,
239
+ "FlowGraph",
240
+ pl.DataFrame,
241
+ ],
242
+ ]
191
243
  _flow_id: int
192
244
  _input_data: Union[ParquetFile, FlowDataEngine, "FlowGraph"]
193
- _input_cols: List[str]
194
- _output_cols: List[str]
195
- _node_db: Dict[Union[str, int], FlowNode]
196
- _node_ids: List[Union[str, int]]
197
- _results: Optional[FlowDataEngine] = None
245
+ _input_cols: list[str]
246
+ _output_cols: list[str]
247
+ _node_db: dict[str | int, FlowNode]
248
+ _node_ids: list[str | int]
249
+ _results: FlowDataEngine | None = None
198
250
  cache_results: bool = False
199
- schema: Optional[List[FlowfileColumn]] = None
251
+ schema: list[FlowfileColumn] | None = None
200
252
  has_over_row_function: bool = False
201
- _flow_starts: List[Union[int, str]] = None
202
- latest_run_info: Optional[RunInformation] = None
253
+ _flow_starts: list[int | str] = None
254
+ latest_run_info: RunInformation | None = None
203
255
  start_datetime: datetime = None
204
256
  end_datetime: datetime = None
205
257
  _flow_settings: schemas.FlowSettings = None
206
258
  flow_logger: FlowLogger
207
259
 
208
- def __init__(self,
209
- flow_settings: schemas.FlowSettings | schemas.FlowGraphConfig,
210
- name: str = None, input_cols: List[str] = None,
211
- output_cols: List[str] = None,
212
- path_ref: str = None,
213
- input_flow: Union[ParquetFile, FlowDataEngine, "FlowGraph"] = None,
214
- cache_results: bool = False):
260
+ def __init__(
261
+ self,
262
+ flow_settings: schemas.FlowSettings | schemas.FlowGraphConfig,
263
+ name: str = None,
264
+ input_cols: list[str] = None,
265
+ output_cols: list[str] = None,
266
+ path_ref: str = None,
267
+ input_flow: Union[ParquetFile, FlowDataEngine, "FlowGraph"] = None,
268
+ cache_results: bool = False,
269
+ ):
215
270
  """Initializes a new FlowGraph instance.
216
271
 
217
272
  Args:
@@ -233,7 +288,7 @@ class FlowGraph:
233
288
  self.latest_run_info = None
234
289
  self._flow_id = flow_settings.flow_id
235
290
  self.flow_logger = FlowLogger(flow_settings.flow_id)
236
- self._flow_starts: List[FlowNode] = []
291
+ self._flow_starts: list[FlowNode] = []
237
292
  self._results = None
238
293
  self.schema = None
239
294
  self.has_over_row_function = False
@@ -255,13 +310,21 @@ class FlowGraph:
255
310
 
256
311
  @flow_settings.setter
257
312
  def flow_settings(self, flow_settings: schemas.FlowSettings):
258
- if (
259
- (self._flow_settings.execution_location != flow_settings.execution_location) or
260
- (self._flow_settings.execution_mode != flow_settings.execution_mode)
313
+ if (self._flow_settings.execution_location != flow_settings.execution_location) or (
314
+ self._flow_settings.execution_mode != flow_settings.execution_mode
261
315
  ):
262
316
  self.reset()
263
317
  self._flow_settings = flow_settings
264
318
 
319
+ def add_node_to_starting_list(self, node: FlowNode) -> None:
320
+ """Adds a node to the list of starting nodes for the flow if not already present.
321
+
322
+ Args:
323
+ node: The FlowNode to add as a starting node.
324
+ """
325
+ if node.node_id not in {self_node.node_id for self_node in self._flow_starts}:
326
+ self._flow_starts.append(node)
327
+
265
328
  def add_node_promise(self, node_promise: input_schema.NodePromise):
266
329
  """Adds a placeholder node to the graph that is not yet fully configured.
267
330
 
@@ -270,13 +333,31 @@ class FlowGraph:
270
333
  Args:
271
334
  node_promise: A promise object containing basic node information.
272
335
  """
336
+
273
337
  def placeholder(n: FlowNode = None):
274
338
  if n is None:
275
339
  return FlowDataEngine()
276
340
  return n
277
341
 
278
- self.add_node_step(node_id=node_promise.node_id, node_type=node_promise.node_type, function=placeholder,
279
- setting_input=node_promise)
342
+ self.add_node_step(
343
+ node_id=node_promise.node_id,
344
+ node_type=node_promise.node_type,
345
+ function=placeholder,
346
+ setting_input=node_promise,
347
+ )
348
+ if node_promise.is_user_defined:
349
+ node_needs_settings: bool
350
+ custom_node = CUSTOM_NODE_STORE.get(node_promise.node_type)
351
+ if custom_node is None:
352
+ raise Exception(f"Custom node type '{node_promise.node_type}' not found in registry.")
353
+ settings_schema = custom_node.model_fields["settings_schema"].default
354
+ node_needs_settings = settings_schema is not None and not settings_schema.is_empty()
355
+ if not node_needs_settings:
356
+ user_defined_node_settings = input_schema.UserDefinedNode(settings={}, **node_promise.model_dump())
357
+ initialized_model = custom_node()
358
+ self.add_user_defined_node(
359
+ custom_node=initialized_model, user_defined_node_settings=user_defined_node_settings
360
+ )
280
361
 
281
362
  def apply_layout(self, y_spacing: int = 150, x_spacing: int = 200, initial_y: int = 100):
282
363
  """Calculates and applies a layered layout to all nodes in the graph.
@@ -304,20 +385,24 @@ class FlowGraph:
304
385
  updated_count = 0
305
386
  for node_id, (pos_x, pos_y) in new_positions.items():
306
387
  node = self.get_node(node_id)
307
- if node and hasattr(node, 'setting_input'):
388
+ if node and hasattr(node, "setting_input"):
308
389
  setting = node.setting_input
309
- if hasattr(setting, 'pos_x') and hasattr(setting, 'pos_y'):
390
+ if hasattr(setting, "pos_x") and hasattr(setting, "pos_y"):
310
391
  setting.pos_x = pos_x
311
392
  setting.pos_y = pos_y
312
393
  updated_count += 1
313
394
  else:
314
- self.flow_logger.warning(f"Node {node_id} setting_input ({type(setting)}) lacks pos_x/pos_y attributes.")
395
+ self.flow_logger.warning(
396
+ f"Node {node_id} setting_input ({type(setting)}) lacks pos_x/pos_y attributes."
397
+ )
315
398
  elif node:
316
399
  self.flow_logger.warning(f"Node {node_id} lacks setting_input attribute.")
317
400
  # else: Node not found, already warned by calculate_layered_layout
318
401
 
319
402
  end_time = time()
320
- self.flow_logger.info(f"Layout applied to {updated_count}/{len(self.nodes)} nodes in {end_time - start_time:.2f} seconds.")
403
+ self.flow_logger.info(
404
+ f"Layout applied to {updated_count}/{len(self.nodes)} nodes in {end_time - start_time:.2f} seconds."
405
+ )
321
406
 
322
407
  except Exception as e:
323
408
  self.flow_logger.error(f"Error applying layout: {e}")
@@ -337,13 +422,13 @@ class FlowGraph:
337
422
  """
338
423
  self._flow_id = new_id
339
424
  for node in self.nodes:
340
- if hasattr(node.setting_input, 'flow_id'):
425
+ if hasattr(node.setting_input, "flow_id"):
341
426
  node.setting_input.flow_id = new_id
342
427
  self.flow_settings.flow_id = new_id
343
428
 
344
429
  def __repr__(self):
345
430
  """Provides the official string representation of the FlowGraph instance."""
346
- settings_str = " -" + '\n -'.join(f"{k}: {v}" for k, v in self.flow_settings)
431
+ settings_str = " -" + "\n -".join(f"{k}: {v}" for k, v in self.flow_settings)
347
432
  return f"FlowGraph(\nNodes: {self._node_db}\n\nSettings:\n{settings_str}"
348
433
 
349
434
  def print_tree(self):
@@ -361,7 +446,7 @@ class FlowGraph:
361
446
 
362
447
  # Group nodes by depth
363
448
  depth_groups, max_depth = group_nodes_by_depth(node_info)
364
-
449
+
365
450
  # Sort nodes within each depth group
366
451
  for depth in depth_groups:
367
452
  depth_groups[depth].sort()
@@ -371,7 +456,7 @@ class FlowGraph:
371
456
 
372
457
  # Track which nodes connect to what
373
458
  merge_points = define_node_connections(node_info)
374
-
459
+
375
460
  # Build the flow paths
376
461
 
377
462
  # Find the maximum label length for each depth level
@@ -380,15 +465,15 @@ class FlowGraph:
380
465
  if depth in depth_groups:
381
466
  max_len = max(len(node_info[nid].label) for nid in depth_groups[depth])
382
467
  max_label_length[depth] = max_len
383
-
468
+
384
469
  # Draw the paths
385
470
  drawn_nodes = set()
386
471
  merge_drawn = set()
387
-
472
+
388
473
  # Group paths by their merge points
389
474
  paths_by_merge = {}
390
475
  standalone_paths = []
391
-
476
+
392
477
  # Build flow paths
393
478
  paths = build_flow_paths(node_info, self._flow_starts, merge_points)
394
479
 
@@ -410,22 +495,22 @@ class FlowGraph:
410
495
 
411
496
  # Add undrawn nodes
412
497
  add_un_drawn_nodes(drawn_nodes, node_info, lines)
413
-
498
+
414
499
  try:
415
500
  skip_nodes, ordered_nodes = compute_execution_plan(
416
- nodes=self.nodes,
417
- flow_starts=self._flow_starts+self.get_implicit_starter_nodes())
501
+ nodes=self.nodes, flow_starts=self._flow_starts + self.get_implicit_starter_nodes()
502
+ )
418
503
  if ordered_nodes:
419
504
  for i, node in enumerate(ordered_nodes, 1):
420
505
  lines.append(f" {i:3d}. {node_info[node.node_id].label}")
421
506
  except Exception as e:
422
507
  lines.append(f" Could not determine execution order: {e}")
423
-
508
+
424
509
  # Print everything
425
510
  output = "\n".join(lines)
426
-
511
+
427
512
  print(output)
428
-
513
+
429
514
  def get_nodes_overview(self):
430
515
  """Gets a list of dictionary representations for all nodes in the graph."""
431
516
  output = []
@@ -433,7 +518,7 @@ class FlowGraph:
433
518
  output.append(v.get_repr())
434
519
  return output
435
520
 
436
- def remove_from_output_cols(self, columns: List[str]):
521
+ def remove_from_output_cols(self, columns: list[str]):
437
522
  """Removes specified columns from the list of expected output columns.
438
523
 
439
524
  Args:
@@ -442,7 +527,7 @@ class FlowGraph:
442
527
  cols = set(columns)
443
528
  self._output_cols = [c for c in self._output_cols if c not in cols]
444
529
 
445
- def get_node(self, node_id: Union[int, str] = None) -> FlowNode | None:
530
+ def get_node(self, node_id: int | str = None) -> FlowNode | None:
446
531
  """Retrieves a node from the graph by its ID.
447
532
 
448
533
  Args:
@@ -456,24 +541,43 @@ class FlowGraph:
456
541
  node = self._node_db.get(node_id)
457
542
  if node is not None:
458
543
  return node
459
-
460
- def add_user_defined_node(self, *,
461
- custom_node: CustomNodeBase,
462
- user_defined_node_settings: input_schema.UserDefinedNode
463
- ):
464
-
465
- def _func(*fdes: FlowDataEngine) -> FlowDataEngine | None:
466
- output = custom_node.process(*(fde.data_frame for fde in fdes))
467
- if isinstance(output, pl.LazyFrame | pl.DataFrame):
544
+
545
+ def add_user_defined_node(
546
+ self, *, custom_node: CustomNodeBase, user_defined_node_settings: input_schema.UserDefinedNode
547
+ ):
548
+ """Adds a user-defined custom node to the graph.
549
+
550
+ Args:
551
+ custom_node: The custom node instance to add.
552
+ user_defined_node_settings: The settings for the user-defined node.
553
+ """
554
+
555
+ def _func(*flow_data_engine: FlowDataEngine) -> FlowDataEngine | None:
556
+ user_id = user_defined_node_settings.user_id
557
+ if user_id is not None:
558
+ custom_node.set_execution_context(user_id)
559
+ if custom_node.settings_schema:
560
+ custom_node.settings_schema.set_secret_context(user_id, custom_node.accessed_secrets)
561
+
562
+ output = custom_node.process(*(fde.data_frame for fde in flow_data_engine))
563
+
564
+ accessed_secrets = custom_node.get_accessed_secrets()
565
+ if accessed_secrets:
566
+ logger.info(f"Node '{user_defined_node_settings.node_id}' accessed secrets: {accessed_secrets}")
567
+ if isinstance(output, (pl.LazyFrame, pl.DataFrame)):
468
568
  return FlowDataEngine(output)
469
569
  return None
470
-
471
- self.add_node_step(node_id=user_defined_node_settings.node_id,
472
- function=_func,
473
- setting_input=user_defined_node_settings,
474
- input_node_ids=user_defined_node_settings.depending_on_ids,
475
- node_type=custom_node.item,
476
- )
570
+
571
+ self.add_node_step(
572
+ node_id=user_defined_node_settings.node_id,
573
+ function=_func,
574
+ setting_input=user_defined_node_settings,
575
+ input_node_ids=user_defined_node_settings.depending_on_ids,
576
+ node_type=custom_node.item,
577
+ )
578
+ if custom_node.number_of_inputs == 0:
579
+ node = self.get_node(user_defined_node_settings.node_id)
580
+ self.add_node_to_starting_list(node)
477
581
 
478
582
  def add_pivot(self, pivot_settings: input_schema.NodePivot):
479
583
  """Adds a pivot node to the graph.
@@ -485,11 +589,13 @@ class FlowGraph:
485
589
  def _func(fl: FlowDataEngine):
486
590
  return fl.do_pivot(pivot_settings.pivot_input, self.flow_logger.get_node_logger(pivot_settings.node_id))
487
591
 
488
- self.add_node_step(node_id=pivot_settings.node_id,
489
- function=_func,
490
- node_type='pivot',
491
- setting_input=pivot_settings,
492
- input_node_ids=[pivot_settings.depending_on_id])
592
+ self.add_node_step(
593
+ node_id=pivot_settings.node_id,
594
+ function=_func,
595
+ node_type="pivot",
596
+ setting_input=pivot_settings,
597
+ input_node_ids=[pivot_settings.depending_on_id],
598
+ )
493
599
 
494
600
  node = self.get_node(pivot_settings.node_id)
495
601
 
@@ -498,6 +604,7 @@ class FlowGraph:
498
604
  input_data.lazy = True # ensure the dataset is lazy
499
605
  input_lf = input_data.data_frame # get the lazy frame
500
606
  return pre_calculate_pivot_schema(input_data.schema, pivot_settings.pivot_input, input_lf=input_lf)
607
+
501
608
  node.schema_callback = schema_callback
502
609
 
503
610
  def add_unpivot(self, unpivot_settings: input_schema.NodeUnpivot):
@@ -510,11 +617,13 @@ class FlowGraph:
510
617
  def _func(fl: FlowDataEngine) -> FlowDataEngine:
511
618
  return fl.unpivot(unpivot_settings.unpivot_input)
512
619
 
513
- self.add_node_step(node_id=unpivot_settings.node_id,
514
- function=_func,
515
- node_type='unpivot',
516
- setting_input=unpivot_settings,
517
- input_node_ids=[unpivot_settings.depending_on_id])
620
+ self.add_node_step(
621
+ node_id=unpivot_settings.node_id,
622
+ function=_func,
623
+ node_type="unpivot",
624
+ setting_input=unpivot_settings,
625
+ input_node_ids=[unpivot_settings.depending_on_id],
626
+ )
518
627
 
519
628
  def add_union(self, union_settings: input_schema.NodeUnion):
520
629
  """Adds a union node to combine multiple data streams.
@@ -524,14 +633,16 @@ class FlowGraph:
524
633
  """
525
634
 
526
635
  def _func(*flowfile_tables: FlowDataEngine):
527
- dfs: List[pl.LazyFrame] | List[pl.DataFrame] = [flt.data_frame for flt in flowfile_tables]
528
- return FlowDataEngine(pl.concat(dfs, how='diagonal_relaxed'))
636
+ dfs: list[pl.LazyFrame] | list[pl.DataFrame] = [flt.data_frame for flt in flowfile_tables]
637
+ return FlowDataEngine(pl.concat(dfs, how="diagonal_relaxed"))
529
638
 
530
- self.add_node_step(node_id=union_settings.node_id,
531
- function=_func,
532
- node_type=f'union',
533
- setting_input=union_settings,
534
- input_node_ids=union_settings.depending_on_ids)
639
+ self.add_node_step(
640
+ node_id=union_settings.node_id,
641
+ function=_func,
642
+ node_type="union",
643
+ setting_input=union_settings,
644
+ input_node_ids=union_settings.depending_on_ids,
645
+ )
535
646
 
536
647
  def add_initial_node_analysis(self, node_promise: input_schema.NodePromise):
537
648
  """Adds a data exploration/analysis node based on a node promise.
@@ -559,13 +670,14 @@ class FlowGraph:
559
670
  flowfile_table = flowfile_table.get_sample(sample_size, random=True)
560
671
  external_sampler = ExternalDfFetcher(
561
672
  lf=flowfile_table.data_frame,
562
- file_ref="__gf_walker"+node.hash,
673
+ file_ref="__gf_walker" + node.hash,
563
674
  wait_on_completion=True,
564
675
  node_id=node.node_id,
565
676
  flow_id=self.flow_id,
566
677
  )
567
- node.results.analysis_data_generator = get_read_top_n(external_sampler.status.file_ref,
568
- n=min(sample_size, number_of_records))
678
+ node.results.analysis_data_generator = get_read_top_n(
679
+ external_sampler.status.file_ref, n=min(sample_size, number_of_records)
680
+ )
569
681
  return flowfile_table
570
682
 
571
683
  def schema_callback():
@@ -574,11 +686,15 @@ class FlowGraph:
574
686
  input_node = node.all_inputs[0]
575
687
  return input_node.schema
576
688
  else:
577
- return [FlowfileColumn.from_input('col_1', 'na')]
689
+ return [FlowfileColumn.from_input("col_1", "na")]
578
690
 
579
- self.add_node_step(node_id=node_analysis.node_id, node_type='explore_data',
580
- function=analysis_preparation,
581
- setting_input=node_analysis, schema_callback=schema_callback)
691
+ self.add_node_step(
692
+ node_id=node_analysis.node_id,
693
+ node_type="explore_data",
694
+ function=analysis_preparation,
695
+ setting_input=node_analysis,
696
+ schema_callback=schema_callback,
697
+ )
582
698
  node = self.get_node(node_analysis.node_id)
583
699
 
584
700
  def add_group_by(self, group_by_settings: input_schema.NodeGroupBy):
@@ -591,19 +707,20 @@ class FlowGraph:
591
707
  def _func(fl: FlowDataEngine) -> FlowDataEngine:
592
708
  return fl.do_group_by(group_by_settings.groupby_input, False)
593
709
 
594
- self.add_node_step(node_id=group_by_settings.node_id,
595
- function=_func,
596
- node_type=f'group_by',
597
- setting_input=group_by_settings,
598
- input_node_ids=[group_by_settings.depending_on_id])
710
+ self.add_node_step(
711
+ node_id=group_by_settings.node_id,
712
+ function=_func,
713
+ node_type="group_by",
714
+ setting_input=group_by_settings,
715
+ input_node_ids=[group_by_settings.depending_on_id],
716
+ )
599
717
 
600
718
  node = self.get_node(group_by_settings.node_id)
601
719
 
602
720
  def schema_callback():
603
-
604
721
  output_columns = [(c.old_name, c.new_name, c.output_type) for c in group_by_settings.groupby_input.agg_cols]
605
722
  depends_on = node.node_inputs.main_inputs[0]
606
- input_schema_dict: Dict[str, str] = {s.name: s.data_type for s in depends_on.schema}
723
+ input_schema_dict: dict[str, str] = {s.name: s.data_type for s in depends_on.schema}
607
724
  output_schema = []
608
725
  for old_name, new_name, data_type in output_columns:
609
726
  data_type = input_schema_dict[old_name] if data_type is None else data_type
@@ -618,38 +735,148 @@ class FlowGraph:
618
735
  Args:
619
736
  filter_settings: The settings for the filter operation.
620
737
  """
738
+ from flowfile_core.schemas.transform_schema import FilterOperator
739
+
740
+ def _build_basic_filter_expression(
741
+ basic_filter: transform_schema.BasicFilter, field_data_type: str | None = None
742
+ ) -> str:
743
+ """Build a filter expression string from a BasicFilter object.
744
+
745
+ Uses the Flowfile expression language that is compatible with polars_expr_transformer.
746
+
747
+ Args:
748
+ basic_filter: The basic filter configuration.
749
+ field_data_type: The data type of the field (optional, for smart quoting).
750
+
751
+ Returns:
752
+ A filter expression string compatible with polars_expr_transformer.
753
+ """
754
+ field = f"[{basic_filter.field}]"
755
+ value = basic_filter.value
756
+ value2 = basic_filter.value2
757
+
758
+ is_numeric_value = value.replace(".", "", 1).replace("-", "", 1).isnumeric() if value else False
759
+ should_quote = field_data_type == "str" or not is_numeric_value
760
+
761
+ try:
762
+ operator = basic_filter.get_operator()
763
+ except (ValueError, AttributeError):
764
+ operator = FilterOperator.from_symbol(str(basic_filter.operator))
765
+
766
+ if operator == FilterOperator.EQUALS:
767
+ if should_quote:
768
+ return f'{field}="{value}"'
769
+ return f"{field}={value}"
770
+
771
+ elif operator == FilterOperator.NOT_EQUALS:
772
+ if should_quote:
773
+ return f'{field}!="{value}"'
774
+ return f"{field}!={value}"
775
+
776
+ elif operator == FilterOperator.GREATER_THAN:
777
+ if should_quote:
778
+ return f'{field}>"{value}"'
779
+ return f"{field}>{value}"
780
+
781
+ elif operator == FilterOperator.GREATER_THAN_OR_EQUALS:
782
+ if should_quote:
783
+ return f'{field}>="{value}"'
784
+ return f"{field}>={value}"
785
+
786
+ elif operator == FilterOperator.LESS_THAN:
787
+ if should_quote:
788
+ return f'{field}<"{value}"'
789
+ return f"{field}<{value}"
790
+
791
+ elif operator == FilterOperator.LESS_THAN_OR_EQUALS:
792
+ if should_quote:
793
+ return f'{field}<="{value}"'
794
+ return f"{field}<={value}"
795
+
796
+ elif operator == FilterOperator.CONTAINS:
797
+ return f'contains({field}, "{value}")'
798
+
799
+ elif operator == FilterOperator.NOT_CONTAINS:
800
+ return f'contains({field}, "{value}") = false'
801
+
802
+ elif operator == FilterOperator.STARTS_WITH:
803
+ return f'left({field}, {len(value)}) = "{value}"'
804
+
805
+ elif operator == FilterOperator.ENDS_WITH:
806
+ return f'right({field}, {len(value)}) = "{value}"'
807
+
808
+ elif operator == FilterOperator.IS_NULL:
809
+ return f"is_empty({field})"
810
+
811
+ elif operator == FilterOperator.IS_NOT_NULL:
812
+ return f"is_not_empty({field})"
813
+
814
+ elif operator == FilterOperator.IN:
815
+ values = [v.strip() for v in value.split(",")]
816
+ if len(values) == 1:
817
+ if should_quote:
818
+ return f'{field}="{values[0]}"'
819
+ return f"{field}={values[0]}"
820
+ if should_quote:
821
+ conditions = [f'({field}="{v}")' for v in values]
822
+ else:
823
+ conditions = [f"({field}={v})" for v in values]
824
+ return " | ".join(conditions)
825
+
826
+ elif operator == FilterOperator.NOT_IN:
827
+ values = [v.strip() for v in value.split(",")]
828
+ if len(values) == 1:
829
+ if should_quote:
830
+ return f'{field}!="{values[0]}"'
831
+ return f"{field}!={values[0]}"
832
+ if should_quote:
833
+ conditions = [f'({field}!="{v}")' for v in values]
834
+ else:
835
+ conditions = [f"({field}!={v})" for v in values]
836
+ return " & ".join(conditions)
621
837
 
622
- is_advanced = filter_settings.filter_input.filter_type == 'advanced'
623
- if is_advanced:
624
- predicate = filter_settings.filter_input.advanced_filter
625
- else:
626
- _basic_filter = filter_settings.filter_input.basic_filter
627
- filter_settings.filter_input.advanced_filter = (f'[{_basic_filter.field}]{_basic_filter.filter_type}"'
628
- f'{_basic_filter.filter_value}"')
838
+ elif operator == FilterOperator.BETWEEN:
839
+ if value2 is None:
840
+ raise ValueError("BETWEEN operator requires value2")
841
+ if should_quote:
842
+ return f'({field}>="{value}") & ({field}<="{value2}")'
843
+ return f"({field}>={value}) & ({field}<={value2})"
844
+
845
+ else:
846
+ # Fallback for unknown operators - use legacy format
847
+ if should_quote:
848
+ return f'{field}{operator.to_symbol()}"{value}"'
849
+ return f"{field}{operator.to_symbol()}{value}"
629
850
 
630
851
  def _func(fl: FlowDataEngine):
631
- is_advanced = filter_settings.filter_input.filter_type == 'advanced'
852
+ is_advanced = filter_settings.filter_input.is_advanced()
853
+
632
854
  if is_advanced:
855
+ predicate = filter_settings.filter_input.advanced_filter
633
856
  return fl.do_filter(predicate)
634
857
  else:
635
858
  basic_filter = filter_settings.filter_input.basic_filter
636
- if basic_filter.filter_value.isnumeric():
859
+ if basic_filter is None:
860
+ logger.warning("Basic filter is None, returning unfiltered data")
861
+ return fl
862
+
863
+ try:
637
864
  field_data_type = fl.get_schema_column(basic_filter.field).generic_datatype()
638
- if field_data_type == 'str':
639
- _f = f'[{basic_filter.field}]{basic_filter.filter_type}"{basic_filter.filter_value}"'
640
- else:
641
- _f = f'[{basic_filter.field}]{basic_filter.filter_type}{basic_filter.filter_value}'
642
- else:
643
- _f = f'[{basic_filter.field}]{basic_filter.filter_type}"{basic_filter.filter_value}"'
644
- filter_settings.filter_input.advanced_filter = _f
645
- return fl.do_filter(_f)
865
+ except Exception:
866
+ field_data_type = None
646
867
 
647
- self.add_node_step(filter_settings.node_id, _func,
648
- node_type='filter',
649
- renew_schema=False,
650
- setting_input=filter_settings,
651
- input_node_ids=[filter_settings.depending_on_id]
652
- )
868
+ expression = _build_basic_filter_expression(basic_filter, field_data_type)
869
+ filter_settings.filter_input.advanced_filter = expression
870
+ return fl.do_filter(expression)
871
+
872
+ self.add_node_step(
873
+ filter_settings.node_id,
874
+ _func,
875
+ node_type="filter",
876
+ renew_schema=False,
877
+ setting_input=filter_settings,
878
+ input_node_ids=[filter_settings.depending_on_id],
879
+ )
653
880
 
654
881
  def add_record_count(self, node_number_of_records: input_schema.NodeRecordCount):
655
882
  """Adds a filter node to the graph.
@@ -661,11 +888,13 @@ class FlowGraph:
661
888
  def _func(fl: FlowDataEngine) -> FlowDataEngine:
662
889
  return fl.get_record_count()
663
890
 
664
- self.add_node_step(node_id=node_number_of_records.node_id,
665
- function=_func,
666
- node_type='record_count',
667
- setting_input=node_number_of_records,
668
- input_node_ids=[node_number_of_records.depending_on_id])
891
+ self.add_node_step(
892
+ node_id=node_number_of_records.node_id,
893
+ function=_func,
894
+ node_type="record_count",
895
+ setting_input=node_number_of_records,
896
+ input_node_ids=[node_number_of_records.depending_on_id],
897
+ )
669
898
 
670
899
  def add_polars_code(self, node_polars_code: input_schema.NodePolarsCode):
671
900
  """Adds a node that executes custom Polars code.
@@ -676,11 +905,14 @@ class FlowGraph:
676
905
 
677
906
  def _func(*flowfile_tables: FlowDataEngine) -> FlowDataEngine:
678
907
  return execute_polars_code(*flowfile_tables, code=node_polars_code.polars_code_input.polars_code)
679
- self.add_node_step(node_id=node_polars_code.node_id,
680
- function=_func,
681
- node_type='polars_code',
682
- setting_input=node_polars_code,
683
- input_node_ids=node_polars_code.depending_on_ids)
908
+
909
+ self.add_node_step(
910
+ node_id=node_polars_code.node_id,
911
+ function=_func,
912
+ node_type="polars_code",
913
+ setting_input=node_polars_code,
914
+ input_node_ids=node_polars_code.depending_on_ids,
915
+ )
684
916
 
685
917
  try:
686
918
  polars_code_parser.validate_code(node_polars_code.polars_code_input.polars_code)
@@ -688,9 +920,7 @@ class FlowGraph:
688
920
  node = self.get_node(node_id=node_polars_code.node_id)
689
921
  node.results.errors = str(e)
690
922
 
691
- def add_dependency_on_polars_lazy_frame(self,
692
- lazy_frame: pl.LazyFrame,
693
- node_id: int):
923
+ def add_dependency_on_polars_lazy_frame(self, lazy_frame: pl.LazyFrame, node_id: int):
694
924
  """Adds a special node that directly injects a Polars LazyFrame into the graph.
695
925
 
696
926
  Note: This is intended for backend use and will not work in the UI editor.
@@ -699,13 +929,16 @@ class FlowGraph:
699
929
  lazy_frame: The Polars LazyFrame to inject.
700
930
  node_id: The ID for the new node.
701
931
  """
932
+
702
933
  def _func():
703
934
  return FlowDataEngine(lazy_frame)
704
- node_promise = input_schema.NodePromise(flow_id=self.flow_id,
705
- node_id=node_id, node_type="polars_lazy_frame",
706
- is_setup=True)
707
- self.add_node_step(node_id=node_promise.node_id, node_type=node_promise.node_type, function=_func,
708
- setting_input=node_promise)
935
+
936
+ node_promise = input_schema.NodePromise(
937
+ flow_id=self.flow_id, node_id=node_id, node_type="polars_lazy_frame", is_setup=True
938
+ )
939
+ self.add_node_step(
940
+ node_id=node_promise.node_id, node_type=node_promise.node_type, function=_func, setting_input=node_promise
941
+ )
709
942
 
710
943
  def add_unique(self, unique_settings: input_schema.NodeUnique):
711
944
  """Adds a node to find and remove duplicate rows.
@@ -717,12 +950,14 @@ class FlowGraph:
717
950
  def _func(fl: FlowDataEngine) -> FlowDataEngine:
718
951
  return fl.make_unique(unique_settings.unique_input)
719
952
 
720
- self.add_node_step(node_id=unique_settings.node_id,
721
- function=_func,
722
- input_columns=[],
723
- node_type='unique',
724
- setting_input=unique_settings,
725
- input_node_ids=[unique_settings.depending_on_id])
953
+ self.add_node_step(
954
+ node_id=unique_settings.node_id,
955
+ function=_func,
956
+ input_columns=[],
957
+ node_type="unique",
958
+ setting_input=unique_settings,
959
+ input_node_ids=[unique_settings.depending_on_id],
960
+ )
726
961
 
727
962
  def add_graph_solver(self, graph_solver_settings: input_schema.NodeGraphSolver):
728
963
  """Adds a node that solves graph-like problems within the data.
@@ -735,14 +970,17 @@ class FlowGraph:
735
970
  graph_solver_settings: The settings object defining the graph inputs
736
971
  and the specific algorithm to apply.
737
972
  """
973
+
738
974
  def _func(fl: FlowDataEngine) -> FlowDataEngine:
739
975
  return fl.solve_graph(graph_solver_settings.graph_solver_input)
740
976
 
741
- self.add_node_step(node_id=graph_solver_settings.node_id,
742
- function=_func,
743
- node_type='graph_solver',
744
- setting_input=graph_solver_settings,
745
- input_node_ids=[graph_solver_settings.depending_on_id])
977
+ self.add_node_step(
978
+ node_id=graph_solver_settings.node_id,
979
+ function=_func,
980
+ node_type="graph_solver",
981
+ setting_input=graph_solver_settings,
982
+ input_node_ids=[graph_solver_settings.depending_on_id],
983
+ )
746
984
 
747
985
  def add_formula(self, function_settings: input_schema.NodeFormula):
748
986
  """Adds a node that applies a formula to create or modify a column.
@@ -757,24 +995,28 @@ class FlowGraph:
757
995
  else:
758
996
  output_type = None
759
997
  if output_type not in (None, transform_schema.AUTO_DATA_TYPE):
760
- new_col = [FlowfileColumn.from_input(column_name=function_settings.function.field.name,
761
- data_type=str(output_type))]
998
+ new_col = [
999
+ FlowfileColumn.from_input(column_name=function_settings.function.field.name, data_type=str(output_type))
1000
+ ]
762
1001
  else:
763
- new_col = [FlowfileColumn.from_input(function_settings.function.field.name, 'String')]
1002
+ new_col = [FlowfileColumn.from_input(function_settings.function.field.name, "String")]
764
1003
 
765
1004
  def _func(fl: FlowDataEngine):
766
- return fl.apply_sql_formula(func=function_settings.function.function,
767
- col_name=function_settings.function.field.name,
768
- output_data_type=output_type)
769
-
770
- self.add_node_step(function_settings.node_id, _func,
771
- output_schema=new_col,
772
- node_type='formula',
773
- renew_schema=False,
774
- setting_input=function_settings,
775
- input_node_ids=[function_settings.depending_on_id]
776
- )
777
- # TODO: Add validation here
1005
+ return fl.apply_sql_formula(
1006
+ func=function_settings.function.function,
1007
+ col_name=function_settings.function.field.name,
1008
+ output_data_type=output_type,
1009
+ )
1010
+
1011
+ self.add_node_step(
1012
+ function_settings.node_id,
1013
+ _func,
1014
+ output_schema=new_col,
1015
+ node_type="formula",
1016
+ renew_schema=False,
1017
+ setting_input=function_settings,
1018
+ input_node_ids=[function_settings.depending_on_id],
1019
+ )
778
1020
  if error != "":
779
1021
  node = self.get_node(function_settings.node_id)
780
1022
  node.results.errors = error
@@ -791,22 +1033,27 @@ class FlowGraph:
791
1033
  Returns:
792
1034
  The `FlowGraph` instance for method chaining.
793
1035
  """
1036
+
794
1037
  def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
795
1038
  for left_select in cross_join_settings.cross_join_input.left_select.renames:
796
1039
  left_select.is_available = True if left_select.old_name in main.schema else False
797
1040
  for right_select in cross_join_settings.cross_join_input.right_select.renames:
798
1041
  right_select.is_available = True if right_select.old_name in right.schema else False
799
- return main.do_cross_join(cross_join_input=cross_join_settings.cross_join_input,
800
- auto_generate_selection=cross_join_settings.auto_generate_selection,
801
- verify_integrity=False,
802
- other=right)
803
-
804
- self.add_node_step(node_id=cross_join_settings.node_id,
805
- function=_func,
806
- input_columns=[],
807
- node_type='cross_join',
808
- setting_input=cross_join_settings,
809
- input_node_ids=cross_join_settings.depending_on_ids)
1042
+ return main.do_cross_join(
1043
+ cross_join_input=cross_join_settings.cross_join_input,
1044
+ auto_generate_selection=cross_join_settings.auto_generate_selection,
1045
+ verify_integrity=False,
1046
+ other=right,
1047
+ )
1048
+
1049
+ self.add_node_step(
1050
+ node_id=cross_join_settings.node_id,
1051
+ function=_func,
1052
+ input_columns=[],
1053
+ node_type="cross_join",
1054
+ setting_input=cross_join_settings,
1055
+ input_node_ids=cross_join_settings.depending_on_ids,
1056
+ )
810
1057
  return self
811
1058
 
812
1059
  def add_join(self, join_settings: input_schema.NodeJoin) -> "FlowGraph":
@@ -818,22 +1065,27 @@ class FlowGraph:
818
1065
  Returns:
819
1066
  The `FlowGraph` instance for method chaining.
820
1067
  """
1068
+
821
1069
  def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
822
1070
  for left_select in join_settings.join_input.left_select.renames:
823
1071
  left_select.is_available = True if left_select.old_name in main.schema else False
824
1072
  for right_select in join_settings.join_input.right_select.renames:
825
1073
  right_select.is_available = True if right_select.old_name in right.schema else False
826
- return main.join(join_input=join_settings.join_input,
827
- auto_generate_selection=join_settings.auto_generate_selection,
828
- verify_integrity=False,
829
- other=right)
830
-
831
- self.add_node_step(node_id=join_settings.node_id,
832
- function=_func,
833
- input_columns=[],
834
- node_type='join',
835
- setting_input=join_settings,
836
- input_node_ids=join_settings.depending_on_ids)
1074
+ return main.join(
1075
+ join_input=join_settings.join_input,
1076
+ auto_generate_selection=join_settings.auto_generate_selection,
1077
+ verify_integrity=False,
1078
+ other=right,
1079
+ )
1080
+
1081
+ self.add_node_step(
1082
+ node_id=join_settings.node_id,
1083
+ function=_func,
1084
+ input_columns=[],
1085
+ node_type="join",
1086
+ setting_input=join_settings,
1087
+ input_node_ids=join_settings.depending_on_ids,
1088
+ )
837
1089
  return self
838
1090
 
839
1091
  def add_fuzzy_match(self, fuzzy_settings: input_schema.NodeFuzzyMatch) -> "FlowGraph":
@@ -849,31 +1101,43 @@ class FlowGraph:
849
1101
  def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
850
1102
  node = self.get_node(node_id=fuzzy_settings.node_id)
851
1103
  if self.execution_location == "local":
852
- return main.fuzzy_join(fuzzy_match_input=deepcopy(fuzzy_settings.join_input),
853
- other=right,
854
- node_logger=self.flow_logger.get_node_logger(fuzzy_settings.node_id))
1104
+ return main.fuzzy_join(
1105
+ fuzzy_match_input=deepcopy(fuzzy_settings.join_input),
1106
+ other=right,
1107
+ node_logger=self.flow_logger.get_node_logger(fuzzy_settings.node_id),
1108
+ )
855
1109
 
856
- f = main.start_fuzzy_join(fuzzy_match_input=deepcopy(fuzzy_settings.join_input), other=right, file_ref=node.hash,
857
- flow_id=self.flow_id, node_id=fuzzy_settings.node_id)
1110
+ f = main.start_fuzzy_join(
1111
+ fuzzy_match_input=deepcopy(fuzzy_settings.join_input),
1112
+ other=right,
1113
+ file_ref=node.hash,
1114
+ flow_id=self.flow_id,
1115
+ node_id=fuzzy_settings.node_id,
1116
+ )
858
1117
  logger.info("Started the fuzzy match action")
859
1118
  node._fetch_cached_df = f # Add to the node so it can be cancelled and fetch later if needed
860
1119
  return FlowDataEngine(f.get_result())
861
1120
 
862
1121
  def schema_callback():
863
- fm_input_copy = FuzzyMatchInputManager(fuzzy_settings.join_input) # Deepcopy create an unique object per func
1122
+ fm_input_copy = FuzzyMatchInputManager(
1123
+ fuzzy_settings.join_input
1124
+ ) # Deepcopy create an unique object per func
864
1125
  node = self.get_node(node_id=fuzzy_settings.node_id)
865
- return calculate_fuzzy_match_schema(fm_input_copy,
866
- left_schema=node.node_inputs.main_inputs[0].schema,
867
- right_schema=node.node_inputs.right_input.schema
868
- )
869
-
870
- self.add_node_step(node_id=fuzzy_settings.node_id,
871
- function=_func,
872
- input_columns=[],
873
- node_type='fuzzy_match',
874
- setting_input=fuzzy_settings,
875
- input_node_ids=fuzzy_settings.depending_on_ids,
876
- schema_callback=schema_callback)
1126
+ return calculate_fuzzy_match_schema(
1127
+ fm_input_copy,
1128
+ left_schema=node.node_inputs.main_inputs[0].schema,
1129
+ right_schema=node.node_inputs.right_input.schema,
1130
+ )
1131
+
1132
+ self.add_node_step(
1133
+ node_id=fuzzy_settings.node_id,
1134
+ function=_func,
1135
+ input_columns=[],
1136
+ node_type="fuzzy_match",
1137
+ setting_input=fuzzy_settings,
1138
+ input_node_ids=fuzzy_settings.depending_on_ids,
1139
+ schema_callback=schema_callback,
1140
+ )
877
1141
 
878
1142
  return self
879
1143
 
@@ -890,14 +1154,17 @@ class FlowGraph:
890
1154
  Returns:
891
1155
  The `FlowGraph` instance for method chaining.
892
1156
  """
1157
+
893
1158
  def _func(table: FlowDataEngine) -> FlowDataEngine:
894
1159
  return table.split(node_text_to_rows.text_to_rows_input)
895
1160
 
896
- self.add_node_step(node_id=node_text_to_rows.node_id,
897
- function=_func,
898
- node_type='text_to_rows',
899
- setting_input=node_text_to_rows,
900
- input_node_ids=[node_text_to_rows.depending_on_id])
1161
+ self.add_node_step(
1162
+ node_id=node_text_to_rows.node_id,
1163
+ function=_func,
1164
+ node_type="text_to_rows",
1165
+ setting_input=node_text_to_rows,
1166
+ input_node_ids=[node_text_to_rows.depending_on_id],
1167
+ )
901
1168
  return self
902
1169
 
903
1170
  def add_sort(self, sort_settings: input_schema.NodeSort) -> "FlowGraph":
@@ -913,11 +1180,13 @@ class FlowGraph:
913
1180
  def _func(table: FlowDataEngine) -> FlowDataEngine:
914
1181
  return table.do_sort(sort_settings.sort_input)
915
1182
 
916
- self.add_node_step(node_id=sort_settings.node_id,
917
- function=_func,
918
- node_type='sort',
919
- setting_input=sort_settings,
920
- input_node_ids=[sort_settings.depending_on_id])
1183
+ self.add_node_step(
1184
+ node_id=sort_settings.node_id,
1185
+ function=_func,
1186
+ node_type="sort",
1187
+ setting_input=sort_settings,
1188
+ input_node_ids=[sort_settings.depending_on_id],
1189
+ )
921
1190
  return self
922
1191
 
923
1192
  def add_sample(self, sample_settings: input_schema.NodeSample) -> "FlowGraph":
@@ -929,15 +1198,17 @@ class FlowGraph:
929
1198
  Returns:
930
1199
  The `FlowGraph` instance for method chaining.
931
1200
  """
1201
+
932
1202
  def _func(table: FlowDataEngine) -> FlowDataEngine:
933
1203
  return table.get_sample(sample_settings.sample_size)
934
1204
 
935
- self.add_node_step(node_id=sample_settings.node_id,
936
- function=_func,
937
- node_type='sample',
938
- setting_input=sample_settings,
939
- input_node_ids=[sample_settings.depending_on_id]
940
- )
1205
+ self.add_node_step(
1206
+ node_id=sample_settings.node_id,
1207
+ function=_func,
1208
+ node_type="sample",
1209
+ setting_input=sample_settings,
1210
+ input_node_ids=[sample_settings.depending_on_id],
1211
+ )
941
1212
  return self
942
1213
 
943
1214
  def add_record_id(self, record_id_settings: input_schema.NodeRecordId) -> "FlowGraph":
@@ -954,12 +1225,13 @@ class FlowGraph:
954
1225
  def _func(table: FlowDataEngine) -> FlowDataEngine:
955
1226
  return table.add_record_id(record_id_settings.record_id_input)
956
1227
 
957
- self.add_node_step(node_id=record_id_settings.node_id,
958
- function=_func,
959
- node_type='record_id',
960
- setting_input=record_id_settings,
961
- input_node_ids=[record_id_settings.depending_on_id]
962
- )
1228
+ self.add_node_step(
1229
+ node_id=record_id_settings.node_id,
1230
+ function=_func,
1231
+ node_type="record_id",
1232
+ setting_input=record_id_settings,
1233
+ input_node_ids=[record_id_settings.depending_on_id],
1234
+ )
963
1235
  return self
964
1236
 
965
1237
  def add_select(self, select_settings: input_schema.NodeSelect) -> "FlowGraph":
@@ -991,16 +1263,19 @@ class FlowGraph:
991
1263
  for i in ids_to_remove:
992
1264
  v = select_cols.pop(i)
993
1265
  del v
994
- return table.do_select(select_inputs=transform_schema.SelectInputs(select_cols),
995
- keep_missing=select_settings.keep_missing)
996
-
997
- self.add_node_step(node_id=select_settings.node_id,
998
- function=_func,
999
- input_columns=[],
1000
- node_type='select',
1001
- drop_columns=list(drop_cols),
1002
- setting_input=select_settings,
1003
- input_node_ids=[select_settings.depending_on_id])
1266
+ return table.do_select(
1267
+ select_inputs=transform_schema.SelectInputs(select_cols), keep_missing=select_settings.keep_missing
1268
+ )
1269
+
1270
+ self.add_node_step(
1271
+ node_id=select_settings.node_id,
1272
+ function=_func,
1273
+ input_columns=[],
1274
+ node_type="select",
1275
+ drop_columns=list(drop_cols),
1276
+ setting_input=select_settings,
1277
+ input_node_ids=[select_settings.depending_on_id],
1278
+ )
1004
1279
  return self
1005
1280
 
1006
1281
  @property
@@ -1008,7 +1283,7 @@ class FlowGraph:
1008
1283
  """Checks if the graph has any nodes."""
1009
1284
  return len(self._node_ids) > 0
1010
1285
 
1011
- def delete_node(self, node_id: Union[int, str]):
1286
+ def delete_node(self, node_id: int | str):
1012
1287
  """Deletes a node from the graph and updates all its connections.
1013
1288
 
1014
1289
  Args:
@@ -1023,7 +1298,7 @@ class FlowGraph:
1023
1298
  if node:
1024
1299
  logger.info(f"Found node: {node_id}, processing deletion")
1025
1300
 
1026
- lead_to_steps: List[FlowNode] = node.leads_to_nodes
1301
+ lead_to_steps: list[FlowNode] = node.leads_to_nodes
1027
1302
  logger.debug(f"Node {node_id} leads to {len(lead_to_steps)} other nodes")
1028
1303
 
1029
1304
  if len(lead_to_steps) > 0:
@@ -1032,7 +1307,7 @@ class FlowGraph:
1032
1307
  lead_to_step.delete_input_node(node_id, complete=True)
1033
1308
 
1034
1309
  if not node.is_start:
1035
- depends_on: List[FlowNode] = node.node_inputs.get_all_inputs()
1310
+ depends_on: list[FlowNode] = node.node_inputs.get_all_inputs()
1036
1311
  logger.debug(f"Node {node_id} depends on {len(depends_on)} other nodes")
1037
1312
 
1038
1313
  for depend_on in depends_on:
@@ -1052,18 +1327,20 @@ class FlowGraph:
1052
1327
  """Checks if the graph has an initial input data source."""
1053
1328
  return self._input_data is not None
1054
1329
 
1055
- def add_node_step(self,
1056
- node_id: Union[int, str],
1057
- function: Callable,
1058
- input_columns: List[str] = None,
1059
- output_schema: List[FlowfileColumn] = None,
1060
- node_type: str = None,
1061
- drop_columns: List[str] = None,
1062
- renew_schema: bool = True,
1063
- setting_input: Any = None,
1064
- cache_results: bool = None,
1065
- schema_callback: Callable = None,
1066
- input_node_ids: List[int] = None) -> FlowNode:
1330
+ def add_node_step(
1331
+ self,
1332
+ node_id: int | str,
1333
+ function: Callable,
1334
+ input_columns: list[str] = None,
1335
+ output_schema: list[FlowfileColumn] = None,
1336
+ node_type: str = None,
1337
+ drop_columns: list[str] = None,
1338
+ renew_schema: bool = True,
1339
+ setting_input: Any = None,
1340
+ cache_results: bool = None,
1341
+ schema_callback: Callable = None,
1342
+ input_node_ids: list[int] = None,
1343
+ ) -> FlowNode:
1067
1344
  """The core method for adding or updating a node in the graph.
1068
1345
 
1069
1346
  Args:
@@ -1096,29 +1373,33 @@ class FlowGraph:
1096
1373
  if isinstance(input_columns, str):
1097
1374
  input_columns = [input_columns]
1098
1375
  if (
1099
- input_nodes is not None or
1100
- function.__name__ in ('placeholder', 'analysis_preparation') or
1101
- node_type in ("cloud_storage_reader", "polars_lazy_frame", "input_data")
1376
+ input_nodes is not None
1377
+ or function.__name__ in ("placeholder", "analysis_preparation")
1378
+ or node_type in ("cloud_storage_reader", "polars_lazy_frame", "input_data")
1102
1379
  ):
1103
1380
  if not existing_node:
1104
- node = FlowNode(node_id=node_id,
1105
- function=function,
1106
- output_schema=output_schema,
1107
- input_columns=input_columns,
1108
- drop_columns=drop_columns,
1109
- renew_schema=renew_schema,
1110
- setting_input=setting_input,
1111
- node_type=node_type,
1112
- name=function.__name__,
1113
- schema_callback=schema_callback,
1114
- parent_uuid=self.uuid)
1381
+ node = FlowNode(
1382
+ node_id=node_id,
1383
+ function=function,
1384
+ output_schema=output_schema,
1385
+ input_columns=input_columns,
1386
+ drop_columns=drop_columns,
1387
+ renew_schema=renew_schema,
1388
+ setting_input=setting_input,
1389
+ node_type=node_type,
1390
+ name=function.__name__,
1391
+ schema_callback=schema_callback,
1392
+ parent_uuid=self.uuid,
1393
+ )
1115
1394
  else:
1116
- existing_node.update_node(function=function,
1117
- output_schema=output_schema,
1118
- input_columns=input_columns,
1119
- drop_columns=drop_columns,
1120
- setting_input=setting_input,
1121
- schema_callback=schema_callback)
1395
+ existing_node.update_node(
1396
+ function=function,
1397
+ output_schema=output_schema,
1398
+ input_columns=input_columns,
1399
+ drop_columns=drop_columns,
1400
+ setting_input=setting_input,
1401
+ schema_callback=schema_callback,
1402
+ )
1122
1403
  node = existing_node
1123
1404
  else:
1124
1405
  raise Exception("No data initialized")
@@ -1126,7 +1407,7 @@ class FlowGraph:
1126
1407
  self._node_ids.append(node_id)
1127
1408
  return node
1128
1409
 
1129
- def add_include_cols(self, include_columns: List[str]):
1410
+ def add_include_cols(self, include_columns: list[str]):
1130
1411
  """Adds columns to both the input and output column lists.
1131
1412
 
1132
1413
  Args:
@@ -1147,23 +1428,30 @@ class FlowGraph:
1147
1428
  """
1148
1429
 
1149
1430
  def _func(df: FlowDataEngine):
1150
- execute_remote = self.execution_location != 'local'
1151
- df.output(output_fs=output_file.output_settings, flow_id=self.flow_id, node_id=output_file.node_id,
1152
- execute_remote=execute_remote)
1431
+ execute_remote = self.execution_location != "local"
1432
+ df.output(
1433
+ output_fs=output_file.output_settings,
1434
+ flow_id=self.flow_id,
1435
+ node_id=output_file.node_id,
1436
+ execute_remote=execute_remote,
1437
+ )
1153
1438
  return df
1154
1439
 
1155
1440
  def schema_callback():
1156
1441
  input_node: FlowNode = self.get_node(output_file.node_id).node_inputs.main_inputs[0]
1157
1442
 
1158
1443
  return input_node.schema
1159
- input_node_id = getattr(output_file, "depending_on_id") if hasattr(output_file, 'depending_on_id') else None
1160
- self.add_node_step(node_id=output_file.node_id,
1161
- function=_func,
1162
- input_columns=[],
1163
- node_type='output',
1164
- setting_input=output_file,
1165
- schema_callback=schema_callback,
1166
- input_node_ids=[input_node_id])
1444
+
1445
+ input_node_id = output_file.depending_on_id if hasattr(output_file, "depending_on_id") else None
1446
+ self.add_node_step(
1447
+ node_id=output_file.node_id,
1448
+ function=_func,
1449
+ input_columns=[],
1450
+ node_type="output",
1451
+ setting_input=output_file,
1452
+ schema_callback=schema_callback,
1453
+ input_node_ids=[input_node_id],
1454
+ )
1167
1455
 
1168
1456
  def add_database_writer(self, node_database_writer: input_schema.NodeDatabaseWriter):
1169
1457
  """Adds a node to write data to a database.
@@ -1172,18 +1460,20 @@ class FlowGraph:
1172
1460
  node_database_writer: The settings for the database writer node.
1173
1461
  """
1174
1462
 
1175
- node_type = 'database_writer'
1463
+ node_type = "database_writer"
1176
1464
  database_settings: input_schema.DatabaseWriteSettings = node_database_writer.database_write_settings
1177
- database_connection: Optional[input_schema.DatabaseConnection | input_schema.FullDatabaseConnection]
1178
- if database_settings.connection_mode == 'inline':
1465
+ database_connection: input_schema.DatabaseConnection | input_schema.FullDatabaseConnection | None
1466
+ if database_settings.connection_mode == "inline":
1179
1467
  database_connection: input_schema.DatabaseConnection = database_settings.database_connection
1180
- encrypted_password = get_encrypted_secret(current_user_id=node_database_writer.user_id,
1181
- secret_name=database_connection.password_ref)
1468
+ encrypted_password = get_encrypted_secret(
1469
+ current_user_id=node_database_writer.user_id, secret_name=database_connection.password_ref
1470
+ )
1182
1471
  if encrypted_password is None:
1183
1472
  raise HTTPException(status_code=400, detail="Password not found")
1184
1473
  else:
1185
- database_reference_settings = get_local_database_connection(database_settings.database_connection_name,
1186
- node_database_writer.user_id)
1474
+ database_reference_settings = get_local_database_connection(
1475
+ database_settings.database_connection_name, node_database_writer.user_id
1476
+ )
1187
1477
  encrypted_password = database_reference_settings.password.get_secret_value()
1188
1478
 
1189
1479
  def _func(df: FlowDataEngine):
@@ -1192,14 +1482,20 @@ class FlowGraph:
1192
1482
  sql_models.DatabaseExternalWriteSettings.create_from_from_node_database_writer(
1193
1483
  node_database_writer=node_database_writer,
1194
1484
  password=encrypted_password,
1195
- table_name=(database_settings.schema_name+'.'+database_settings.table_name
1196
- if database_settings.schema_name else database_settings.table_name),
1197
- database_reference_settings=(database_reference_settings if database_settings.connection_mode == 'reference'
1198
- else None),
1199
- lf=df.data_frame
1485
+ table_name=(
1486
+ database_settings.schema_name + "." + database_settings.table_name
1487
+ if database_settings.schema_name
1488
+ else database_settings.table_name
1489
+ ),
1490
+ database_reference_settings=(
1491
+ database_reference_settings if database_settings.connection_mode == "reference" else None
1492
+ ),
1493
+ lf=df.data_frame,
1200
1494
  )
1201
1495
  )
1202
- external_database_writer = ExternalDatabaseWriter(database_external_write_settings, wait_on_completion=False)
1496
+ external_database_writer = ExternalDatabaseWriter(
1497
+ database_external_write_settings, wait_on_completion=False
1498
+ )
1203
1499
  node._fetch_cached_df = external_database_writer
1204
1500
  external_database_writer.get_result()
1205
1501
  return df
@@ -1226,56 +1522,64 @@ class FlowGraph:
1226
1522
  """
1227
1523
 
1228
1524
  logger.info("Adding database reader")
1229
- node_type = 'database_reader'
1525
+ node_type = "database_reader"
1230
1526
  database_settings: input_schema.DatabaseSettings = node_database_reader.database_settings
1231
- database_connection: Optional[input_schema.DatabaseConnection | input_schema.FullDatabaseConnection]
1232
- if database_settings.connection_mode == 'inline':
1527
+ database_connection: input_schema.DatabaseConnection | input_schema.FullDatabaseConnection | None
1528
+ if database_settings.connection_mode == "inline":
1233
1529
  database_connection: input_schema.DatabaseConnection = database_settings.database_connection
1234
- encrypted_password = get_encrypted_secret(current_user_id=node_database_reader.user_id,
1235
- secret_name=database_connection.password_ref)
1530
+ encrypted_password = get_encrypted_secret(
1531
+ current_user_id=node_database_reader.user_id, secret_name=database_connection.password_ref
1532
+ )
1236
1533
  if encrypted_password is None:
1237
1534
  raise HTTPException(status_code=400, detail="Password not found")
1238
1535
  else:
1239
- database_reference_settings = get_local_database_connection(database_settings.database_connection_name,
1240
- node_database_reader.user_id)
1536
+ database_reference_settings = get_local_database_connection(
1537
+ database_settings.database_connection_name, node_database_reader.user_id
1538
+ )
1241
1539
  database_connection = database_reference_settings
1242
1540
  encrypted_password = database_reference_settings.password.get_secret_value()
1243
1541
 
1244
1542
  def _func():
1245
- sql_source = BaseSqlSource(query=None if database_settings.query_mode == 'table' else database_settings.query,
1246
- table_name=database_settings.table_name,
1247
- schema_name=database_settings.schema_name,
1248
- fields=node_database_reader.fields,
1249
- )
1543
+ sql_source = BaseSqlSource(
1544
+ query=None if database_settings.query_mode == "table" else database_settings.query,
1545
+ table_name=database_settings.table_name,
1546
+ schema_name=database_settings.schema_name,
1547
+ fields=node_database_reader.fields,
1548
+ )
1250
1549
  database_external_read_settings = (
1251
1550
  sql_models.DatabaseExternalReadSettings.create_from_from_node_database_reader(
1252
1551
  node_database_reader=node_database_reader,
1253
1552
  password=encrypted_password,
1254
1553
  query=sql_source.query,
1255
- database_reference_settings=(database_reference_settings if database_settings.connection_mode == 'reference'
1256
- else None),
1554
+ database_reference_settings=(
1555
+ database_reference_settings if database_settings.connection_mode == "reference" else None
1556
+ ),
1257
1557
  )
1258
1558
  )
1259
1559
 
1260
- external_database_fetcher = ExternalDatabaseFetcher(database_external_read_settings, wait_on_completion=False)
1560
+ external_database_fetcher = ExternalDatabaseFetcher(
1561
+ database_external_read_settings, wait_on_completion=False
1562
+ )
1261
1563
  node._fetch_cached_df = external_database_fetcher
1262
1564
  fl = FlowDataEngine(external_database_fetcher.get_result())
1263
1565
  node_database_reader.fields = [c.get_minimal_field_info() for c in fl.schema]
1264
1566
  return fl
1265
1567
 
1266
1568
  def schema_callback():
1267
- sql_source = SqlSource(connection_string=
1268
- sql_utils.construct_sql_uri(database_type=database_connection.database_type,
1269
- host=database_connection.host,
1270
- port=database_connection.port,
1271
- database=database_connection.database,
1272
- username=database_connection.username,
1273
- password=decrypt_secret(encrypted_password)),
1274
- query=None if database_settings.query_mode == 'table' else database_settings.query,
1275
- table_name=database_settings.table_name,
1276
- schema_name=database_settings.schema_name,
1277
- fields=node_database_reader.fields,
1278
- )
1569
+ sql_source = SqlSource(
1570
+ connection_string=sql_utils.construct_sql_uri(
1571
+ database_type=database_connection.database_type,
1572
+ host=database_connection.host,
1573
+ port=database_connection.port,
1574
+ database=database_connection.database,
1575
+ username=database_connection.username,
1576
+ password=decrypt_secret(encrypted_password),
1577
+ ),
1578
+ query=None if database_settings.query_mode == "table" else database_settings.query,
1579
+ table_name=database_settings.table_name,
1580
+ schema_name=database_settings.schema_name,
1581
+ fields=node_database_reader.fields,
1582
+ )
1279
1583
  return sql_source.get_schema()
1280
1584
 
1281
1585
  node = self.get_node(node_database_reader.node_id)
@@ -1285,16 +1589,20 @@ class FlowGraph:
1285
1589
  node.function = _func
1286
1590
  node.setting_input = node_database_reader
1287
1591
  node.node_settings.cache_results = node_database_reader.cache_results
1288
- if node_database_reader.node_id not in set(start_node.node_id for start_node in self._flow_starts):
1289
- self._flow_starts.append(node)
1592
+ self.add_node_to_starting_list(node)
1290
1593
  node.schema_callback = schema_callback
1291
1594
  else:
1292
- node = FlowNode(node_database_reader.node_id, function=_func,
1293
- setting_input=node_database_reader,
1294
- name=node_type, node_type=node_type, parent_uuid=self.uuid,
1295
- schema_callback=schema_callback)
1595
+ node = FlowNode(
1596
+ node_database_reader.node_id,
1597
+ function=_func,
1598
+ setting_input=node_database_reader,
1599
+ name=node_type,
1600
+ node_type=node_type,
1601
+ parent_uuid=self.uuid,
1602
+ schema_callback=schema_callback,
1603
+ )
1296
1604
  self._node_db[node_database_reader.node_id] = node
1297
- self._flow_starts.append(node)
1605
+ self.add_node_to_starting_list(node)
1298
1606
  self._node_ids.append(node_database_reader.node_id)
1299
1607
 
1300
1608
  def add_sql_source(self, external_source_input: input_schema.NodeExternalSource):
@@ -1305,7 +1613,7 @@ class FlowGraph:
1305
1613
  Args:
1306
1614
  external_source_input: The settings for the external SQL source node.
1307
1615
  """
1308
- logger.info('Adding sql source')
1616
+ logger.info("Adding sql source")
1309
1617
  self.add_external_source(external_source_input)
1310
1618
 
1311
1619
  def add_cloud_storage_writer(self, node_cloud_storage_writer: input_schema.NodeCloudStorageWriter) -> None:
@@ -1316,27 +1624,30 @@ class FlowGraph:
1316
1624
  """
1317
1625
 
1318
1626
  node_type = "cloud_storage_writer"
1627
+
1319
1628
  def _func(df: FlowDataEngine):
1320
1629
  df.lazy = True
1321
- execute_remote = self.execution_location != 'local'
1630
+ execute_remote = self.execution_location != "local"
1322
1631
  cloud_connection_settings = get_cloud_connection_settings(
1323
1632
  connection_name=node_cloud_storage_writer.cloud_storage_settings.connection_name,
1324
1633
  user_id=node_cloud_storage_writer.user_id,
1325
- auth_mode=node_cloud_storage_writer.cloud_storage_settings.auth_mode
1634
+ auth_mode=node_cloud_storage_writer.cloud_storage_settings.auth_mode,
1326
1635
  )
1327
1636
  full_cloud_storage_connection = FullCloudStorageConnection(
1328
1637
  storage_type=cloud_connection_settings.storage_type,
1329
1638
  auth_method=cloud_connection_settings.auth_method,
1330
1639
  aws_allow_unsafe_html=cloud_connection_settings.aws_allow_unsafe_html,
1331
- **CloudStorageReader.get_storage_options(cloud_connection_settings)
1640
+ **CloudStorageReader.get_storage_options(cloud_connection_settings),
1332
1641
  )
1333
1642
  if execute_remote:
1334
1643
  settings = get_cloud_storage_write_settings_worker_interface(
1335
1644
  write_settings=node_cloud_storage_writer.cloud_storage_settings,
1336
1645
  connection=full_cloud_storage_connection,
1337
1646
  lf=df.data_frame,
1647
+ user_id=node_cloud_storage_writer.user_id,
1338
1648
  flowfile_node_id=node_cloud_storage_writer.node_id,
1339
- flowfile_flow_id=self.flow_id)
1649
+ flowfile_flow_id=self.flow_id,
1650
+ )
1340
1651
  external_database_writer = ExternalCloudWriter(settings, wait_on_completion=False)
1341
1652
  node._fetch_cached_df = external_database_writer
1342
1653
  external_database_writer.get_result()
@@ -1362,7 +1673,7 @@ class FlowGraph:
1362
1673
  node_type=node_type,
1363
1674
  setting_input=node_cloud_storage_writer,
1364
1675
  schema_callback=schema_callback,
1365
- input_node_ids=[node_cloud_storage_writer.depending_on_id]
1676
+ input_node_ids=[node_cloud_storage_writer.depending_on_id],
1366
1677
  )
1367
1678
 
1368
1679
  node = self.get_node(node_cloud_storage_writer.node_id)
@@ -1380,49 +1691,53 @@ class FlowGraph:
1380
1691
  def _func():
1381
1692
  logger.info("Starting to run the schema callback for cloud storage reader")
1382
1693
  self.flow_logger.info("Starting to run the schema callback for cloud storage reader")
1383
- settings = CloudStorageReadSettingsInternal(read_settings=cloud_storage_read_settings,
1384
- connection=get_cloud_connection_settings(
1385
- connection_name=cloud_storage_read_settings.connection_name,
1386
- user_id=node_cloud_storage_reader.user_id,
1387
- auth_mode=cloud_storage_read_settings.auth_mode
1388
- ))
1694
+ settings = CloudStorageReadSettingsInternal(
1695
+ read_settings=cloud_storage_read_settings,
1696
+ connection=get_cloud_connection_settings(
1697
+ connection_name=cloud_storage_read_settings.connection_name,
1698
+ user_id=node_cloud_storage_reader.user_id,
1699
+ auth_mode=cloud_storage_read_settings.auth_mode,
1700
+ ),
1701
+ )
1389
1702
  fl = FlowDataEngine.from_cloud_storage_obj(settings)
1390
1703
  return fl
1391
1704
 
1392
- node = self.add_node_step(node_id=node_cloud_storage_reader.node_id,
1393
- function=_func,
1394
- cache_results=node_cloud_storage_reader.cache_results,
1395
- setting_input=node_cloud_storage_reader,
1396
- node_type=node_type,
1397
- )
1398
- if node_cloud_storage_reader.node_id not in set(start_node.node_id for start_node in self._flow_starts):
1399
- self._flow_starts.append(node)
1705
+ node = self.add_node_step(
1706
+ node_id=node_cloud_storage_reader.node_id,
1707
+ function=_func,
1708
+ cache_results=node_cloud_storage_reader.cache_results,
1709
+ setting_input=node_cloud_storage_reader,
1710
+ node_type=node_type,
1711
+ )
1712
+ self.add_node_to_starting_list(node)
1400
1713
 
1401
- def add_external_source(self,
1402
- external_source_input: input_schema.NodeExternalSource):
1714
+ def add_external_source(self, external_source_input: input_schema.NodeExternalSource):
1403
1715
  """Adds a node for a custom external data source.
1404
1716
 
1405
1717
  Args:
1406
1718
  external_source_input: The settings for the external source node.
1407
1719
  """
1408
1720
 
1409
- node_type = 'external_source'
1721
+ node_type = "external_source"
1410
1722
  external_source_script = getattr(external_sources.custom_external_sources, external_source_input.identifier)
1411
- source_settings = (getattr(input_schema, snake_case_to_camel_case(external_source_input.identifier)).
1412
- model_validate(external_source_input.source_settings))
1413
- if hasattr(external_source_script, 'initial_getter'):
1414
- initial_getter = getattr(external_source_script, 'initial_getter')(source_settings)
1723
+ source_settings = getattr(
1724
+ input_schema, snake_case_to_camel_case(external_source_input.identifier)
1725
+ ).model_validate(external_source_input.source_settings)
1726
+ if hasattr(external_source_script, "initial_getter"):
1727
+ initial_getter = external_source_script.initial_getter(source_settings)
1415
1728
  else:
1416
1729
  initial_getter = None
1417
1730
  data_getter = external_source_script.getter(source_settings)
1418
- external_source = data_source_factory(source_type='custom',
1419
- data_getter=data_getter,
1420
- initial_data_getter=initial_getter,
1421
- orientation=external_source_input.source_settings.orientation,
1422
- schema=None)
1731
+ external_source = data_source_factory(
1732
+ source_type="custom",
1733
+ data_getter=data_getter,
1734
+ initial_data_getter=initial_getter,
1735
+ orientation=external_source_input.source_settings.orientation,
1736
+ schema=None,
1737
+ )
1423
1738
 
1424
1739
  def _func():
1425
- logger.info('Calling external source')
1740
+ logger.info("Calling external source")
1426
1741
  fl = FlowDataEngine.create_from_external_source(external_source=external_source)
1427
1742
  external_source_input.source_settings.fields = [c.get_minimal_field_info() for c in fl.schema]
1428
1743
  return fl
@@ -1434,31 +1749,39 @@ class FlowGraph:
1434
1749
  node.function = _func
1435
1750
  node.setting_input = external_source_input
1436
1751
  node.node_settings.cache_results = external_source_input.cache_results
1437
- if external_source_input.node_id not in set(start_node.node_id for start_node in self._flow_starts):
1438
- self._flow_starts.append(node)
1752
+ self.add_node_to_starting_list(node)
1753
+
1439
1754
  else:
1440
- node = FlowNode(external_source_input.node_id, function=_func,
1441
- setting_input=external_source_input,
1442
- name=node_type, node_type=node_type, parent_uuid=self.uuid)
1755
+ node = FlowNode(
1756
+ external_source_input.node_id,
1757
+ function=_func,
1758
+ setting_input=external_source_input,
1759
+ name=node_type,
1760
+ node_type=node_type,
1761
+ parent_uuid=self.uuid,
1762
+ )
1443
1763
  self._node_db[external_source_input.node_id] = node
1444
- self._flow_starts.append(node)
1764
+ self.add_node_to_starting_list(node)
1445
1765
  self._node_ids.append(external_source_input.node_id)
1446
1766
  if external_source_input.source_settings.fields and len(external_source_input.source_settings.fields) > 0:
1447
- logger.info('Using provided schema in the node')
1767
+ logger.info("Using provided schema in the node")
1448
1768
 
1449
1769
  def schema_callback():
1450
- return [FlowfileColumn.from_input(f.name, f.data_type) for f in
1451
- external_source_input.source_settings.fields]
1770
+ return [
1771
+ FlowfileColumn.from_input(f.name, f.data_type) for f in external_source_input.source_settings.fields
1772
+ ]
1452
1773
 
1453
1774
  node.schema_callback = schema_callback
1454
1775
  else:
1455
- logger.warning('Removing schema')
1776
+ logger.warning("Removing schema")
1456
1777
  node._schema_callback = None
1457
- self.add_node_step(node_id=external_source_input.node_id,
1458
- function=_func,
1459
- input_columns=[],
1460
- node_type=node_type,
1461
- setting_input=external_source_input)
1778
+ self.add_node_step(
1779
+ node_id=external_source_input.node_id,
1780
+ function=_func,
1781
+ input_columns=[],
1782
+ node_type=node_type,
1783
+ setting_input=external_source_input,
1784
+ )
1462
1785
 
1463
1786
  def add_read(self, input_file: input_schema.NodeRead):
1464
1787
  """Adds a node to read data from a local file (e.g., CSV, Parquet, Excel).
@@ -1466,8 +1789,10 @@ class FlowGraph:
1466
1789
  Args:
1467
1790
  input_file: The settings for the read operation.
1468
1791
  """
1469
- if (input_file.received_file.file_type in ('xlsx', 'excel') and
1470
- input_file.received_file.table_settings.sheet_name == ''):
1792
+ if (
1793
+ input_file.received_file.file_type in ("xlsx", "excel")
1794
+ and input_file.received_file.table_settings.sheet_name == ""
1795
+ ):
1471
1796
  sheet_name = fastexcel.read_excel(input_file.received_file.path).sheet_names[0]
1472
1797
  input_file.received_file.table_settings.sheet_name = sheet_name
1473
1798
 
@@ -1476,14 +1801,17 @@ class FlowGraph:
1476
1801
 
1477
1802
  def _func():
1478
1803
  input_file.received_file.set_absolute_filepath()
1479
- if input_file.received_file.file_type == 'parquet':
1804
+ if input_file.received_file.file_type == "parquet":
1480
1805
  input_data = FlowDataEngine.create_from_path(input_file.received_file)
1481
- elif input_file.received_file.file_type == 'csv' and 'utf' in input_file.received_file.table_settings.encoding:
1806
+ elif (
1807
+ input_file.received_file.file_type == "csv"
1808
+ and "utf" in input_file.received_file.table_settings.encoding
1809
+ ):
1482
1810
  input_data = FlowDataEngine.create_from_path(input_file.received_file)
1483
1811
  else:
1484
- input_data = FlowDataEngine.create_from_path_worker(input_file.received_file,
1485
- node_id=input_file.node_id,
1486
- flow_id=self.flow_id)
1812
+ input_data = FlowDataEngine.create_from_path_worker(
1813
+ input_file.received_file, node_id=input_file.node_id, flow_id=self.flow_id
1814
+ )
1487
1815
  input_data.name = input_file.received_file.name
1488
1816
  return input_data
1489
1817
 
@@ -1491,51 +1819,58 @@ class FlowGraph:
1491
1819
  schema_callback = None
1492
1820
  if node:
1493
1821
  start_hash = node.hash
1494
- node.node_type = 'read'
1495
- node.name = 'read'
1822
+ node.node_type = "read"
1823
+ node.name = "read"
1496
1824
  node.function = _func
1497
1825
  node.setting_input = input_file
1498
- if input_file.node_id not in set(start_node.node_id for start_node in self._flow_starts):
1499
- self._flow_starts.append(node)
1826
+ self.add_node_to_starting_list(node)
1500
1827
 
1501
1828
  if start_hash != node.hash:
1502
- logger.info('Hash changed, updating schema')
1829
+ logger.info("Hash changed, updating schema")
1503
1830
  if len(received_file.fields) > 0:
1504
1831
  # If the file has fields defined, we can use them to create the schema
1505
1832
  def schema_callback():
1506
1833
  return [FlowfileColumn.from_input(f.name, f.data_type) for f in received_file.fields]
1507
1834
 
1508
- elif input_file.received_file.file_type in ('csv', 'json', 'parquet'):
1835
+ elif input_file.received_file.file_type in ("csv", "json", "parquet"):
1509
1836
  # everything that can be scanned by polars
1510
1837
  def schema_callback():
1511
1838
  input_data = FlowDataEngine.create_from_path(input_file.received_file)
1512
1839
  return input_data.schema
1513
1840
 
1514
- elif input_file.received_file.file_type in ('xlsx', 'excel'):
1841
+ elif input_file.received_file.file_type in ("xlsx", "excel"):
1515
1842
  # If the file is an Excel file, we need to use the openpyxl engine to read the schema
1516
- schema_callback = get_xlsx_schema_callback(engine='openpyxl',
1517
- file_path=received_file.file_path,
1518
- sheet_name=received_file.table_settings.sheet_name,
1519
- start_row=received_file.table_settings.start_row,
1520
- end_row=received_file.table_settings.end_row,
1521
- start_column=received_file.table_settings.start_column,
1522
- end_column=received_file.table_settings.end_column,
1523
- has_headers=received_file.table_settings.has_headers)
1843
+ schema_callback = get_xlsx_schema_callback(
1844
+ engine="openpyxl",
1845
+ file_path=received_file.file_path,
1846
+ sheet_name=received_file.table_settings.sheet_name,
1847
+ start_row=received_file.table_settings.start_row,
1848
+ end_row=received_file.table_settings.end_row,
1849
+ start_column=received_file.table_settings.start_column,
1850
+ end_column=received_file.table_settings.end_column,
1851
+ has_headers=received_file.table_settings.has_headers,
1852
+ )
1524
1853
  else:
1525
1854
  schema_callback = None
1526
1855
  else:
1527
- node = FlowNode(input_file.node_id, function=_func,
1528
- setting_input=input_file,
1529
- name='read', node_type='read', parent_uuid=self.uuid)
1856
+ node = FlowNode(
1857
+ input_file.node_id,
1858
+ function=_func,
1859
+ setting_input=input_file,
1860
+ name="read",
1861
+ node_type="read",
1862
+ parent_uuid=self.uuid,
1863
+ )
1530
1864
  self._node_db[input_file.node_id] = node
1531
- self._flow_starts.append(node)
1865
+ self.add_node_to_starting_list(node)
1532
1866
  self._node_ids.append(input_file.node_id)
1533
1867
 
1534
1868
  if schema_callback is not None:
1535
1869
  node.schema_callback = schema_callback
1870
+ node.user_provided_schema_callback = schema_callback
1536
1871
  return self
1537
1872
 
1538
- def add_datasource(self, input_file: Union[input_schema.NodeDatasource, input_schema.NodeManualInput]) -> "FlowGraph":
1873
+ def add_datasource(self, input_file: input_schema.NodeDatasource | input_schema.NodeManualInput) -> "FlowGraph":
1539
1874
  """Adds a data source node to the graph.
1540
1875
 
1541
1876
  This method serves as a factory for creating starting nodes, handling both
@@ -1549,25 +1884,30 @@ class FlowGraph:
1549
1884
  """
1550
1885
  if isinstance(input_file, input_schema.NodeManualInput):
1551
1886
  input_data = FlowDataEngine(input_file.raw_data_format)
1552
- ref = 'manual_input'
1887
+ ref = "manual_input"
1553
1888
  else:
1554
1889
  input_data = FlowDataEngine(path_ref=input_file.file_ref)
1555
- ref = 'datasource'
1890
+ ref = "datasource"
1556
1891
  node = self.get_node(input_file.node_id)
1557
1892
  if node:
1558
1893
  node.node_type = ref
1559
1894
  node.name = ref
1560
1895
  node.function = input_data
1561
1896
  node.setting_input = input_file
1562
- if not input_file.node_id in set(start_node.node_id for start_node in self._flow_starts):
1563
- self._flow_starts.append(node)
1897
+ self.add_node_to_starting_list(node)
1898
+
1564
1899
  else:
1565
1900
  input_data.collect()
1566
- node = FlowNode(input_file.node_id, function=input_data,
1567
- setting_input=input_file,
1568
- name=ref, node_type=ref, parent_uuid=self.uuid)
1901
+ node = FlowNode(
1902
+ input_file.node_id,
1903
+ function=input_data,
1904
+ setting_input=input_file,
1905
+ name=ref,
1906
+ node_type=ref,
1907
+ parent_uuid=self.uuid,
1908
+ )
1569
1909
  self._node_db[input_file.node_id] = node
1570
- self._flow_starts.append(node)
1910
+ self.add_node_to_starting_list(node)
1571
1911
  self._node_ids.append(input_file.node_id)
1572
1912
  return self
1573
1913
 
@@ -1582,7 +1922,7 @@ class FlowGraph:
1582
1922
  self.add_datasource(input_file)
1583
1923
 
1584
1924
  @property
1585
- def nodes(self) -> List[FlowNode]:
1925
+ def nodes(self) -> list[FlowNode]:
1586
1926
  """Gets a list of all FlowNode objects in the graph."""
1587
1927
 
1588
1928
  return list(self._node_db.values())
@@ -1592,7 +1932,7 @@ class FlowGraph:
1592
1932
  """Gets the current execution mode ('Development' or 'Performance')."""
1593
1933
  return self.flow_settings.execution_mode
1594
1934
 
1595
- def get_implicit_starter_nodes(self) -> List[FlowNode]:
1935
+ def get_implicit_starter_nodes(self) -> list[FlowNode]:
1596
1936
  """Finds nodes that can act as starting points but are not explicitly defined as such.
1597
1937
 
1598
1938
  Some nodes, like the Polars Code node, can function without an input. This
@@ -1638,24 +1978,31 @@ class FlowGraph:
1638
1978
  if not flow_node:
1639
1979
  raise Exception("Node not found found")
1640
1980
  skip_nodes, execution_order = compute_execution_plan(
1641
- nodes=self.nodes, flow_starts=self._flow_starts+self.get_implicit_starter_nodes()
1981
+ nodes=self.nodes, flow_starts=self._flow_starts + self.get_implicit_starter_nodes()
1642
1982
  )
1643
1983
  if flow_node.node_id in [skip_node.node_id for skip_node in skip_nodes]:
1644
1984
  raise Exception("Node can not be executed because it does not have it's inputs")
1645
1985
 
1646
- def create_initial_run_information(self, number_of_nodes: int,
1647
- run_type: Literal["fetch_one", "full_run"]):
1986
+ def create_initial_run_information(self, number_of_nodes: int, run_type: Literal["fetch_one", "full_run"]):
1648
1987
  return RunInformation(
1649
- flow_id=self.flow_id, start_time=datetime.datetime.now(), end_time=None,
1650
- success=None, number_of_nodes=number_of_nodes, node_step_result=[],
1651
- run_type=run_type
1988
+ flow_id=self.flow_id,
1989
+ start_time=datetime.datetime.now(),
1990
+ end_time=None,
1991
+ success=None,
1992
+ number_of_nodes=number_of_nodes,
1993
+ node_step_result=[],
1994
+ run_type=run_type,
1652
1995
  )
1653
1996
 
1654
1997
  def create_empty_run_information(self) -> RunInformation:
1655
1998
  return RunInformation(
1656
- flow_id=self.flow_id, start_time=None, end_time=None,
1657
- success=None, number_of_nodes=0, node_step_result=[],
1658
- run_type="init"
1999
+ flow_id=self.flow_id,
2000
+ start_time=None,
2001
+ end_time=None,
2002
+ success=None,
2003
+ number_of_nodes=0,
2004
+ node_step_result=[],
2005
+ run_type="init",
1659
2006
  )
1660
2007
 
1661
2008
  def trigger_fetch_node(self, node_id: int) -> RunInformation | None:
@@ -1669,14 +2016,16 @@ class FlowGraph:
1669
2016
  self.latest_run_info = self.create_initial_run_information(1, "fetch_one")
1670
2017
  node_logger = self.flow_logger.get_node_logger(flow_node.node_id)
1671
2018
  node_result = NodeResult(node_id=flow_node.node_id, node_name=flow_node.name)
1672
- logger.info(f'Starting to run: node {flow_node.node_id}, start time: {node_result.start_timestamp}')
2019
+ logger.info(f"Starting to run: node {flow_node.node_id}, start time: {node_result.start_timestamp}")
1673
2020
  try:
1674
2021
  self.latest_run_info.node_step_result.append(node_result)
1675
- flow_node.execute_node(run_location=self.flow_settings.execution_location,
1676
- performance_mode=False,
1677
- node_logger=node_logger,
1678
- optimize_for_downstream=False,
1679
- reset_cache=True)
2022
+ flow_node.execute_node(
2023
+ run_location=self.flow_settings.execution_location,
2024
+ performance_mode=False,
2025
+ node_logger=node_logger,
2026
+ optimize_for_downstream=False,
2027
+ reset_cache=True,
2028
+ )
1680
2029
  node_result.error = str(flow_node.results.errors)
1681
2030
  if self.flow_settings.is_canceled:
1682
2031
  node_result.success = None
@@ -1691,12 +2040,12 @@ class FlowGraph:
1691
2040
  self.flow_settings.is_running = False
1692
2041
  return self.get_run_info()
1693
2042
  except Exception as e:
1694
- node_result.error = 'Node did not run'
2043
+ node_result.error = "Node did not run"
1695
2044
  node_result.success = False
1696
2045
  node_result.end_timestamp = time()
1697
2046
  node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
1698
2047
  node_result.is_running = False
1699
- node_logger.error(f'Error in node {flow_node.node_id}: {e}')
2048
+ node_logger.error(f"Error in node {flow_node.node_id}: {e}")
1700
2049
  finally:
1701
2050
  self.flow_settings.is_running = False
1702
2051
 
@@ -1713,39 +2062,38 @@ class FlowGraph:
1713
2062
  Exception: If the flow is already running.
1714
2063
  """
1715
2064
  if self.flow_settings.is_running:
1716
- raise Exception('Flow is already running')
2065
+ raise Exception("Flow is already running")
1717
2066
  try:
1718
-
1719
2067
  self.flow_settings.is_running = True
1720
2068
  self.flow_settings.is_canceled = False
1721
2069
  self.flow_logger.clear_log_file()
1722
- self.flow_logger.info('Starting to run flowfile flow...')
1723
-
2070
+ self.flow_logger.info("Starting to run flowfile flow...")
1724
2071
  skip_nodes, execution_order = compute_execution_plan(
1725
- nodes=self.nodes,
1726
- flow_starts=self._flow_starts+self.get_implicit_starter_nodes()
2072
+ nodes=self.nodes, flow_starts=self._flow_starts + self.get_implicit_starter_nodes()
1727
2073
  )
1728
2074
 
1729
2075
  self.latest_run_info = self.create_initial_run_information(len(execution_order), "full_run")
1730
2076
 
1731
2077
  skip_node_message(self.flow_logger, skip_nodes)
1732
2078
  execution_order_message(self.flow_logger, execution_order)
1733
- performance_mode = self.flow_settings.execution_mode == 'Performance'
2079
+ performance_mode = self.flow_settings.execution_mode == "Performance"
1734
2080
 
1735
2081
  for node in execution_order:
1736
2082
  node_logger = self.flow_logger.get_node_logger(node.node_id)
1737
2083
  if self.flow_settings.is_canceled:
1738
- self.flow_logger.info('Flow canceled')
2084
+ self.flow_logger.info("Flow canceled")
1739
2085
  break
1740
2086
  if node in skip_nodes:
1741
- node_logger.info(f'Skipping node {node.node_id}')
2087
+ node_logger.info(f"Skipping node {node.node_id}")
1742
2088
  continue
1743
2089
  node_result = NodeResult(node_id=node.node_id, node_name=node.name)
1744
2090
  self.latest_run_info.node_step_result.append(node_result)
1745
- logger.info(f'Starting to run: node {node.node_id}, start time: {node_result.start_timestamp}')
1746
- node.execute_node(run_location=self.flow_settings.execution_location,
1747
- performance_mode=performance_mode,
1748
- node_logger=node_logger)
2091
+ logger.info(f"Starting to run: node {node.node_id}, start time: {node_result.start_timestamp}")
2092
+ node.execute_node(
2093
+ run_location=self.flow_settings.execution_location,
2094
+ performance_mode=performance_mode,
2095
+ node_logger=node_logger,
2096
+ )
1749
2097
  try:
1750
2098
  node_result.error = str(node.results.errors)
1751
2099
  if self.flow_settings.is_canceled:
@@ -1758,22 +2106,22 @@ class FlowGraph:
1758
2106
  node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
1759
2107
  node_result.is_running = False
1760
2108
  except Exception as e:
1761
- node_result.error = 'Node did not run'
2109
+ node_result.error = "Node did not run"
1762
2110
  node_result.success = False
1763
2111
  node_result.end_timestamp = time()
1764
2112
  node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
1765
2113
  node_result.is_running = False
1766
- node_logger.error(f'Error in node {node.node_id}: {e}')
2114
+ node_logger.error(f"Error in node {node.node_id}: {e}")
1767
2115
  if not node_result.success:
1768
2116
  skip_nodes.extend(list(node.get_all_dependent_nodes()))
1769
- node_logger.info(f'Completed node with success: {node_result.success}')
2117
+ node_logger.info(f"Completed node with success: {node_result.success}")
1770
2118
  self.latest_run_info.nodes_completed += 1
1771
2119
  self.latest_run_info.end_time = datetime.datetime.now()
1772
- self.flow_logger.info('Flow completed!')
2120
+ self.flow_logger.info("Flow completed!")
1773
2121
  self.end_datetime = datetime.datetime.now()
1774
2122
  self.flow_settings.is_running = False
1775
2123
  if self.flow_settings.is_canceled:
1776
- self.flow_logger.info('Flow canceled')
2124
+ self.flow_logger.info("Flow canceled")
1777
2125
  return self.get_run_info()
1778
2126
  except Exception as e:
1779
2127
  raise e
@@ -1799,7 +2147,7 @@ class FlowGraph:
1799
2147
  return run_info
1800
2148
 
1801
2149
  @property
1802
- def node_connections(self) -> List[Tuple[int, int]]:
2150
+ def node_connections(self) -> list[tuple[int, int]]:
1803
2151
  """Computes and returns a list of all connections in the graph.
1804
2152
 
1805
2153
  Returns:
@@ -1809,8 +2157,9 @@ class FlowGraph:
1809
2157
  for node in self.nodes:
1810
2158
  outgoing_connections = [(node.node_id, ltn.node_id) for ltn in node.leads_to_nodes]
1811
2159
  incoming_connections = [(don.node_id, node.node_id) for don in node.all_inputs]
1812
- node_connections = [c for c in outgoing_connections + incoming_connections if (c[0] is not None
1813
- and c[1] is not None)]
2160
+ node_connections = [
2161
+ c for c in outgoing_connections + incoming_connections if (c[0] is not None and c[1] is not None)
2162
+ ]
1814
2163
  for node_connection in node_connections:
1815
2164
  if node_connection not in connections:
1816
2165
  connections.add(node_connection)
@@ -1871,16 +2220,18 @@ class FlowGraph:
1871
2220
  Returns:
1872
2221
  A FlowInformation object representing the complete graph.
1873
2222
  """
1874
- node_information = {node.node_id: node.get_node_information() for
1875
- node in self.nodes if node.is_setup and node.is_correct}
2223
+ node_information = {
2224
+ node.node_id: node.get_node_information() for node in self.nodes if node.is_setup and node.is_correct
2225
+ }
1876
2226
 
1877
- return schemas.FlowInformation(flow_id=self.flow_id,
1878
- flow_name=self.__name__,
1879
- flow_settings=self.flow_settings,
1880
- data=node_information,
1881
- node_starts=[v.node_id for v in self._flow_starts],
1882
- node_connections=self.node_connections
1883
- )
2227
+ return schemas.FlowInformation(
2228
+ flow_id=self.flow_id,
2229
+ flow_name=self.__name__,
2230
+ flow_settings=self.flow_settings,
2231
+ data=node_information,
2232
+ node_starts=[v.node_id for v in self._flow_starts],
2233
+ node_connections=self.node_connections,
2234
+ )
1884
2235
 
1885
2236
  def cancel(self):
1886
2237
  """Cancels an ongoing graph execution."""
@@ -1901,7 +2252,11 @@ class FlowGraph:
1901
2252
  """
1902
2253
  Handle the rename of a flow when it is being saved.
1903
2254
  """
1904
- if self.flow_settings and self.flow_settings.path and Path(self.flow_settings.path).absolute() != new_path.absolute():
2255
+ if (
2256
+ self.flow_settings
2257
+ and self.flow_settings.path
2258
+ and Path(self.flow_settings.path).absolute() != new_path.absolute()
2259
+ ):
1905
2260
  self.__name__ = new_name
1906
2261
  self.flow_settings.save_location = str(new_path.absolute())
1907
2262
  self.flow_settings.name = new_name
@@ -1928,27 +2283,27 @@ class FlowGraph:
1928
2283
  self._handle_flow_renaming(new_flow_name, path)
1929
2284
  self.flow_settings.modified_on = datetime.datetime.now().timestamp()
1930
2285
  try:
1931
- if suffix == '.flowfile':
2286
+ if suffix == ".flowfile":
1932
2287
  raise DeprecationWarning(
1933
- f"The .flowfile format is deprecated. Please use .yaml or .json formats.\n\n"
2288
+ "The .flowfile format is deprecated. Please use .yaml or .json formats.\n\n"
1934
2289
  "Or stay on v0.4.1 if you still need .flowfile support.\n\n"
1935
2290
  )
1936
- elif suffix in ('.yaml', '.yml'):
2291
+ elif suffix in (".yaml", ".yml"):
1937
2292
  flowfile_data = self.get_flowfile_data()
1938
- data = flowfile_data.model_dump(mode='json')
1939
- with open(flow_path, 'w', encoding='utf-8') as f:
2293
+ data = flowfile_data.model_dump(mode="json")
2294
+ with open(flow_path, "w", encoding="utf-8") as f:
1940
2295
  yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
1941
- elif suffix == '.json':
2296
+ elif suffix == ".json":
1942
2297
  flowfile_data = self.get_flowfile_data()
1943
- data = flowfile_data.model_dump(mode='json')
1944
- with open(flow_path, 'w', encoding='utf-8') as f:
2298
+ data = flowfile_data.model_dump(mode="json")
2299
+ with open(flow_path, "w", encoding="utf-8") as f:
1945
2300
  json.dump(data, f, indent=2, ensure_ascii=False)
1946
2301
 
1947
2302
  else:
1948
2303
  flowfile_data = self.get_flowfile_data()
1949
2304
  logger.warning(f"Unknown file extension {suffix}. Defaulting to YAML format.")
1950
- data = flowfile_data.model_dump(mode='json')
1951
- with open(flow_path, 'w', encoding='utf-8') as f:
2305
+ data = flowfile_data.model_dump(mode="json")
2306
+ with open(flow_path, "w", encoding="utf-8") as f:
1952
2307
  yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
1953
2308
 
1954
2309
  except Exception as e:
@@ -1966,11 +2321,7 @@ class FlowGraph:
1966
2321
  Returns:
1967
2322
  A dictionary representing the graph in Drawflow format.
1968
2323
  """
1969
- result = {
1970
- 'Home': {
1971
- "data": {}
1972
- }
1973
- }
2324
+ result = {"Home": {"data": {}}}
1974
2325
  flow_info: schemas.FlowInformation = self.get_node_storage()
1975
2326
 
1976
2327
  for node_id, node_info in flow_info.data.items():
@@ -1989,7 +2340,7 @@ class FlowGraph:
1989
2340
  "inputs": {},
1990
2341
  "outputs": {},
1991
2342
  "pos_x": pos_x,
1992
- "pos_y": pos_y
2343
+ "pos_y": pos_y,
1993
2344
  }
1994
2345
  except Exception as e:
1995
2346
  logger.error(e)
@@ -2003,24 +2354,27 @@ class FlowGraph:
2003
2354
  leading_to_node = self.get_node(output_node_id)
2004
2355
  input_types = leading_to_node.get_input_type(node_info.id)
2005
2356
  for input_type in input_types:
2006
- if input_type == 'main':
2007
- input_frontend_id = 'input_1'
2008
- elif input_type == 'right':
2009
- input_frontend_id = 'input_2'
2010
- elif input_type == 'left':
2011
- input_frontend_id = 'input_3'
2357
+ if input_type == "main":
2358
+ input_frontend_id = "input_1"
2359
+ elif input_type == "right":
2360
+ input_frontend_id = "input_2"
2361
+ elif input_type == "left":
2362
+ input_frontend_id = "input_3"
2012
2363
  else:
2013
- input_frontend_id = 'input_1'
2364
+ input_frontend_id = "input_1"
2014
2365
  connection = {"node": str(output_node_id), "input": input_frontend_id}
2015
2366
  connections.append(connection)
2016
2367
 
2017
- result["Home"]["data"][str(node_id)]["outputs"]["output_1"] = {
2018
- "connections": connections}
2368
+ result["Home"]["data"][str(node_id)]["outputs"]["output_1"] = {"connections": connections}
2019
2369
  else:
2020
2370
  result["Home"]["data"][str(node_id)]["outputs"] = {"output_1": {"connections": []}}
2021
2371
 
2022
2372
  # Add input to the node based on `depending_on_id` in your backend data
2023
- if node_info.left_input_id is not None or node_info.right_input_id is not None or node_info.input_ids is not None:
2373
+ if (
2374
+ node_info.left_input_id is not None
2375
+ or node_info.right_input_id is not None
2376
+ or node_info.input_ids is not None
2377
+ ):
2024
2378
  main_inputs = node_info.main_input_ids
2025
2379
  result["Home"]["data"][str(node_id)]["inputs"]["input_1"] = {
2026
2380
  "connections": [{"node": str(main_node_id), "input": "output_1"} for main_node_id in main_inputs]
@@ -2041,8 +2395,8 @@ class FlowGraph:
2041
2395
  Returns:
2042
2396
  A VueFlowInput object.
2043
2397
  """
2044
- edges: List[schemas.NodeEdge] = []
2045
- nodes: List[schemas.NodeInput] = []
2398
+ edges: list[schemas.NodeEdge] = []
2399
+ nodes: list[schemas.NodeInput] = []
2046
2400
  for node in self.nodes:
2047
2401
  nodes.append(node.get_node_input())
2048
2402
  edges.extend(node.get_edge_input())
@@ -2054,7 +2408,9 @@ class FlowGraph:
2054
2408
  for node in self.nodes:
2055
2409
  node.reset(True)
2056
2410
 
2057
- def copy_node(self, new_node_settings: input_schema.NodePromise, existing_setting_input: Any, node_type: str) -> None:
2411
+ def copy_node(
2412
+ self, new_node_settings: input_schema.NodePromise, existing_setting_input: Any, node_type: str
2413
+ ) -> None:
2058
2414
  """Creates a copy of an existing node.
2059
2415
 
2060
2416
  Args:
@@ -2067,9 +2423,7 @@ class FlowGraph:
2067
2423
  if isinstance(existing_setting_input, input_schema.NodePromise):
2068
2424
  return
2069
2425
 
2070
- combined_settings = combine_existing_settings_and_new_settings(
2071
- existing_setting_input, new_node_settings
2072
- )
2426
+ combined_settings = combine_existing_settings_and_new_settings(existing_setting_input, new_node_settings)
2073
2427
  getattr(self, f"add_{node_type}")(combined_settings)
2074
2428
 
2075
2429
  def generate_code(self):
@@ -2077,6 +2431,7 @@ class FlowGraph:
2077
2431
  This method exports the flow graph to a Polars-compatible format.
2078
2432
  """
2079
2433
  from flowfile_core.flowfile.code_generator.code_generator import export_flow_to_polars
2434
+
2080
2435
  print(export_flow_to_polars(self))
2081
2436
 
2082
2437
 
@@ -2095,13 +2450,7 @@ def combine_existing_settings_and_new_settings(setting_input: Any, new_settings:
2095
2450
  copied_setting_input = deepcopy(setting_input)
2096
2451
 
2097
2452
  # Update only attributes that exist on new_settings
2098
- fields_to_update = (
2099
- "node_id",
2100
- "pos_x",
2101
- "pos_y",
2102
- "description",
2103
- "flow_id"
2104
- )
2453
+ fields_to_update = ("node_id", "pos_x", "pos_y", "description", "flow_id")
2105
2454
 
2106
2455
  for field in fields_to_update:
2107
2456
  if hasattr(new_settings, field) and getattr(new_settings, field) is not None:
@@ -2117,12 +2466,12 @@ def add_connection(flow: FlowGraph, node_connection: input_schema.NodeConnection
2117
2466
  flow: The FlowGraph instance to modify.
2118
2467
  node_connection: An object defining the source and target of the connection.
2119
2468
  """
2120
- logger.info('adding a connection')
2469
+ logger.info("adding a connection")
2121
2470
  from_node = flow.get_node(node_connection.output_connection.node_id)
2122
2471
  to_node = flow.get_node(node_connection.input_connection.node_id)
2123
- logger.info(f'from_node={from_node}, to_node={to_node}')
2472
+ logger.info(f"from_node={from_node}, to_node={to_node}")
2124
2473
  if not (from_node and to_node):
2125
- raise HTTPException(404, 'Not not available')
2474
+ raise HTTPException(404, "Not not available")
2126
2475
  else:
2127
2476
  to_node.add_node_connection(from_node, node_connection.input_connection.get_node_input_connection_type())
2128
2477