Flowfile 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329) hide show
  1. build_backends/main.py +25 -22
  2. build_backends/main_prd.py +10 -19
  3. flowfile/__init__.py +178 -74
  4. flowfile/__main__.py +10 -7
  5. flowfile/api.py +51 -57
  6. flowfile/web/__init__.py +14 -9
  7. flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
  8. flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
  9. flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
  10. flowfile/web/static/assets/{CloudConnectionManager-0dfba9f2.js → CloudConnectionView-f13f202b.js} +11 -11
  11. flowfile/web/static/assets/{CloudStorageReader-d5b1b6c9.js → CloudStorageReader-0023d4a5.js} +10 -8
  12. flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
  13. flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
  14. flowfile/web/static/assets/{CloudStorageWriter-00d87aad.js → CloudStorageWriter-8e781e11.js} +10 -8
  15. flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
  16. flowfile/web/static/assets/{ColumnSelector-4685e75d.js → ColumnSelector-8ad68ea9.js} +3 -5
  17. flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
  18. flowfile/web/static/assets/{ContextMenu-23e909da.js → ContextMenu-31ee57f0.js} +3 -3
  19. flowfile/web/static/assets/{ContextMenu-70ae0c79.js → ContextMenu-69a74055.js} +3 -3
  20. flowfile/web/static/assets/{ContextMenu-f149cf7c.js → ContextMenu-8e2051c6.js} +3 -3
  21. flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
  22. flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
  23. flowfile/web/static/assets/{CrossJoin-702a3edd.js → CrossJoin-03df6938.js} +12 -10
  24. flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
  25. flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
  26. flowfile/web/static/assets/{CustomNode-b1519993.js → CustomNode-8479239b.js} +36 -24
  27. flowfile/web/static/assets/{DatabaseConnectionSettings-6f3e4ea5.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
  28. flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
  29. flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
  30. flowfile/web/static/assets/{DatabaseReader-d38c7295.js → DatabaseReader-c58b9552.js} +25 -15
  31. flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
  32. flowfile/web/static/assets/{DatabaseManager-cf5ef661.js → DatabaseView-d26a9140.js} +11 -11
  33. flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
  34. flowfile/web/static/assets/{DatabaseWriter-b04ef46a.js → DatabaseWriter-4d05ddc7.js} +17 -10
  35. flowfile/web/static/assets/{designer-8da3ba3a.css → DesignerView-a6d0ee84.css} +614 -546
  36. flowfile/web/static/assets/{designer-9633482a.js → DesignerView-e6f5c0e8.js} +1107 -3170
  37. flowfile/web/static/assets/{documentation-ca400224.js → DocumentationView-2e78ef1b.js} +5 -5
  38. flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
  39. flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
  40. flowfile/web/static/assets/{ExploreData-5fa10ed8.js → ExploreData-7b54caca.js} +18 -9
  41. flowfile/web/static/assets/{ExternalSource-d39af878.js → ExternalSource-3fa399b2.js} +9 -7
  42. flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
  43. flowfile/web/static/assets/Filter-7494ea97.css +48 -0
  44. flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
  45. flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
  46. flowfile/web/static/assets/{Formula-6b04fb1d.js → Formula-aac42b1e.js} +13 -11
  47. flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
  48. flowfile/web/static/assets/{FuzzyMatch-999521f4.js → FuzzyMatch-cd9bbfca.js} +12 -10
  49. flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
  50. flowfile/web/static/assets/{GraphSolver-17dd2198.js → GraphSolver-c7e6780e.js} +13 -11
  51. flowfile/web/static/assets/{GroupBy-6b039e18.js → GroupBy-93c5d22b.js} +9 -7
  52. flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
  53. flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
  54. flowfile/web/static/assets/{Join-24d0f113.js → Join-a19b2de2.js} +13 -11
  55. flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
  56. flowfile/web/static/assets/LoginView-d325d632.css +172 -0
  57. flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
  58. flowfile/web/static/assets/{ManualInput-34639209.js → ManualInput-8d3374b2.js} +170 -116
  59. flowfile/web/static/assets/{MultiSelect-0e8724a3.js → MultiSelect-ad1b6243.js} +2 -2
  60. flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
  61. flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
  62. flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
  63. flowfile/web/static/assets/{NumericInput-3d63a470.js → NumericInput-7100234c.js} +2 -2
  64. flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
  65. flowfile/web/static/assets/{Output-283fe388.css → Output-35e97000.css} +6 -6
  66. flowfile/web/static/assets/{Output-edea9802.js → Output-f5efd2aa.js} +12 -9
  67. flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
  68. flowfile/web/static/assets/{Pivot-61d19301.js → Pivot-d981d23c.js} +11 -9
  69. flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
  70. flowfile/web/static/assets/{PivotValidation-f97fec5b.js → PivotValidation-39386e95.js} +3 -3
  71. flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
  72. flowfile/web/static/assets/{PivotValidation-de9f43fe.js → PivotValidation-63de1f73.js} +3 -3
  73. flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
  74. flowfile/web/static/assets/{PolarsCode-bc3c9984.js → PolarsCode-f9d69217.js} +18 -9
  75. flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
  76. flowfile/web/static/assets/PopOver-d96599db.css +33 -0
  77. flowfile/web/static/assets/{Read-e808b239.css → Read-36e7bd51.css} +12 -12
  78. flowfile/web/static/assets/{Read-64a3f259.js → Read-aec2e377.js} +14 -11
  79. flowfile/web/static/assets/{RecordCount-3d5039be.js → RecordCount-78ed6845.js} +6 -4
  80. flowfile/web/static/assets/{RecordId-597510e0.js → RecordId-2156e890.js} +8 -6
  81. flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
  82. flowfile/web/static/assets/{SQLQueryComponent-df51adbe.js → SQLQueryComponent-48c72f5b.js} +3 -3
  83. flowfile/web/static/assets/{Sample-4be0a507.js → Sample-1352ca74.js} +6 -4
  84. flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
  85. flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
  86. flowfile/web/static/assets/{SecretManager-4839be57.js → SecretsView-17df66ee.js} +35 -36
  87. flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
  88. flowfile/web/static/assets/{Select-9b72f201.js → Select-0aee4c54.js} +9 -7
  89. flowfile/web/static/assets/{SettingsSection-f0f75a42.js → SettingsSection-0784e157.js} +3 -3
  90. flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
  91. flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
  92. flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
  93. flowfile/web/static/assets/{SettingsSection-e1e9c953.js → SettingsSection-cd341bb6.js} +3 -3
  94. flowfile/web/static/assets/{SettingsSection-7ded385d.js → SettingsSection-f2002a6d.js} +3 -3
  95. flowfile/web/static/assets/{SingleSelect-6c777aac.js → SingleSelect-460cc0ea.js} +2 -2
  96. flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
  97. flowfile/web/static/assets/{SliderInput-7cb93e62.js → SliderInput-5d926864.js} +7 -4
  98. flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
  99. flowfile/web/static/assets/{Sort-6cbde21a.js → Sort-3cdc971b.js} +9 -7
  100. flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
  101. flowfile/web/static/assets/{TextInput-d9a40c11.js → TextInput-a2d0bfbd.js} +2 -2
  102. flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-5896c375.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
  103. flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
  104. flowfile/web/static/assets/{TextToRows-c4fcbf4d.js → TextToRows-918945f7.js} +11 -10
  105. flowfile/web/static/assets/{ToggleSwitch-4ef91d19.js → ToggleSwitch-f0ef5196.js} +2 -2
  106. flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
  107. flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
  108. flowfile/web/static/assets/{UnavailableFields-a03f512c.js → UnavailableFields-bdad6144.js} +4 -4
  109. flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
  110. flowfile/web/static/assets/{Union-bfe9b996.js → Union-e8ab8c86.js} +8 -6
  111. flowfile/web/static/assets/{Unique-5d023a27.js → Unique-8cd4f976.js} +13 -10
  112. flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
  113. flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
  114. flowfile/web/static/assets/{Unpivot-91cc5354.js → Unpivot-8da14095.js} +10 -8
  115. flowfile/web/static/assets/{UnpivotValidation-7ee2de44.js → UnpivotValidation-6f7d89ff.js} +3 -3
  116. flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
  117. flowfile/web/static/assets/{VueGraphicWalker-e51b9924.js → VueGraphicWalker-3fb312e1.js} +4 -4
  118. flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
  119. flowfile/web/static/assets/{api-cf1221f0.js → api-24483f0d.js} +1 -1
  120. flowfile/web/static/assets/{api-c1bad5ca.js → api-8b81fa73.js} +1 -1
  121. flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
  122. flowfile/web/static/assets/{dropDown-614b998d.js → dropDown-ac0fda9d.js} +3 -3
  123. flowfile/web/static/assets/{fullEditor-f7971590.js → fullEditor-5497a84a.js} +11 -10
  124. flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
  125. flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
  126. flowfile/web/static/assets/{genericNodeSettings-4fe5f36b.js → genericNodeSettings-99014e1d.js} +5 -5
  127. flowfile/web/static/assets/index-07dda503.js +38 -0
  128. flowfile/web/static/assets/index-3ba44389.js +2696 -0
  129. flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
  130. flowfile/web/static/assets/{index-5429bbf8.js → index-fb6493ae.js} +41626 -40867
  131. flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
  132. flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
  133. flowfile/web/static/assets/{outputCsv-076b85ab.js → outputCsv-8f8ba42d.js} +3 -3
  134. flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
  135. flowfile/web/static/assets/{outputExcel-0fd17dbe.js → outputExcel-393f4fef.js} +3 -3
  136. flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
  137. flowfile/web/static/assets/{outputParquet-b61e0847.js → outputParquet-07c81f65.js} +4 -4
  138. flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
  139. flowfile/web/static/assets/{readCsv-a8bb8b61.js → readCsv-07f6d9ad.js} +3 -3
  140. flowfile/web/static/assets/{readCsv-c767cb37.css → readCsv-3bfac4c3.css} +15 -15
  141. flowfile/web/static/assets/{readExcel-806d2826.css → readExcel-3db6b763.css} +13 -13
  142. flowfile/web/static/assets/{readExcel-67b4aee0.js → readExcel-ed69bc8f.js} +5 -5
  143. flowfile/web/static/assets/{readParquet-48c81530.css → readParquet-c5244ad5.css} +4 -4
  144. flowfile/web/static/assets/{readParquet-92ce1dbc.js → readParquet-e3ed4528.js} +3 -3
  145. flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
  146. flowfile/web/static/assets/{selectDynamic-92e25ee3.js → selectDynamic-80b92899.js} +5 -5
  147. flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
  148. flowfile/web/static/assets/{vue-codemirror.esm-41b0e0d7.js → vue-codemirror.esm-0965f39f.js} +31 -640
  149. flowfile/web/static/assets/{vue-content-loader.es-2c8e608f.js → vue-content-loader.es-c506ad97.js} +1 -1
  150. flowfile/web/static/index.html +2 -2
  151. {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +2 -3
  152. flowfile-0.5.3.dist-info/RECORD +402 -0
  153. flowfile_core/__init__.py +13 -6
  154. flowfile_core/auth/jwt.py +51 -16
  155. flowfile_core/auth/models.py +32 -7
  156. flowfile_core/auth/password.py +89 -0
  157. flowfile_core/auth/secrets.py +8 -6
  158. flowfile_core/configs/__init__.py +9 -7
  159. flowfile_core/configs/flow_logger.py +15 -14
  160. flowfile_core/configs/node_store/__init__.py +72 -4
  161. flowfile_core/configs/node_store/nodes.py +155 -172
  162. flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
  163. flowfile_core/configs/settings.py +28 -15
  164. flowfile_core/database/connection.py +7 -6
  165. flowfile_core/database/init_db.py +96 -2
  166. flowfile_core/database/models.py +3 -1
  167. flowfile_core/fileExplorer/__init__.py +17 -0
  168. flowfile_core/fileExplorer/funcs.py +123 -57
  169. flowfile_core/fileExplorer/utils.py +10 -11
  170. flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
  171. flowfile_core/flowfile/analytics/analytics_processor.py +26 -24
  172. flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
  173. flowfile_core/flowfile/analytics/utils.py +1 -1
  174. flowfile_core/flowfile/code_generator/code_generator.py +358 -244
  175. flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
  176. flowfile_core/flowfile/connection_manager/models.py +1 -1
  177. flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
  178. flowfile_core/flowfile/database_connection_manager/models.py +1 -1
  179. flowfile_core/flowfile/extensions.py +17 -12
  180. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
  181. flowfile_core/flowfile/flow_data_engine/create/funcs.py +115 -83
  182. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +481 -423
  183. flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
  184. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
  185. flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
  186. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
  187. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
  188. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +31 -20
  189. flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
  190. flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
  191. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +14 -15
  192. flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
  193. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
  194. flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
  195. flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
  196. flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
  197. flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
  198. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +190 -127
  199. flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
  200. flowfile_core/flowfile/flow_data_engine/utils.py +99 -67
  201. flowfile_core/flowfile/flow_graph.py +918 -571
  202. flowfile_core/flowfile/flow_graph_utils.py +31 -49
  203. flowfile_core/flowfile/flow_node/flow_node.py +330 -233
  204. flowfile_core/flowfile/flow_node/models.py +53 -41
  205. flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
  206. flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
  207. flowfile_core/flowfile/handler.py +80 -30
  208. flowfile_core/flowfile/manage/compatibility_enhancements.py +209 -126
  209. flowfile_core/flowfile/manage/io_flowfile.py +54 -57
  210. flowfile_core/flowfile/node_designer/__init__.py +15 -13
  211. flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
  212. flowfile_core/flowfile/node_designer/custom_node.py +162 -36
  213. flowfile_core/flowfile/node_designer/ui_components.py +135 -34
  214. flowfile_core/flowfile/schema_callbacks.py +71 -51
  215. flowfile_core/flowfile/setting_generator/__init__.py +0 -1
  216. flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
  217. flowfile_core/flowfile/setting_generator/settings.py +64 -53
  218. flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
  219. flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
  220. flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
  221. flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
  222. flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
  223. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
  224. flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
  225. flowfile_core/flowfile/util/calculate_layout.py +9 -13
  226. flowfile_core/flowfile/util/execution_orderer.py +25 -17
  227. flowfile_core/flowfile/util/node_skipper.py +4 -4
  228. flowfile_core/flowfile/utils.py +19 -21
  229. flowfile_core/main.py +26 -19
  230. flowfile_core/routes/auth.py +284 -11
  231. flowfile_core/routes/cloud_connections.py +25 -25
  232. flowfile_core/routes/logs.py +21 -29
  233. flowfile_core/routes/public.py +3 -3
  234. flowfile_core/routes/routes.py +70 -34
  235. flowfile_core/routes/secrets.py +25 -27
  236. flowfile_core/routes/user_defined_components.py +483 -4
  237. flowfile_core/run_lock.py +0 -1
  238. flowfile_core/schemas/__init__.py +4 -6
  239. flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
  240. flowfile_core/schemas/cloud_storage_schemas.py +59 -53
  241. flowfile_core/schemas/input_schema.py +231 -144
  242. flowfile_core/schemas/output_model.py +49 -34
  243. flowfile_core/schemas/schemas.py +116 -89
  244. flowfile_core/schemas/transform_schema.py +518 -263
  245. flowfile_core/schemas/yaml_types.py +21 -7
  246. flowfile_core/secret_manager/secret_manager.py +17 -13
  247. flowfile_core/types.py +29 -9
  248. flowfile_core/utils/arrow_reader.py +7 -6
  249. flowfile_core/utils/excel_file_manager.py +3 -3
  250. flowfile_core/utils/fileManager.py +7 -7
  251. flowfile_core/utils/fl_executor.py +8 -10
  252. flowfile_core/utils/utils.py +4 -4
  253. flowfile_core/utils/validate_setup.py +5 -4
  254. flowfile_frame/__init__.py +106 -51
  255. flowfile_frame/adapters.py +2 -9
  256. flowfile_frame/adding_expr.py +73 -32
  257. flowfile_frame/cloud_storage/frame_helpers.py +27 -23
  258. flowfile_frame/cloud_storage/secret_manager.py +12 -26
  259. flowfile_frame/config.py +2 -5
  260. flowfile_frame/expr.py +311 -218
  261. flowfile_frame/expr.pyi +160 -159
  262. flowfile_frame/expr_name.py +23 -23
  263. flowfile_frame/flow_frame.py +571 -476
  264. flowfile_frame/flow_frame.pyi +123 -104
  265. flowfile_frame/flow_frame_methods.py +227 -246
  266. flowfile_frame/group_frame.py +50 -20
  267. flowfile_frame/join.py +2 -2
  268. flowfile_frame/lazy.py +129 -87
  269. flowfile_frame/lazy_methods.py +83 -30
  270. flowfile_frame/list_name_space.py +55 -50
  271. flowfile_frame/selectors.py +148 -68
  272. flowfile_frame/series.py +9 -7
  273. flowfile_frame/utils.py +19 -21
  274. flowfile_worker/__init__.py +12 -7
  275. flowfile_worker/configs.py +11 -19
  276. flowfile_worker/create/__init__.py +14 -9
  277. flowfile_worker/create/funcs.py +114 -77
  278. flowfile_worker/create/models.py +46 -43
  279. flowfile_worker/create/pl_types.py +14 -15
  280. flowfile_worker/create/read_excel_tables.py +34 -41
  281. flowfile_worker/create/utils.py +22 -19
  282. flowfile_worker/external_sources/s3_source/main.py +18 -51
  283. flowfile_worker/external_sources/s3_source/models.py +34 -27
  284. flowfile_worker/external_sources/sql_source/main.py +8 -5
  285. flowfile_worker/external_sources/sql_source/models.py +13 -9
  286. flowfile_worker/flow_logger.py +10 -8
  287. flowfile_worker/funcs.py +214 -155
  288. flowfile_worker/main.py +11 -17
  289. flowfile_worker/models.py +35 -28
  290. flowfile_worker/process_manager.py +2 -3
  291. flowfile_worker/routes.py +121 -90
  292. flowfile_worker/secrets.py +9 -6
  293. flowfile_worker/spawner.py +80 -49
  294. flowfile_worker/utils.py +3 -2
  295. shared/__init__.py +2 -7
  296. shared/storage_config.py +25 -13
  297. test_utils/postgres/commands.py +3 -2
  298. test_utils/postgres/fixtures.py +9 -9
  299. test_utils/s3/commands.py +1 -1
  300. test_utils/s3/data_generator.py +3 -4
  301. test_utils/s3/demo_data_generator.py +4 -7
  302. test_utils/s3/fixtures.py +7 -5
  303. tools/migrate/__init__.py +1 -1
  304. tools/migrate/__main__.py +16 -29
  305. tools/migrate/legacy_schemas.py +251 -190
  306. tools/migrate/migrate.py +193 -181
  307. tools/migrate/tests/conftest.py +1 -3
  308. tools/migrate/tests/test_migrate.py +36 -41
  309. tools/migrate/tests/test_migration_e2e.py +28 -29
  310. tools/migrate/tests/test_node_migrations.py +50 -20
  311. flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
  312. flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
  313. flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
  314. flowfile/web/static/assets/Filter-9b6d08db.js +0 -164
  315. flowfile/web/static/assets/Filter-f62091b3.css +0 -20
  316. flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
  317. flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
  318. flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
  319. flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
  320. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
  321. flowfile/web/static/assets/nodeInput-5d0d6b79.js +0 -41
  322. flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
  323. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
  324. flowfile/web/static/assets/secretApi-68435402.js +0 -46
  325. flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
  326. flowfile-0.5.1.dist-info/RECORD +0 -388
  327. {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +0 -0
  328. {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +0 -0
  329. {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,14 +1,13 @@
1
1
  # Standard library imports
2
- from base64 import decodebytes, encodebytes
3
2
  import io
4
3
  import threading
4
+ from base64 import decodebytes, encodebytes
5
5
  from time import sleep
6
- from typing import Any, List, Literal, Optional
6
+ from typing import Any, Literal
7
7
  from uuid import uuid4
8
8
 
9
9
  import polars as pl
10
10
  import requests
11
-
12
11
  from pl_fuzzy_frame_match.models import FuzzyMapping
13
12
 
14
13
  from flowfile_core.configs import logger
@@ -17,104 +16,131 @@ from flowfile_core.flowfile.flow_data_engine.subprocess_operations.models import
17
16
  FuzzyJoinInput,
18
17
  OperationType,
19
18
  PolarsOperation,
20
- Status
19
+ Status,
21
20
  )
22
- from flowfile_core.flowfile.sources.external_sources.sql_source.models import (DatabaseExternalReadSettings,
23
- DatabaseExternalWriteSettings)
24
- from flowfile_core.schemas.cloud_storage_schemas import CloudStorageWriteSettingsWorkerInterface
25
- from flowfile_core.schemas.input_schema import (
26
- ReceivedTable
21
+ from flowfile_core.flowfile.sources.external_sources.sql_source.models import (
22
+ DatabaseExternalReadSettings,
23
+ DatabaseExternalWriteSettings,
27
24
  )
25
+ from flowfile_core.schemas.cloud_storage_schemas import CloudStorageWriteSettingsWorkerInterface
26
+ from flowfile_core.schemas.input_schema import ReceivedTable
28
27
  from flowfile_core.utils.arrow_reader import read
29
28
 
30
29
 
31
- def trigger_df_operation(flow_id: int, node_id: int | str, lf: pl.LazyFrame, file_ref: str, operation_type: OperationType = 'store') -> Status:
30
+ def trigger_df_operation(
31
+ flow_id: int, node_id: int | str, lf: pl.LazyFrame, file_ref: str, operation_type: OperationType = "store"
32
+ ) -> Status:
32
33
  encoded_operation = encodebytes(lf.serialize()).decode()
33
- _json = {'task_id': file_ref, 'operation': encoded_operation, 'operation_type': operation_type,
34
- 'flowfile_flow_id': flow_id, 'flowfile_node_id': node_id}
35
- v = requests.post(url=f'{WORKER_URL}/submit_query/', json=_json)
34
+ _json = {
35
+ "task_id": file_ref,
36
+ "operation": encoded_operation,
37
+ "operation_type": operation_type,
38
+ "flowfile_flow_id": flow_id,
39
+ "flowfile_node_id": node_id,
40
+ }
41
+ v = requests.post(url=f"{WORKER_URL}/submit_query/", json=_json)
36
42
  if not v.ok:
37
- raise Exception(f'trigger_df_operation: Could not cache the data, {v.text}')
43
+ raise Exception(f"trigger_df_operation: Could not cache the data, {v.text}")
38
44
  return Status(**v.json())
39
45
 
40
46
 
41
- def trigger_sample_operation(lf: pl.LazyFrame, file_ref: str, flow_id: int, node_id: str | int, sample_size: int = 100) -> Status:
47
+ def trigger_sample_operation(
48
+ lf: pl.LazyFrame, file_ref: str, flow_id: int, node_id: str | int, sample_size: int = 100
49
+ ) -> Status:
42
50
  encoded_operation = encodebytes(lf.serialize()).decode()
43
- _json = {'task_id': file_ref, 'operation': encoded_operation, 'operation_type': 'store_sample',
44
- 'sample_size': sample_size, 'flowfile_flow_id': flow_id, 'flowfile_node_id': node_id}
45
- v = requests.post(url=f'{WORKER_URL}/store_sample/', json=_json)
51
+ _json = {
52
+ "task_id": file_ref,
53
+ "operation": encoded_operation,
54
+ "operation_type": "store_sample",
55
+ "sample_size": sample_size,
56
+ "flowfile_flow_id": flow_id,
57
+ "flowfile_node_id": node_id,
58
+ }
59
+ v = requests.post(url=f"{WORKER_URL}/store_sample/", json=_json)
46
60
  if not v.ok:
47
- raise Exception(f'trigger_sample_operation: Could not cache the data, {v.text}')
61
+ raise Exception(f"trigger_sample_operation: Could not cache the data, {v.text}")
48
62
  return Status(**v.json())
49
63
 
50
64
 
51
- def trigger_fuzzy_match_operation(left_df: pl.LazyFrame, right_df: pl.LazyFrame,
52
- fuzzy_maps: List[FuzzyMapping],
53
- file_ref: str,
54
- flow_id: int,
55
- node_id: int | str) -> Status:
65
+ def trigger_fuzzy_match_operation(
66
+ left_df: pl.LazyFrame,
67
+ right_df: pl.LazyFrame,
68
+ fuzzy_maps: list[FuzzyMapping],
69
+ file_ref: str,
70
+ flow_id: int,
71
+ node_id: int | str,
72
+ ) -> Status:
56
73
  left_serializable_object = PolarsOperation(operation=encodebytes(left_df.serialize()))
57
74
  right_serializable_object = PolarsOperation(operation=encodebytes(right_df.serialize()))
58
- fuzzy_join_input = FuzzyJoinInput(left_df_operation=left_serializable_object,
59
- right_df_operation=right_serializable_object,
60
- fuzzy_maps=fuzzy_maps,
61
- task_id=file_ref,
62
- flowfile_flow_id=flow_id,
63
- flowfile_node_id=node_id
64
- )
75
+ fuzzy_join_input = FuzzyJoinInput(
76
+ left_df_operation=left_serializable_object,
77
+ right_df_operation=right_serializable_object,
78
+ fuzzy_maps=fuzzy_maps,
79
+ task_id=file_ref,
80
+ flowfile_flow_id=flow_id,
81
+ flowfile_node_id=node_id,
82
+ )
65
83
  print("fuzzy join input", fuzzy_join_input)
66
- v = requests.post(f'{WORKER_URL}/add_fuzzy_join', data=fuzzy_join_input.model_dump_json())
84
+ v = requests.post(f"{WORKER_URL}/add_fuzzy_join", data=fuzzy_join_input.model_dump_json())
67
85
  if not v.ok:
68
- raise Exception(f'trigger_fuzzy_match_operation: Could not cache the data, {v.text}')
86
+ raise Exception(f"trigger_fuzzy_match_operation: Could not cache the data, {v.text}")
69
87
  return Status(**v.json())
70
88
 
71
89
 
72
- def trigger_create_operation(flow_id: int, node_id: int | str, received_table: ReceivedTable,
73
- file_type: str = Literal['csv', 'parquet', 'json', 'excel']):
74
- f = requests.post(url=f'{WORKER_URL}/create_table/{file_type}', data=received_table.model_dump_json(),
75
- params={'flowfile_flow_id': flow_id, 'flowfile_node_id': node_id})
90
+ def trigger_create_operation(
91
+ flow_id: int,
92
+ node_id: int | str,
93
+ received_table: ReceivedTable,
94
+ file_type: str = Literal["csv", "parquet", "json", "excel"],
95
+ ):
96
+ f = requests.post(
97
+ url=f"{WORKER_URL}/create_table/{file_type}",
98
+ data=received_table.model_dump_json(),
99
+ params={"flowfile_flow_id": flow_id, "flowfile_node_id": node_id},
100
+ )
76
101
  if not f.ok:
77
- raise Exception(f'trigger_create_operation: Could not cache the data, {f.text}')
102
+ raise Exception(f"trigger_create_operation: Could not cache the data, {f.text}")
78
103
  return Status(**f.json())
79
104
 
80
105
 
81
106
  def trigger_database_read_collector(database_external_read_settings: DatabaseExternalReadSettings):
82
- f = requests.post(url=f'{WORKER_URL}/store_database_read_result',
83
- data=database_external_read_settings.model_dump_json())
107
+ f = requests.post(
108
+ url=f"{WORKER_URL}/store_database_read_result", data=database_external_read_settings.model_dump_json()
109
+ )
84
110
  if not f.ok:
85
- raise Exception(f'trigger_database_read_collector: Could not cache the data, {f.text}')
111
+ raise Exception(f"trigger_database_read_collector: Could not cache the data, {f.text}")
86
112
  return Status(**f.json())
87
113
 
88
114
 
89
115
  def trigger_database_write(database_external_write_settings: DatabaseExternalWriteSettings):
90
- f = requests.post(url=f'{WORKER_URL}/store_database_write_result',
91
- data=database_external_write_settings.model_dump_json())
116
+ f = requests.post(
117
+ url=f"{WORKER_URL}/store_database_write_result", data=database_external_write_settings.model_dump_json()
118
+ )
92
119
  if not f.ok:
93
- raise Exception(f'trigger_database_write: Could not cache the data, {f.text}')
120
+ raise Exception(f"trigger_database_write: Could not cache the data, {f.text}")
94
121
  return Status(**f.json())
95
122
 
96
123
 
97
124
  def trigger_cloud_storage_write(database_external_write_settings: CloudStorageWriteSettingsWorkerInterface):
98
- f = requests.post(url=f'{WORKER_URL}/write_data_to_cloud',
99
- data=database_external_write_settings.model_dump_json())
125
+ f = requests.post(url=f"{WORKER_URL}/write_data_to_cloud", data=database_external_write_settings.model_dump_json())
100
126
  if not f.ok:
101
- raise Exception(f'trigger_cloud_storage_write: Could not cache the data, {f.text}')
127
+ raise Exception(f"trigger_cloud_storage_write: Could not cache the data, {f.text}")
102
128
  return Status(**f.json())
103
129
 
104
130
 
105
131
  def get_results(file_ref: str) -> Status | None:
106
- f = requests.get(f'{WORKER_URL}/status/{file_ref}')
132
+ f = requests.get(f"{WORKER_URL}/status/{file_ref}")
107
133
  if f.status_code == 200:
108
134
  return Status(**f.json())
109
135
  else:
110
- raise Exception(f'get_results: Could not fetch the data, {f.text}')
136
+ raise Exception(f"get_results: Could not fetch the data, {f.text}")
111
137
 
112
138
 
113
139
  def results_exists(file_ref: str):
114
140
  try:
115
- f = requests.get(f'{WORKER_URL}/status/{file_ref}')
141
+ f = requests.get(f"{WORKER_URL}/status/{file_ref}")
116
142
  if f.status_code == 200:
117
- if f.json()['status'] == 'Completed':
143
+ if f.json()["status"] == "Completed":
118
144
  return True
119
145
  return False
120
146
  except requests.RequestException as e:
@@ -134,7 +160,7 @@ def clear_task_from_worker(file_ref: str) -> bool:
134
160
  bool: True if the task was successfully cleared, False otherwise.
135
161
  """
136
162
  try:
137
- f = requests.delete(f'{WORKER_URL}/clear_task/{file_ref}')
163
+ f = requests.delete(f"{WORKER_URL}/clear_task/{file_ref}")
138
164
  if f.status_code == 200:
139
165
  return True
140
166
  return False
@@ -150,16 +176,16 @@ def get_df_result(encoded_df: str) -> pl.LazyFrame:
150
176
 
151
177
  def get_external_df_result(file_ref: str) -> pl.LazyFrame | None:
152
178
  status = get_results(file_ref)
153
- if status.status != 'Completed':
179
+ if status.status != "Completed":
154
180
  raise Exception(f"Status is not completed, {status.status}")
155
- if status.result_type == 'polars':
181
+ if status.result_type == "polars":
156
182
  return get_df_result(status.results)
157
183
  else:
158
184
  raise Exception(f"Result type is not polars, {status.result_type}")
159
185
 
160
186
 
161
187
  def get_status(file_ref: str) -> Status:
162
- status_response = requests.get(f'{WORKER_URL}/status/{file_ref}')
188
+ status_response = requests.get(f"{WORKER_URL}/status/{file_ref}")
163
189
  if status_response.status_code == 200:
164
190
  return Status(**status_response.json())
165
191
  else:
@@ -180,12 +206,12 @@ def cancel_task(file_ref: str) -> bool:
180
206
  Exception: If there's an error communicating with the worker service
181
207
  """
182
208
  try:
183
- response = requests.post(f'{WORKER_URL}/cancel_task/{file_ref}')
209
+ response = requests.post(f"{WORKER_URL}/cancel_task/{file_ref}")
184
210
  if response.ok:
185
211
  return True
186
212
  return False
187
213
  except requests.RequestException as e:
188
- raise Exception(f'Failed to cancel task: {str(e)}')
214
+ raise Exception(f"Failed to cancel task: {str(e)}")
189
215
 
190
216
 
191
217
  class BaseFetcher:
@@ -203,15 +229,15 @@ class BaseFetcher:
203
229
  self._thread = None
204
230
 
205
231
  # State variables - use properties for thread-safe access
206
- self._result: Optional[Any] = None
232
+ self._result: Any | None = None
207
233
  self._started: bool = False
208
234
  self._running: bool = False
209
235
  self._error_code: int = 0
210
- self._error_description: Optional[str] = None
236
+ self._error_description: str | None = None
211
237
 
212
238
  # Public properties for compatibility with subclasses
213
239
  @property
214
- def result(self) -> Optional[Any]:
240
+ def result(self) -> Any | None:
215
241
  with self._lock:
216
242
  return self._result
217
243
 
@@ -240,7 +266,7 @@ class BaseFetcher:
240
266
  return self._error_code
241
267
 
242
268
  @property
243
- def error_description(self) -> Optional[str]:
269
+ def error_description(self) -> str | None:
244
270
  with self._lock:
245
271
  return self._error_description
246
272
 
@@ -259,22 +285,22 @@ class BaseFetcher:
259
285
  try:
260
286
  while not self._stop_event.is_set():
261
287
  try:
262
- r = requests.get(f'{WORKER_URL}/status/{self.file_ref}', timeout=10)
288
+ r = requests.get(f"{WORKER_URL}/status/{self.file_ref}", timeout=10)
263
289
 
264
290
  if r.status_code == 200:
265
291
  status = Status(**r.json())
266
292
 
267
- if status.status == 'Completed':
293
+ if status.status == "Completed":
268
294
  self._handle_completion(status)
269
295
  return
270
- elif status.status == 'Error':
296
+ elif status.status == "Error":
271
297
  self._handle_error(1, status.error_message)
272
298
  return
273
- elif status.status == 'Unknown Error':
299
+ elif status.status == "Unknown Error":
274
300
  self._handle_error(
275
301
  -1,
276
- 'There was an unknown error with the process, '
277
- 'and the process got killed by the server'
302
+ "There was an unknown error with the process, "
303
+ "and the process got killed by the server",
278
304
  )
279
305
  return
280
306
  else:
@@ -303,7 +329,7 @@ class BaseFetcher:
303
329
  """Handle successful completion. Must be called from fetch thread."""
304
330
  with self._condition:
305
331
  try:
306
- if status.result_type == 'polars':
332
+ if status.result_type == "polars":
307
333
  self._result = get_df_result(status.results)
308
334
  else:
309
335
  self._result = status.results
@@ -336,10 +362,10 @@ class BaseFetcher:
336
362
  """Start the background fetch thread."""
337
363
  with self._lock:
338
364
  if self._started:
339
- logger.info('Fetcher already started')
365
+ logger.info("Fetcher already started")
340
366
  return
341
367
  if self._running:
342
- logger.info('Already running the fetching')
368
+ logger.info("Already running the fetching")
343
369
  return
344
370
 
345
371
  self._running = True
@@ -350,13 +376,13 @@ class BaseFetcher:
350
376
  Cancels the current task both locally and on the worker service.
351
377
  Also cleans up any resources being used.
352
378
  """
353
- logger.warning('Cancelling the operation')
379
+ logger.warning("Cancelling the operation")
354
380
 
355
381
  # Cancel on the worker side
356
382
  try:
357
383
  cancel_task(self.file_ref)
358
384
  except Exception as e:
359
- logger.error(f'Failed to cancel task on worker: {str(e)}')
385
+ logger.error(f"Failed to cancel task on worker: {str(e)}")
360
386
 
361
387
  # Signal the thread to stop
362
388
  self._stop_event.set()
@@ -367,7 +393,7 @@ class BaseFetcher:
367
393
  if self._thread.is_alive():
368
394
  logger.warning("Fetch thread did not stop within timeout")
369
395
 
370
- def get_result(self) -> Optional[Any]:
396
+ def get_result(self) -> Any | None:
371
397
  """
372
398
  Get the result, blocking until it's available.
373
399
 
@@ -408,105 +434,141 @@ class BaseFetcher:
408
434
  return self._error_description is not None
409
435
 
410
436
  @property
411
- def error_info(self) -> tuple[int, Optional[str]]:
437
+ def error_info(self) -> tuple[int, str | None]:
412
438
  """Get error code and description."""
413
439
  with self._lock:
414
440
  return self._error_code, self._error_description
415
441
 
416
442
 
417
443
  class ExternalDfFetcher(BaseFetcher):
418
- status: Optional[Status] = None
419
-
420
- def __init__(self, flow_id: int, node_id: int | str, lf: pl.LazyFrame | pl.DataFrame, file_ref: str = None,
421
- wait_on_completion: bool = True,
422
- operation_type: OperationType = 'store', offload_to_worker: bool = True):
444
+ status: Status | None = None
445
+
446
+ def __init__(
447
+ self,
448
+ flow_id: int,
449
+ node_id: int | str,
450
+ lf: pl.LazyFrame | pl.DataFrame,
451
+ file_ref: str = None,
452
+ wait_on_completion: bool = True,
453
+ operation_type: OperationType = "store",
454
+ offload_to_worker: bool = True,
455
+ ):
423
456
  super().__init__(file_ref=file_ref)
424
457
  lf = lf.lazy() if isinstance(lf, pl.DataFrame) else lf
425
- r = trigger_df_operation(lf=lf, file_ref=self.file_ref, operation_type=operation_type,
426
- node_id=node_id, flow_id=flow_id)
427
- self.running = r.status == 'Processing'
458
+ r = trigger_df_operation(
459
+ lf=lf, file_ref=self.file_ref, operation_type=operation_type, node_id=node_id, flow_id=flow_id
460
+ )
461
+ self.running = r.status == "Processing"
428
462
  if wait_on_completion:
429
463
  _ = self.get_result()
430
464
  self.status = get_status(self.file_ref)
431
465
 
432
466
 
433
467
  class ExternalSampler(BaseFetcher):
434
- status: Optional[Status] = None
435
-
436
- def __init__(self, lf: pl.LazyFrame | pl.DataFrame, node_id: str | int, flow_id: int, file_ref: str = None, wait_on_completion: bool = True,
437
- sample_size: int = 100):
468
+ status: Status | None = None
469
+
470
+ def __init__(
471
+ self,
472
+ lf: pl.LazyFrame | pl.DataFrame,
473
+ node_id: str | int,
474
+ flow_id: int,
475
+ file_ref: str = None,
476
+ wait_on_completion: bool = True,
477
+ sample_size: int = 100,
478
+ ):
438
479
  super().__init__(file_ref=file_ref)
439
480
  lf = lf.lazy() if isinstance(lf, pl.DataFrame) else lf
440
- r = trigger_sample_operation(lf=lf, file_ref=file_ref, sample_size=sample_size, node_id=node_id, flow_id=flow_id)
441
- self.running = r.status == 'Processing'
481
+ r = trigger_sample_operation(
482
+ lf=lf, file_ref=file_ref, sample_size=sample_size, node_id=node_id, flow_id=flow_id
483
+ )
484
+ self.running = r.status == "Processing"
442
485
  if wait_on_completion:
443
486
  _ = self.get_result()
444
487
  self.status = get_status(self.file_ref)
445
488
 
446
489
 
447
490
  class ExternalFuzzyMatchFetcher(BaseFetcher):
448
- def __init__(self, left_df: pl.LazyFrame, right_df: pl.LazyFrame, fuzzy_maps: List[Any], flow_id: int,
449
- node_id: int | str,
450
- file_ref: str = None,
451
- wait_on_completion: bool = True):
491
+ def __init__(
492
+ self,
493
+ left_df: pl.LazyFrame,
494
+ right_df: pl.LazyFrame,
495
+ fuzzy_maps: list[Any],
496
+ flow_id: int,
497
+ node_id: int | str,
498
+ file_ref: str = None,
499
+ wait_on_completion: bool = True,
500
+ ):
452
501
  super().__init__(file_ref=file_ref)
453
502
 
454
- r = trigger_fuzzy_match_operation(left_df=left_df, right_df=right_df, fuzzy_maps=fuzzy_maps,
455
- file_ref=file_ref, flow_id=flow_id, node_id=node_id)
503
+ r = trigger_fuzzy_match_operation(
504
+ left_df=left_df,
505
+ right_df=right_df,
506
+ fuzzy_maps=fuzzy_maps,
507
+ file_ref=file_ref,
508
+ flow_id=flow_id,
509
+ node_id=node_id,
510
+ )
456
511
  self.file_ref = r.background_task_id
457
- self.running = r.status == 'Processing'
512
+ self.running = r.status == "Processing"
458
513
  if wait_on_completion:
459
514
  _ = self.get_result()
460
515
 
461
516
 
462
517
  class ExternalCreateFetcher(BaseFetcher):
463
- def __init__(self, received_table: ReceivedTable, node_id: int, flow_id: int,
464
- file_type: str = 'csv', wait_on_completion: bool = True):
465
- r = trigger_create_operation(received_table=received_table, file_type=file_type,
466
- node_id=node_id, flow_id=flow_id)
518
+ def __init__(
519
+ self,
520
+ received_table: ReceivedTable,
521
+ node_id: int,
522
+ flow_id: int,
523
+ file_type: str = "csv",
524
+ wait_on_completion: bool = True,
525
+ ):
526
+ r = trigger_create_operation(
527
+ received_table=received_table, file_type=file_type, node_id=node_id, flow_id=flow_id
528
+ )
467
529
  super().__init__(file_ref=r.background_task_id)
468
- self.running = r.status == 'Processing'
530
+ self.running = r.status == "Processing"
469
531
  if wait_on_completion:
470
532
  _ = self.get_result()
471
533
 
472
534
 
473
535
  class ExternalDatabaseFetcher(BaseFetcher):
474
- def __init__(self, database_external_read_settings: DatabaseExternalReadSettings,
475
- wait_on_completion: bool = True):
536
+ def __init__(self, database_external_read_settings: DatabaseExternalReadSettings, wait_on_completion: bool = True):
476
537
  r = trigger_database_read_collector(database_external_read_settings=database_external_read_settings)
477
538
  super().__init__(file_ref=r.background_task_id)
478
- self.running = r.status == 'Processing'
539
+ self.running = r.status == "Processing"
479
540
  if wait_on_completion:
480
541
  _ = self.get_result()
481
542
 
482
543
 
483
544
  class ExternalDatabaseWriter(BaseFetcher):
484
- def __init__(self, database_external_write_settings: DatabaseExternalWriteSettings,
485
- wait_on_completion: bool = True):
545
+ def __init__(
546
+ self, database_external_write_settings: DatabaseExternalWriteSettings, wait_on_completion: bool = True
547
+ ):
486
548
  r = trigger_database_write(database_external_write_settings=database_external_write_settings)
487
549
  super().__init__(file_ref=r.background_task_id)
488
- self.running = r.status == 'Processing'
550
+ self.running = r.status == "Processing"
489
551
  if wait_on_completion:
490
552
  _ = self.get_result()
491
553
 
492
554
 
493
555
  class ExternalCloudWriter(BaseFetcher):
494
-
495
- def __init__(self, cloud_storage_write_settings: CloudStorageWriteSettingsWorkerInterface,
496
- wait_on_completion: bool = True):
556
+ def __init__(
557
+ self, cloud_storage_write_settings: CloudStorageWriteSettingsWorkerInterface, wait_on_completion: bool = True
558
+ ):
497
559
  r = trigger_cloud_storage_write(database_external_write_settings=cloud_storage_write_settings)
498
560
  super().__init__(file_ref=r.background_task_id)
499
- self.running = r.status == 'Processing'
561
+ self.running = r.status == "Processing"
500
562
  if wait_on_completion:
501
563
  _ = self.get_result()
502
564
 
503
565
 
504
566
  class ExternalExecutorTracker:
505
- result: Optional[pl.LazyFrame]
567
+ result: pl.LazyFrame | None
506
568
  started: bool = False
507
569
  running: bool = False
508
570
  error_code: int = 0
509
- error_description: Optional[str] = None
571
+ error_description: str | None = None
510
572
  file_ref: str = None
511
573
 
512
574
  def __init__(self, initial_response: Status, wait_on_completion: bool = True):
@@ -515,7 +577,7 @@ class ExternalExecutorTracker:
515
577
  self.thread = threading.Thread(target=self._fetch_cached_df)
516
578
  self.result = None
517
579
  self.error_description = None
518
- self.running = initial_response.status == 'Processing'
580
+ self.running = initial_response.status == "Processing"
519
581
  self.condition = threading.Condition()
520
582
  if wait_on_completion:
521
583
  _ = self.get_result()
@@ -523,30 +585,32 @@ class ExternalExecutorTracker:
523
585
  def _fetch_cached_df(self):
524
586
  with self.condition:
525
587
  if self.running:
526
- logger.info('Already running the fetching')
588
+ logger.info("Already running the fetching")
527
589
  return
528
590
  sleep_time = 1
529
591
  self.running = True
530
592
  while not self.stop_event.is_set():
531
593
  try:
532
- r = requests.get(f'{WORKER_URL}/status/{self.file_ref}')
594
+ r = requests.get(f"{WORKER_URL}/status/{self.file_ref}")
533
595
  if r.status_code == 200:
534
596
  status = Status(**r.json())
535
- if status.status == 'Completed':
597
+ if status.status == "Completed":
536
598
  self.running = False
537
599
  self.condition.notify_all() # Notify all waiting threads
538
- if status.result_type == 'polars':
600
+ if status.result_type == "polars":
539
601
  self.result = get_df_result(status.results)
540
602
  else:
541
603
  self.result = status.results
542
604
  return
543
- elif status.status == 'Error':
605
+ elif status.status == "Error":
544
606
  self.error_code = 1
545
607
  self.error_description = status.error_message
546
608
  break
547
- elif status.status == 'Unknown Error':
609
+ elif status.status == "Unknown Error":
548
610
  self.error_code = -1
549
- self.error_description = 'There was an unknown error with the process, and the process got killed by the server'
611
+ self.error_description = (
612
+ "There was an unknown error with the process, and the process got killed by the server"
613
+ )
550
614
  break
551
615
  else:
552
616
  self.error_description = r.text
@@ -570,12 +634,12 @@ class ExternalExecutorTracker:
570
634
  def start(self):
571
635
  self.started = True
572
636
  if self.running:
573
- logger.info('Already running the fetching')
637
+ logger.info("Already running the fetching")
574
638
  return
575
639
  self.thread.start()
576
640
 
577
641
  def cancel(self):
578
- logger.warning('Cancelling the operation')
642
+ logger.warning("Cancelling the operation")
579
643
  self.thread.join()
580
644
 
581
645
  self.running = False
@@ -591,7 +655,7 @@ class ExternalExecutorTracker:
591
655
  return self.result
592
656
 
593
657
 
594
- def fetch_unique_values(lf: pl.LazyFrame) -> List[str]:
658
+ def fetch_unique_values(lf: pl.LazyFrame) -> list[str]:
595
659
  """
596
660
  Fetches unique values from a specified column in a LazyFrame, attempting first via an external fetcher
597
661
  and falling back to direct LazyFrame computation if that fails.
@@ -616,8 +680,7 @@ def fetch_unique_values(lf: pl.LazyFrame) -> List[str]:
616
680
  # Try external source first if lf is provided
617
681
  try:
618
682
  external_df_fetcher = ExternalDfFetcher(lf=lf, flow_id=1, node_id=-1)
619
- if external_df_fetcher.status.status == 'Completed':
620
-
683
+ if external_df_fetcher.status.status == "Completed":
621
684
  unique_values = read(external_df_fetcher.status.file_ref).column(0).to_pylist()
622
685
  if logger:
623
686
  logger.info(f"Got {len(unique_values)} unique values from external source")
@@ -626,10 +689,10 @@ def fetch_unique_values(lf: pl.LazyFrame) -> List[str]:
626
689
  if logger:
627
690
  logger.debug(f"Failed reading external file: {str(e)}")
628
691
 
629
- unique_values = (lf.unique().collect(engine="streaming")[:, 0].to_list())
692
+ unique_values = lf.unique().collect(engine="streaming")[:, 0].to_list()
630
693
 
631
694
  if not unique_values:
632
- raise ValueError(f"No unique values found in lazyframe")
695
+ raise ValueError("No unique values found in lazyframe")
633
696
 
634
697
  return unique_values
635
698
 
@@ -1,27 +1,27 @@
1
-
2
1
  import polars as pl
3
- from flowfile_core.utils.fl_executor import process_executor
2
+
4
3
  from flowfile_core.flowfile.flow_data_engine import utils
4
+ from flowfile_core.utils.fl_executor import process_executor
5
5
 
6
6
  # calculate_schema_threaded = process_executor(wait_on_completion=True, max_workers=1)(utils.calculate_schema)
7
7
  write_threaded = process_executor(False, max_workers=1)(utils.write_polars_frame)
8
8
  collect_threaded = process_executor(wait_on_completion=False, max_workers=1)(utils.collect)
9
9
  cache_polars_frame_to_temp_thread = process_executor(wait_on_completion=True, max_workers=1)(
10
- utils.cache_polars_frame_to_temp)
10
+ utils.cache_polars_frame_to_temp
11
+ )
11
12
 
12
13
 
13
14
  @process_executor(False, max_workers=1)
14
15
  def do_something_random():
15
- print('10 seconds')
16
+ print("10 seconds")
16
17
 
17
18
 
18
19
  # @process_executor(False, max_workers=1)
19
20
  def get_join_count(left: pl.LazyFrame, right: pl.LazyFrame, left_on_keys, right_on_keys, how):
20
21
  left_joined_df = left.group_by(left_on_keys).count()
21
22
  right_joined_df = right.group_by(right_on_keys).count()
22
- data: pl.LazyFrame = left_joined_df.join(right_joined_df, left_on=left_on_keys,
23
- right_on=right_on_keys, how=how)
24
- data = data.with_columns(pl.lit(1).alias('total').cast(pl.UInt64))
25
- result = data.select((pl.col('total') * pl.col('count') * pl.col('count_right'))).sum()
23
+ data: pl.LazyFrame = left_joined_df.join(right_joined_df, left_on=left_on_keys, right_on=right_on_keys, how=how)
24
+ data = data.with_columns(pl.lit(1).alias("total").cast(pl.UInt64))
25
+ result = data.select(pl.col("total") * pl.col("count") * pl.col("count_right")).sum()
26
26
  n_records = result.collect().to_series().to_list()[0]
27
27
  return n_records