Flowfile 0.4.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (332) hide show
  1. build_backends/main.py +25 -22
  2. build_backends/main_prd.py +10 -19
  3. flowfile/__init__.py +179 -73
  4. flowfile/__main__.py +10 -7
  5. flowfile/api.py +52 -59
  6. flowfile/web/__init__.py +14 -9
  7. flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
  8. flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
  9. flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
  10. flowfile/web/static/assets/{CloudConnectionManager-d3248f8d.js → CloudConnectionView-f13f202b.js} +11 -11
  11. flowfile/web/static/assets/{CloudStorageReader-d65bf041.js → CloudStorageReader-0023d4a5.js} +10 -8
  12. flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
  13. flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
  14. flowfile/web/static/assets/{CloudStorageWriter-e83be3ed.js → CloudStorageWriter-8e781e11.js} +10 -8
  15. flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
  16. flowfile/web/static/assets/{ColumnSelector-cce661cf.js → ColumnSelector-8ad68ea9.js} +3 -5
  17. flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
  18. flowfile/web/static/assets/{ContextMenu-11a4652a.js → ContextMenu-31ee57f0.js} +3 -3
  19. flowfile/web/static/assets/{ContextMenu-160afb08.js → ContextMenu-69a74055.js} +3 -3
  20. flowfile/web/static/assets/{ContextMenu-cf18d2cc.js → ContextMenu-8e2051c6.js} +3 -3
  21. flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
  22. flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
  23. flowfile/web/static/assets/{CrossJoin-d395d38c.js → CrossJoin-03df6938.js} +12 -10
  24. flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
  25. flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
  26. flowfile/web/static/assets/{CustomNode-b812dc0b.js → CustomNode-8479239b.js} +36 -24
  27. flowfile/web/static/assets/{DatabaseConnectionSettings-7000bf2c.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
  28. flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
  29. flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
  30. flowfile/web/static/assets/{DatabaseReader-4f035d0c.js → DatabaseReader-c58b9552.js} +25 -15
  31. flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
  32. flowfile/web/static/assets/{DatabaseManager-9662ec5b.js → DatabaseView-d26a9140.js} +11 -11
  33. flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
  34. flowfile/web/static/assets/{DatabaseWriter-f65dcd54.js → DatabaseWriter-4d05ddc7.js} +17 -10
  35. flowfile/web/static/assets/{designer-e3c150ec.css → DesignerView-a6d0ee84.css} +629 -538
  36. flowfile/web/static/assets/{designer-f3656d8c.js → DesignerView-e6f5c0e8.js} +1214 -3209
  37. flowfile/web/static/assets/{documentation-52b241e7.js → DocumentationView-2e78ef1b.js} +5 -5
  38. flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
  39. flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
  40. flowfile/web/static/assets/{ExploreData-94c43dfc.js → ExploreData-7b54caca.js} +18 -9
  41. flowfile/web/static/assets/{ExternalSource-ac04b3cc.js → ExternalSource-3fa399b2.js} +9 -7
  42. flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
  43. flowfile/web/static/assets/Filter-7494ea97.css +48 -0
  44. flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
  45. flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
  46. flowfile/web/static/assets/{Formula-71472193.js → Formula-aac42b1e.js} +13 -11
  47. flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
  48. flowfile/web/static/assets/{FuzzyMatch-b317f631.js → FuzzyMatch-cd9bbfca.js} +12 -10
  49. flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
  50. flowfile/web/static/assets/{GraphSolver-754a234f.js → GraphSolver-c7e6780e.js} +13 -11
  51. flowfile/web/static/assets/{GroupBy-6c6f9802.js → GroupBy-93c5d22b.js} +9 -7
  52. flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
  53. flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
  54. flowfile/web/static/assets/{Join-a1b800be.js → Join-a19b2de2.js} +13 -11
  55. flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
  56. flowfile/web/static/assets/LoginView-d325d632.css +172 -0
  57. flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
  58. flowfile/web/static/assets/{ManualInput-a9640276.js → ManualInput-8d3374b2.js} +170 -116
  59. flowfile/web/static/assets/{MultiSelect-97213888.js → MultiSelect-ad1b6243.js} +2 -2
  60. flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-6ffe088a.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
  61. flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
  62. flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
  63. flowfile/web/static/assets/{NumericInput-e638088a.js → NumericInput-7100234c.js} +2 -2
  64. flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-90eb2cba.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
  65. flowfile/web/static/assets/{Output-ddc9079f.css → Output-35e97000.css} +6 -6
  66. flowfile/web/static/assets/{Output-76750610.js → Output-f5efd2aa.js} +60 -38
  67. flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
  68. flowfile/web/static/assets/{Pivot-7814803f.js → Pivot-d981d23c.js} +11 -9
  69. flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
  70. flowfile/web/static/assets/{PivotValidation-f92137d2.js → PivotValidation-39386e95.js} +3 -3
  71. flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
  72. flowfile/web/static/assets/{PivotValidation-76dd431a.js → PivotValidation-63de1f73.js} +3 -3
  73. flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
  74. flowfile/web/static/assets/{PolarsCode-889c3008.js → PolarsCode-f9d69217.js} +18 -9
  75. flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
  76. flowfile/web/static/assets/PopOver-d96599db.css +33 -0
  77. flowfile/web/static/assets/{Read-6b17491f.css → Read-36e7bd51.css} +12 -12
  78. flowfile/web/static/assets/{Read-637b72a7.js → Read-aec2e377.js} +83 -105
  79. flowfile/web/static/assets/{RecordCount-2b050c41.js → RecordCount-78ed6845.js} +6 -4
  80. flowfile/web/static/assets/{RecordId-81df7784.js → RecordId-2156e890.js} +8 -6
  81. flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
  82. flowfile/web/static/assets/{SQLQueryComponent-88dcfe53.js → SQLQueryComponent-48c72f5b.js} +3 -3
  83. flowfile/web/static/assets/{Sample-258ad2a9.js → Sample-1352ca74.js} +6 -4
  84. flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
  85. flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
  86. flowfile/web/static/assets/{SecretManager-2a2cb7e2.js → SecretsView-17df66ee.js} +35 -36
  87. flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
  88. flowfile/web/static/assets/{Select-850215fd.js → Select-0aee4c54.js} +9 -7
  89. flowfile/web/static/assets/{SettingsSection-55bae608.js → SettingsSection-0784e157.js} +3 -3
  90. flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
  91. flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
  92. flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
  93. flowfile/web/static/assets/{SettingsSection-0e8d9123.js → SettingsSection-cd341bb6.js} +3 -3
  94. flowfile/web/static/assets/{SettingsSection-29b4fa6b.js → SettingsSection-f2002a6d.js} +3 -3
  95. flowfile/web/static/assets/{SingleSelect-bebd408b.js → SingleSelect-460cc0ea.js} +2 -2
  96. flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-6093741c.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
  97. flowfile/web/static/assets/{SliderInput-6a05ab61.js → SliderInput-5d926864.js} +7 -4
  98. flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
  99. flowfile/web/static/assets/{Sort-10ab48ed.js → Sort-3cdc971b.js} +9 -7
  100. flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
  101. flowfile/web/static/assets/{TextInput-df9d6259.js → TextInput-a2d0bfbd.js} +2 -2
  102. flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-000e1178.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
  103. flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
  104. flowfile/web/static/assets/{TextToRows-6c2d93d8.js → TextToRows-918945f7.js} +11 -10
  105. flowfile/web/static/assets/{ToggleSwitch-0ff7ac52.js → ToggleSwitch-f0ef5196.js} +2 -2
  106. flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-c6dc3029.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
  107. flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
  108. flowfile/web/static/assets/{UnavailableFields-1bab97cb.js → UnavailableFields-bdad6144.js} +4 -4
  109. flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
  110. flowfile/web/static/assets/{Union-b563478a.js → Union-e8ab8c86.js} +8 -6
  111. flowfile/web/static/assets/{Unique-f90db5db.js → Unique-8cd4f976.js} +13 -22
  112. flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
  113. flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
  114. flowfile/web/static/assets/{Unpivot-bcb0025f.js → Unpivot-8da14095.js} +10 -8
  115. flowfile/web/static/assets/{UnpivotValidation-c4e73b04.js → UnpivotValidation-6f7d89ff.js} +3 -3
  116. flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
  117. flowfile/web/static/assets/{VueGraphicWalker-bb8535e2.js → VueGraphicWalker-3fb312e1.js} +4 -4
  118. flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
  119. flowfile/web/static/assets/{api-4c8e3822.js → api-24483f0d.js} +1 -1
  120. flowfile/web/static/assets/{api-2d6adc4f.js → api-8b81fa73.js} +1 -1
  121. flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
  122. flowfile/web/static/assets/{dropDown-1bca8a74.js → dropDown-ac0fda9d.js} +3 -3
  123. flowfile/web/static/assets/{fullEditor-2985687e.js → fullEditor-5497a84a.js} +11 -10
  124. flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
  125. flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
  126. flowfile/web/static/assets/{genericNodeSettings-0476ba4e.js → genericNodeSettings-99014e1d.js} +5 -5
  127. flowfile/web/static/assets/index-07dda503.js +38 -0
  128. flowfile/web/static/assets/index-3ba44389.js +2696 -0
  129. flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
  130. flowfile/web/static/assets/{index-246f201c.js → index-fb6493ae.js} +41626 -40869
  131. flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
  132. flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
  133. flowfile/web/static/assets/{outputCsv-d686eeaf.js → outputCsv-8f8ba42d.js} +3 -3
  134. flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
  135. flowfile/web/static/assets/{outputExcel-8809ea2f.js → outputExcel-393f4fef.js} +3 -3
  136. flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
  137. flowfile/web/static/assets/{outputParquet-53ba645a.js → outputParquet-07c81f65.js} +4 -4
  138. flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
  139. flowfile/web/static/assets/{readCsv-053bf97b.js → readCsv-07f6d9ad.js} +21 -20
  140. flowfile/web/static/assets/{readCsv-bca3ed53.css → readCsv-3bfac4c3.css} +15 -15
  141. flowfile/web/static/assets/{readExcel-e1b381ea.css → readExcel-3db6b763.css} +13 -13
  142. flowfile/web/static/assets/{readExcel-ad531eab.js → readExcel-ed69bc8f.js} +10 -12
  143. flowfile/web/static/assets/{readParquet-cee068e2.css → readParquet-c5244ad5.css} +4 -4
  144. flowfile/web/static/assets/{readParquet-58e899a1.js → readParquet-e3ed4528.js} +4 -7
  145. flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
  146. flowfile/web/static/assets/{selectDynamic-b38de2ba.js → selectDynamic-80b92899.js} +5 -5
  147. flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
  148. flowfile/web/static/assets/{vue-codemirror.esm-db9b8936.js → vue-codemirror.esm-0965f39f.js} +31 -637
  149. flowfile/web/static/assets/{vue-content-loader.es-b5f3ac30.js → vue-content-loader.es-c506ad97.js} +1 -1
  150. flowfile/web/static/index.html +2 -2
  151. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +4 -4
  152. flowfile-0.5.3.dist-info/RECORD +402 -0
  153. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +1 -1
  154. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +1 -0
  155. flowfile_core/__init__.py +13 -3
  156. flowfile_core/auth/jwt.py +51 -16
  157. flowfile_core/auth/models.py +32 -7
  158. flowfile_core/auth/password.py +89 -0
  159. flowfile_core/auth/secrets.py +8 -6
  160. flowfile_core/configs/__init__.py +9 -7
  161. flowfile_core/configs/flow_logger.py +15 -14
  162. flowfile_core/configs/node_store/__init__.py +72 -4
  163. flowfile_core/configs/node_store/nodes.py +155 -172
  164. flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
  165. flowfile_core/configs/settings.py +28 -15
  166. flowfile_core/database/connection.py +7 -6
  167. flowfile_core/database/init_db.py +96 -2
  168. flowfile_core/database/models.py +3 -1
  169. flowfile_core/fileExplorer/__init__.py +17 -0
  170. flowfile_core/fileExplorer/funcs.py +123 -57
  171. flowfile_core/fileExplorer/utils.py +10 -11
  172. flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
  173. flowfile_core/flowfile/analytics/analytics_processor.py +27 -24
  174. flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
  175. flowfile_core/flowfile/analytics/utils.py +1 -1
  176. flowfile_core/flowfile/code_generator/code_generator.py +391 -279
  177. flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
  178. flowfile_core/flowfile/connection_manager/models.py +1 -1
  179. flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
  180. flowfile_core/flowfile/database_connection_manager/models.py +1 -1
  181. flowfile_core/flowfile/extensions.py +17 -12
  182. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
  183. flowfile_core/flowfile/flow_data_engine/create/funcs.py +152 -103
  184. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +526 -477
  185. flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
  186. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
  187. flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
  188. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
  189. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
  190. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +43 -32
  191. flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
  192. flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
  193. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +15 -11
  194. flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
  195. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
  196. flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
  197. flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
  198. flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
  199. flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
  200. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +360 -191
  201. flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
  202. flowfile_core/flowfile/flow_data_engine/utils.py +101 -67
  203. flowfile_core/flowfile/flow_graph.py +1011 -561
  204. flowfile_core/flowfile/flow_graph_utils.py +31 -49
  205. flowfile_core/flowfile/flow_node/flow_node.py +332 -232
  206. flowfile_core/flowfile/flow_node/models.py +54 -41
  207. flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
  208. flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
  209. flowfile_core/flowfile/handler.py +82 -32
  210. flowfile_core/flowfile/manage/compatibility_enhancements.py +493 -47
  211. flowfile_core/flowfile/manage/io_flowfile.py +391 -0
  212. flowfile_core/flowfile/node_designer/__init__.py +15 -13
  213. flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
  214. flowfile_core/flowfile/node_designer/custom_node.py +162 -36
  215. flowfile_core/flowfile/node_designer/ui_components.py +136 -35
  216. flowfile_core/flowfile/schema_callbacks.py +77 -54
  217. flowfile_core/flowfile/setting_generator/__init__.py +0 -1
  218. flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
  219. flowfile_core/flowfile/setting_generator/settings.py +72 -55
  220. flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
  221. flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
  222. flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
  223. flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
  224. flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
  225. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
  226. flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
  227. flowfile_core/flowfile/util/calculate_layout.py +9 -13
  228. flowfile_core/flowfile/util/execution_orderer.py +25 -17
  229. flowfile_core/flowfile/util/node_skipper.py +4 -4
  230. flowfile_core/flowfile/utils.py +19 -21
  231. flowfile_core/main.py +26 -19
  232. flowfile_core/routes/auth.py +284 -11
  233. flowfile_core/routes/cloud_connections.py +25 -25
  234. flowfile_core/routes/logs.py +21 -29
  235. flowfile_core/routes/public.py +3 -3
  236. flowfile_core/routes/routes.py +77 -43
  237. flowfile_core/routes/secrets.py +25 -27
  238. flowfile_core/routes/user_defined_components.py +483 -4
  239. flowfile_core/run_lock.py +0 -1
  240. flowfile_core/schemas/__init__.py +4 -6
  241. flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
  242. flowfile_core/schemas/cloud_storage_schemas.py +59 -55
  243. flowfile_core/schemas/input_schema.py +398 -154
  244. flowfile_core/schemas/output_model.py +50 -35
  245. flowfile_core/schemas/schemas.py +207 -67
  246. flowfile_core/schemas/transform_schema.py +1360 -435
  247. flowfile_core/schemas/yaml_types.py +117 -0
  248. flowfile_core/secret_manager/secret_manager.py +17 -13
  249. flowfile_core/{flowfile/node_designer/data_types.py → types.py} +33 -3
  250. flowfile_core/utils/arrow_reader.py +7 -6
  251. flowfile_core/utils/excel_file_manager.py +3 -3
  252. flowfile_core/utils/fileManager.py +7 -7
  253. flowfile_core/utils/fl_executor.py +8 -10
  254. flowfile_core/utils/utils.py +4 -4
  255. flowfile_core/utils/validate_setup.py +5 -4
  256. flowfile_frame/__init__.py +107 -50
  257. flowfile_frame/adapters.py +2 -9
  258. flowfile_frame/adding_expr.py +73 -32
  259. flowfile_frame/cloud_storage/frame_helpers.py +27 -23
  260. flowfile_frame/cloud_storage/secret_manager.py +12 -26
  261. flowfile_frame/config.py +2 -5
  262. flowfile_frame/expr.py +311 -218
  263. flowfile_frame/expr.pyi +160 -159
  264. flowfile_frame/expr_name.py +23 -23
  265. flowfile_frame/flow_frame.py +581 -489
  266. flowfile_frame/flow_frame.pyi +123 -104
  267. flowfile_frame/flow_frame_methods.py +236 -252
  268. flowfile_frame/group_frame.py +50 -20
  269. flowfile_frame/join.py +2 -2
  270. flowfile_frame/lazy.py +129 -87
  271. flowfile_frame/lazy_methods.py +83 -30
  272. flowfile_frame/list_name_space.py +55 -50
  273. flowfile_frame/selectors.py +148 -68
  274. flowfile_frame/series.py +9 -7
  275. flowfile_frame/utils.py +19 -21
  276. flowfile_worker/__init__.py +12 -4
  277. flowfile_worker/configs.py +11 -19
  278. flowfile_worker/create/__init__.py +14 -27
  279. flowfile_worker/create/funcs.py +143 -94
  280. flowfile_worker/create/models.py +139 -68
  281. flowfile_worker/create/pl_types.py +14 -15
  282. flowfile_worker/create/read_excel_tables.py +34 -41
  283. flowfile_worker/create/utils.py +22 -19
  284. flowfile_worker/external_sources/s3_source/main.py +18 -51
  285. flowfile_worker/external_sources/s3_source/models.py +34 -27
  286. flowfile_worker/external_sources/sql_source/main.py +8 -5
  287. flowfile_worker/external_sources/sql_source/models.py +13 -9
  288. flowfile_worker/flow_logger.py +10 -8
  289. flowfile_worker/funcs.py +214 -155
  290. flowfile_worker/main.py +11 -17
  291. flowfile_worker/models.py +35 -28
  292. flowfile_worker/process_manager.py +2 -3
  293. flowfile_worker/routes.py +121 -93
  294. flowfile_worker/secrets.py +9 -6
  295. flowfile_worker/spawner.py +80 -49
  296. flowfile_worker/utils.py +3 -2
  297. shared/__init__.py +2 -7
  298. shared/storage_config.py +25 -13
  299. test_utils/postgres/commands.py +3 -2
  300. test_utils/postgres/fixtures.py +9 -9
  301. test_utils/s3/commands.py +1 -1
  302. test_utils/s3/data_generator.py +3 -4
  303. test_utils/s3/demo_data_generator.py +4 -7
  304. test_utils/s3/fixtures.py +7 -5
  305. tools/migrate/README.md +56 -0
  306. tools/migrate/__init__.py +12 -0
  307. tools/migrate/__main__.py +118 -0
  308. tools/migrate/legacy_schemas.py +682 -0
  309. tools/migrate/migrate.py +610 -0
  310. tools/migrate/tests/__init__.py +0 -0
  311. tools/migrate/tests/conftest.py +21 -0
  312. tools/migrate/tests/test_migrate.py +622 -0
  313. tools/migrate/tests/test_migration_e2e.py +1009 -0
  314. tools/migrate/tests/test_node_migrations.py +843 -0
  315. flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
  316. flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
  317. flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
  318. flowfile/web/static/assets/Filter-812dcbca.js +0 -164
  319. flowfile/web/static/assets/Filter-f62091b3.css +0 -20
  320. flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
  321. flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
  322. flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
  323. flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
  324. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
  325. flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
  326. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
  327. flowfile/web/static/assets/secretApi-538058f3.js +0 -46
  328. flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
  329. flowfile-0.4.1.dist-info/RECORD +0 -376
  330. flowfile_core/flowfile/manage/open_flowfile.py +0 -143
  331. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
  332. /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
@@ -1,14 +1,13 @@
1
1
  # Standard library imports
2
- from base64 import decodebytes, encodebytes
3
2
  import io
4
3
  import threading
4
+ from base64 import decodebytes, encodebytes
5
5
  from time import sleep
6
- from typing import Any, List, Literal, Optional
6
+ from typing import Any, Literal
7
7
  from uuid import uuid4
8
8
 
9
9
  import polars as pl
10
10
  import requests
11
-
12
11
  from pl_fuzzy_frame_match.models import FuzzyMapping
13
12
 
14
13
  from flowfile_core.configs import logger
@@ -17,109 +16,131 @@ from flowfile_core.flowfile.flow_data_engine.subprocess_operations.models import
17
16
  FuzzyJoinInput,
18
17
  OperationType,
19
18
  PolarsOperation,
20
- Status
19
+ Status,
21
20
  )
22
- from flowfile_core.flowfile.sources.external_sources.sql_source.models import (DatabaseExternalReadSettings,
23
- DatabaseExternalWriteSettings)
24
- from flowfile_core.schemas.cloud_storage_schemas import CloudStorageWriteSettingsWorkerInterface
25
- from flowfile_core.schemas.input_schema import (
26
- ReceivedCsvTable,
27
- ReceivedExcelTable,
28
- ReceivedJsonTable,
29
- ReceivedParquetTable
21
+ from flowfile_core.flowfile.sources.external_sources.sql_source.models import (
22
+ DatabaseExternalReadSettings,
23
+ DatabaseExternalWriteSettings,
30
24
  )
25
+ from flowfile_core.schemas.cloud_storage_schemas import CloudStorageWriteSettingsWorkerInterface
26
+ from flowfile_core.schemas.input_schema import ReceivedTable
31
27
  from flowfile_core.utils.arrow_reader import read
32
28
 
33
- ReceivedTableCollection = ReceivedCsvTable | ReceivedParquetTable | ReceivedJsonTable | ReceivedExcelTable
34
29
 
35
-
36
- def trigger_df_operation(flow_id: int, node_id: int | str, lf: pl.LazyFrame, file_ref: str, operation_type: OperationType = 'store') -> Status:
30
+ def trigger_df_operation(
31
+ flow_id: int, node_id: int | str, lf: pl.LazyFrame, file_ref: str, operation_type: OperationType = "store"
32
+ ) -> Status:
37
33
  encoded_operation = encodebytes(lf.serialize()).decode()
38
- _json = {'task_id': file_ref, 'operation': encoded_operation, 'operation_type': operation_type,
39
- 'flowfile_flow_id': flow_id, 'flowfile_node_id': node_id}
40
- v = requests.post(url=f'{WORKER_URL}/submit_query/', json=_json)
34
+ _json = {
35
+ "task_id": file_ref,
36
+ "operation": encoded_operation,
37
+ "operation_type": operation_type,
38
+ "flowfile_flow_id": flow_id,
39
+ "flowfile_node_id": node_id,
40
+ }
41
+ v = requests.post(url=f"{WORKER_URL}/submit_query/", json=_json)
41
42
  if not v.ok:
42
- raise Exception(f'trigger_df_operation: Could not cache the data, {v.text}')
43
+ raise Exception(f"trigger_df_operation: Could not cache the data, {v.text}")
43
44
  return Status(**v.json())
44
45
 
45
46
 
46
- def trigger_sample_operation(lf: pl.LazyFrame, file_ref: str, flow_id: int, node_id: str | int, sample_size: int = 100) -> Status:
47
+ def trigger_sample_operation(
48
+ lf: pl.LazyFrame, file_ref: str, flow_id: int, node_id: str | int, sample_size: int = 100
49
+ ) -> Status:
47
50
  encoded_operation = encodebytes(lf.serialize()).decode()
48
- _json = {'task_id': file_ref, 'operation': encoded_operation, 'operation_type': 'store_sample',
49
- 'sample_size': sample_size, 'flowfile_flow_id': flow_id, 'flowfile_node_id': node_id}
50
- v = requests.post(url=f'{WORKER_URL}/store_sample/', json=_json)
51
+ _json = {
52
+ "task_id": file_ref,
53
+ "operation": encoded_operation,
54
+ "operation_type": "store_sample",
55
+ "sample_size": sample_size,
56
+ "flowfile_flow_id": flow_id,
57
+ "flowfile_node_id": node_id,
58
+ }
59
+ v = requests.post(url=f"{WORKER_URL}/store_sample/", json=_json)
51
60
  if not v.ok:
52
- raise Exception(f'trigger_sample_operation: Could not cache the data, {v.text}')
61
+ raise Exception(f"trigger_sample_operation: Could not cache the data, {v.text}")
53
62
  return Status(**v.json())
54
63
 
55
64
 
56
- def trigger_fuzzy_match_operation(left_df: pl.LazyFrame, right_df: pl.LazyFrame,
57
- fuzzy_maps: List[FuzzyMapping],
58
- file_ref: str,
59
- flow_id: int,
60
- node_id: int | str) -> Status:
65
+ def trigger_fuzzy_match_operation(
66
+ left_df: pl.LazyFrame,
67
+ right_df: pl.LazyFrame,
68
+ fuzzy_maps: list[FuzzyMapping],
69
+ file_ref: str,
70
+ flow_id: int,
71
+ node_id: int | str,
72
+ ) -> Status:
61
73
  left_serializable_object = PolarsOperation(operation=encodebytes(left_df.serialize()))
62
74
  right_serializable_object = PolarsOperation(operation=encodebytes(right_df.serialize()))
63
- fuzzy_join_input = FuzzyJoinInput(left_df_operation=left_serializable_object,
64
- right_df_operation=right_serializable_object,
65
- fuzzy_maps=fuzzy_maps,
66
- task_id=file_ref,
67
- flowfile_flow_id=flow_id,
68
- flowfile_node_id=node_id
69
- )
75
+ fuzzy_join_input = FuzzyJoinInput(
76
+ left_df_operation=left_serializable_object,
77
+ right_df_operation=right_serializable_object,
78
+ fuzzy_maps=fuzzy_maps,
79
+ task_id=file_ref,
80
+ flowfile_flow_id=flow_id,
81
+ flowfile_node_id=node_id,
82
+ )
70
83
  print("fuzzy join input", fuzzy_join_input)
71
- v = requests.post(f'{WORKER_URL}/add_fuzzy_join', data=fuzzy_join_input.model_dump_json())
84
+ v = requests.post(f"{WORKER_URL}/add_fuzzy_join", data=fuzzy_join_input.model_dump_json())
72
85
  if not v.ok:
73
- raise Exception(f'trigger_fuzzy_match_operation: Could not cache the data, {v.text}')
86
+ raise Exception(f"trigger_fuzzy_match_operation: Could not cache the data, {v.text}")
74
87
  return Status(**v.json())
75
88
 
76
89
 
77
- def trigger_create_operation(flow_id: int, node_id: int | str, received_table: ReceivedTableCollection,
78
- file_type: str = Literal['csv', 'parquet', 'json', 'excel']):
79
- f = requests.post(url=f'{WORKER_URL}/create_table/{file_type}', data=received_table.model_dump_json(),
80
- params={'flowfile_flow_id': flow_id, 'flowfile_node_id': node_id})
90
+ def trigger_create_operation(
91
+ flow_id: int,
92
+ node_id: int | str,
93
+ received_table: ReceivedTable,
94
+ file_type: str = Literal["csv", "parquet", "json", "excel"],
95
+ ):
96
+ f = requests.post(
97
+ url=f"{WORKER_URL}/create_table/{file_type}",
98
+ data=received_table.model_dump_json(),
99
+ params={"flowfile_flow_id": flow_id, "flowfile_node_id": node_id},
100
+ )
81
101
  if not f.ok:
82
- raise Exception(f'trigger_create_operation: Could not cache the data, {f.text}')
102
+ raise Exception(f"trigger_create_operation: Could not cache the data, {f.text}")
83
103
  return Status(**f.json())
84
104
 
85
105
 
86
106
  def trigger_database_read_collector(database_external_read_settings: DatabaseExternalReadSettings):
87
- f = requests.post(url=f'{WORKER_URL}/store_database_read_result',
88
- data=database_external_read_settings.model_dump_json())
107
+ f = requests.post(
108
+ url=f"{WORKER_URL}/store_database_read_result", data=database_external_read_settings.model_dump_json()
109
+ )
89
110
  if not f.ok:
90
- raise Exception(f'trigger_database_read_collector: Could not cache the data, {f.text}')
111
+ raise Exception(f"trigger_database_read_collector: Could not cache the data, {f.text}")
91
112
  return Status(**f.json())
92
113
 
93
114
 
94
115
  def trigger_database_write(database_external_write_settings: DatabaseExternalWriteSettings):
95
- f = requests.post(url=f'{WORKER_URL}/store_database_write_result',
96
- data=database_external_write_settings.model_dump_json())
116
+ f = requests.post(
117
+ url=f"{WORKER_URL}/store_database_write_result", data=database_external_write_settings.model_dump_json()
118
+ )
97
119
  if not f.ok:
98
- raise Exception(f'trigger_database_write: Could not cache the data, {f.text}')
120
+ raise Exception(f"trigger_database_write: Could not cache the data, {f.text}")
99
121
  return Status(**f.json())
100
122
 
101
123
 
102
124
  def trigger_cloud_storage_write(database_external_write_settings: CloudStorageWriteSettingsWorkerInterface):
103
- f = requests.post(url=f'{WORKER_URL}/write_data_to_cloud',
104
- data=database_external_write_settings.model_dump_json())
125
+ f = requests.post(url=f"{WORKER_URL}/write_data_to_cloud", data=database_external_write_settings.model_dump_json())
105
126
  if not f.ok:
106
- raise Exception(f'trigger_cloud_storage_write: Could not cache the data, {f.text}')
127
+ raise Exception(f"trigger_cloud_storage_write: Could not cache the data, {f.text}")
107
128
  return Status(**f.json())
108
129
 
109
130
 
110
131
  def get_results(file_ref: str) -> Status | None:
111
- f = requests.get(f'{WORKER_URL}/status/{file_ref}')
132
+ f = requests.get(f"{WORKER_URL}/status/{file_ref}")
112
133
  if f.status_code == 200:
113
134
  return Status(**f.json())
114
135
  else:
115
- raise Exception(f'get_results: Could not fetch the data, {f.text}')
136
+ raise Exception(f"get_results: Could not fetch the data, {f.text}")
116
137
 
117
138
 
118
139
  def results_exists(file_ref: str):
119
140
  try:
120
- f = requests.get(f'{WORKER_URL}/status/{file_ref}')
141
+ f = requests.get(f"{WORKER_URL}/status/{file_ref}")
121
142
  if f.status_code == 200:
122
- if f.json()['status'] == 'Completed':
143
+ if f.json()["status"] == "Completed":
123
144
  return True
124
145
  return False
125
146
  except requests.RequestException as e:
@@ -139,7 +160,7 @@ def clear_task_from_worker(file_ref: str) -> bool:
139
160
  bool: True if the task was successfully cleared, False otherwise.
140
161
  """
141
162
  try:
142
- f = requests.delete(f'{WORKER_URL}/clear_task/{file_ref}')
163
+ f = requests.delete(f"{WORKER_URL}/clear_task/{file_ref}")
143
164
  if f.status_code == 200:
144
165
  return True
145
166
  return False
@@ -155,16 +176,16 @@ def get_df_result(encoded_df: str) -> pl.LazyFrame:
155
176
 
156
177
  def get_external_df_result(file_ref: str) -> pl.LazyFrame | None:
157
178
  status = get_results(file_ref)
158
- if status.status != 'Completed':
179
+ if status.status != "Completed":
159
180
  raise Exception(f"Status is not completed, {status.status}")
160
- if status.result_type == 'polars':
181
+ if status.result_type == "polars":
161
182
  return get_df_result(status.results)
162
183
  else:
163
184
  raise Exception(f"Result type is not polars, {status.result_type}")
164
185
 
165
186
 
166
187
  def get_status(file_ref: str) -> Status:
167
- status_response = requests.get(f'{WORKER_URL}/status/{file_ref}')
188
+ status_response = requests.get(f"{WORKER_URL}/status/{file_ref}")
168
189
  if status_response.status_code == 200:
169
190
  return Status(**status_response.json())
170
191
  else:
@@ -185,222 +206,369 @@ def cancel_task(file_ref: str) -> bool:
185
206
  Exception: If there's an error communicating with the worker service
186
207
  """
187
208
  try:
188
- response = requests.post(f'{WORKER_URL}/cancel_task/{file_ref}')
209
+ response = requests.post(f"{WORKER_URL}/cancel_task/{file_ref}")
189
210
  if response.ok:
190
211
  return True
191
212
  return False
192
213
  except requests.RequestException as e:
193
- raise Exception(f'Failed to cancel task: {str(e)}')
214
+ raise Exception(f"Failed to cancel task: {str(e)}")
194
215
 
195
216
 
196
217
  class BaseFetcher:
197
- result: Optional[Any] = None
198
- started: bool = False
199
- running: bool = False
200
- error_code: int = 0
201
- error_description: Optional[str] = None
202
- file_ref: Optional[str] = None
218
+ """
219
+ Thread-safe fetcher for polling worker status and retrieving results.
220
+ """
203
221
 
204
222
  def __init__(self, file_ref: str = None):
205
223
  self.file_ref = file_ref if file_ref else str(uuid4())
206
- self.stop_event = threading.Event()
207
- self.thread = threading.Thread(target=self._fetch_cached_df)
208
- self.result = None
209
- self.error_description = None
210
- self.running = False
211
- self.started = False
212
- self.condition = threading.Condition()
213
- self.error_code = 0
224
+
225
+ # Thread synchronization
226
+ self._lock = threading.Lock()
227
+ self._condition = threading.Condition(self._lock)
228
+ self._stop_event = threading.Event()
229
+ self._thread = None
230
+
231
+ # State variables - use properties for thread-safe access
232
+ self._result: Any | None = None
233
+ self._started: bool = False
234
+ self._running: bool = False
235
+ self._error_code: int = 0
236
+ self._error_description: str | None = None
237
+
238
+ # Public properties for compatibility with subclasses
239
+ @property
240
+ def result(self) -> Any | None:
241
+ with self._lock:
242
+ return self._result
243
+
244
+ @property
245
+ def started(self) -> bool:
246
+ with self._lock:
247
+ return self._started
248
+
249
+ @property
250
+ def running(self) -> bool:
251
+ with self._lock:
252
+ return self._running
253
+
254
+ @running.setter
255
+ def running(self, value: bool):
256
+ """Allow subclasses to set running status and auto-start if needed."""
257
+ with self._lock:
258
+ self._running = value
259
+ # If subclass sets running=True, auto-start the thread
260
+ if value and not self._started:
261
+ self._start_thread()
262
+
263
+ @property
264
+ def error_code(self) -> int:
265
+ with self._lock:
266
+ return self._error_code
267
+
268
+ @property
269
+ def error_description(self) -> str | None:
270
+ with self._lock:
271
+ return self._error_description
272
+
273
+ def _start_thread(self):
274
+ """Internal method to start thread (must be called under lock)."""
275
+ if not self._started:
276
+ self._thread = threading.Thread(target=self._fetch_cached_df, daemon=True)
277
+ self._thread.start()
278
+ self._started = True
214
279
 
215
280
  def _fetch_cached_df(self):
216
- with self.condition:
217
- if self.running:
218
- logger.info('Already running the fetching')
219
- return
281
+ """Background thread that polls for results."""
282
+ sleep_time = 0.5
220
283
 
221
- sleep_time = .5
222
- self.running = True
223
- while not self.stop_event.is_set():
284
+ # Don't check _running here - subclasses already set it
285
+ try:
286
+ while not self._stop_event.is_set():
224
287
  try:
225
- r = requests.get(f'{WORKER_URL}/status/{self.file_ref}')
288
+ r = requests.get(f"{WORKER_URL}/status/{self.file_ref}", timeout=10)
289
+
226
290
  if r.status_code == 200:
227
291
  status = Status(**r.json())
228
- if status.status == 'Completed':
292
+
293
+ if status.status == "Completed":
229
294
  self._handle_completion(status)
230
295
  return
231
- elif status.status == 'Error':
296
+ elif status.status == "Error":
232
297
  self._handle_error(1, status.error_message)
233
- break
234
- elif status.status == 'Unknown Error':
235
- self._handle_error(-1,
236
- 'There was an unknown error with the process, '
237
- 'and the process got killed by the server')
238
- break
298
+ return
299
+ elif status.status == "Unknown Error":
300
+ self._handle_error(
301
+ -1,
302
+ "There was an unknown error with the process, "
303
+ "and the process got killed by the server",
304
+ )
305
+ return
239
306
  else:
240
- self._handle_error(2, r.text)
241
- break
307
+ self._handle_error(2, f"HTTP {r.status_code}: {r.text}")
308
+ return
309
+
242
310
  except requests.RequestException as e:
243
311
  self._handle_error(2, f"Request failed: {e}")
244
- break
312
+ return
245
313
 
246
- sleep(sleep_time)
314
+ # Sleep without holding the lock
315
+ if not self._stop_event.wait(timeout=sleep_time):
316
+ continue
317
+ else:
318
+ break
247
319
 
320
+ # Only reached if stop_event was set
248
321
  self._handle_cancellation()
249
322
 
323
+ except Exception as e:
324
+ # Catch any unexpected errors
325
+ logger.exception("Unexpected error in fetch thread")
326
+ self._handle_error(-1, f"Unexpected error: {e}")
327
+
250
328
  def _handle_completion(self, status):
251
- self.running = False
252
- self.condition.notify_all()
253
- if status.result_type == 'polars':
254
- self.result = get_df_result(status.results)
255
- else:
256
- self.result = status.results
257
-
258
- def _handle_error(self, code, description):
259
- self.error_code = code
260
- self.error_description = description
261
- self.running = False
262
- self.condition.notify_all()
329
+ """Handle successful completion. Must be called from fetch thread."""
330
+ with self._condition:
331
+ try:
332
+ if status.result_type == "polars":
333
+ self._result = get_df_result(status.results)
334
+ else:
335
+ self._result = status.results
336
+ except Exception as e:
337
+ logger.exception("Error processing result")
338
+ self._error_code = -1
339
+ self._error_description = f"Error processing result: {e}"
340
+ finally:
341
+ self._running = False
342
+ self._condition.notify_all()
343
+
344
+ def _handle_error(self, code: int, description: str):
345
+ """Handle error state. Must be called from fetch thread."""
346
+ with self._condition:
347
+ self._error_code = code
348
+ self._error_description = description
349
+ self._running = False
350
+ self._condition.notify_all()
263
351
 
264
352
  def _handle_cancellation(self):
265
- logger.warning("Fetch operation cancelled")
266
- if self.error_description is not None:
267
- logger.warning(self.error_description)
268
- self.running = False
269
- self.condition.notify_all()
353
+ """Handle cancellation. Must be called from fetch thread."""
354
+ with self._condition:
355
+ if self._error_description is None:
356
+ self._error_description = "Task cancelled"
357
+ logger.warning(f"Fetch operation cancelled: {self._error_description}")
358
+ self._running = False
359
+ self._condition.notify_all()
270
360
 
271
361
  def start(self):
272
- if self.running:
273
- logger.info('Already running the fetching')
274
- return
275
- if not self.started:
276
- self.thread.start()
277
- self.started = True
362
+ """Start the background fetch thread."""
363
+ with self._lock:
364
+ if self._started:
365
+ logger.info("Fetcher already started")
366
+ return
367
+ if self._running:
368
+ logger.info("Already running the fetching")
369
+ return
370
+
371
+ self._running = True
372
+ self._start_thread()
278
373
 
279
374
  def cancel(self):
280
375
  """
281
376
  Cancels the current task both locally and on the worker service.
282
377
  Also cleans up any resources being used.
283
378
  """
284
- logger.warning('Cancelling the operation')
379
+ logger.warning("Cancelling the operation")
380
+
381
+ # Cancel on the worker side
285
382
  try:
286
383
  cancel_task(self.file_ref)
287
384
  except Exception as e:
288
- logger.error(f'Failed to cancel task on worker: {str(e)}')
385
+ logger.error(f"Failed to cancel task on worker: {str(e)}")
289
386
 
290
- # Then stop the local monitoring thread
291
- self.stop_event.set()
292
- self.thread.join()
387
+ # Signal the thread to stop
388
+ self._stop_event.set()
293
389
 
294
- # Update local state
295
- with self.condition:
296
- self.running = False
297
- self.error_description = "Task cancelled by user"
298
- self.condition.notify_all()
390
+ # Wait for thread to finish
391
+ if self._thread and self._thread.is_alive():
392
+ self._thread.join(timeout=5.0)
393
+ if self._thread.is_alive():
394
+ logger.warning("Fetch thread did not stop within timeout")
299
395
 
300
- def get_result(self) -> Optional[Any]:
301
- if not self.started:
302
- self.start()
303
- with self.condition:
304
- while self.running and self.result is None:
305
- self.condition.wait() # Wait until notified
306
- if self.error_description is not None:
307
- raise Exception(self.error_description)
308
- return self.result
396
+ def get_result(self) -> Any | None:
397
+ """
398
+ Get the result, blocking until it's available.
309
399
 
400
+ Returns:
401
+ The fetched result.
402
+
403
+ Raises:
404
+ Exception: If an error occurred during fetching.
405
+ """
406
+ # Start if not already started (for manual usage)
407
+ with self._lock:
408
+ if not self._started:
409
+ if not self._running:
410
+ self._running = True
411
+ self._start_thread()
412
+
413
+ # Wait for completion
414
+ with self._condition:
415
+ while self._running:
416
+ self._condition.wait()
417
+
418
+ # Check for errors
419
+ with self._lock:
420
+ if self._error_description is not None:
421
+ raise Exception(self._error_description)
422
+ return self._result
423
+
424
+ @property
425
+ def is_running(self) -> bool:
426
+ """Check if the fetcher is currently running."""
427
+ with self._lock:
428
+ return self._running
429
+
430
+ @property
431
+ def has_error(self) -> bool:
432
+ """Check if the fetcher encountered an error."""
433
+ with self._lock:
434
+ return self._error_description is not None
435
+
436
+ @property
437
+ def error_info(self) -> tuple[int, str | None]:
438
+ """Get error code and description."""
439
+ with self._lock:
440
+ return self._error_code, self._error_description
310
441
 
311
- class ExternalDfFetcher(BaseFetcher):
312
- status: Optional[Status] = None
313
442
 
314
- def __init__(self, flow_id: int, node_id: int | str, lf: pl.LazyFrame | pl.DataFrame, file_ref: str = None,
315
- wait_on_completion: bool = True,
316
- operation_type: OperationType = 'store', offload_to_worker: bool = True):
443
+ class ExternalDfFetcher(BaseFetcher):
444
+ status: Status | None = None
445
+
446
+ def __init__(
447
+ self,
448
+ flow_id: int,
449
+ node_id: int | str,
450
+ lf: pl.LazyFrame | pl.DataFrame,
451
+ file_ref: str = None,
452
+ wait_on_completion: bool = True,
453
+ operation_type: OperationType = "store",
454
+ offload_to_worker: bool = True,
455
+ ):
317
456
  super().__init__(file_ref=file_ref)
318
457
  lf = lf.lazy() if isinstance(lf, pl.DataFrame) else lf
319
- r = trigger_df_operation(lf=lf, file_ref=self.file_ref, operation_type=operation_type,
320
- node_id=node_id, flow_id=flow_id)
321
- self.running = r.status == 'Processing'
458
+ r = trigger_df_operation(
459
+ lf=lf, file_ref=self.file_ref, operation_type=operation_type, node_id=node_id, flow_id=flow_id
460
+ )
461
+ self.running = r.status == "Processing"
322
462
  if wait_on_completion:
323
463
  _ = self.get_result()
324
464
  self.status = get_status(self.file_ref)
325
465
 
326
466
 
327
467
  class ExternalSampler(BaseFetcher):
328
- status: Optional[Status] = None
329
-
330
- def __init__(self, lf: pl.LazyFrame | pl.DataFrame, node_id: str | int, flow_id: int, file_ref: str = None, wait_on_completion: bool = True,
331
- sample_size: int = 100):
468
+ status: Status | None = None
469
+
470
+ def __init__(
471
+ self,
472
+ lf: pl.LazyFrame | pl.DataFrame,
473
+ node_id: str | int,
474
+ flow_id: int,
475
+ file_ref: str = None,
476
+ wait_on_completion: bool = True,
477
+ sample_size: int = 100,
478
+ ):
332
479
  super().__init__(file_ref=file_ref)
333
480
  lf = lf.lazy() if isinstance(lf, pl.DataFrame) else lf
334
- r = trigger_sample_operation(lf=lf, file_ref=file_ref, sample_size=sample_size, node_id=node_id, flow_id=flow_id)
335
- self.running = r.status == 'Processing'
481
+ r = trigger_sample_operation(
482
+ lf=lf, file_ref=file_ref, sample_size=sample_size, node_id=node_id, flow_id=flow_id
483
+ )
484
+ self.running = r.status == "Processing"
336
485
  if wait_on_completion:
337
486
  _ = self.get_result()
338
487
  self.status = get_status(self.file_ref)
339
488
 
340
489
 
341
490
  class ExternalFuzzyMatchFetcher(BaseFetcher):
342
- def __init__(self, left_df: pl.LazyFrame, right_df: pl.LazyFrame, fuzzy_maps: List[Any], flow_id: int,
343
- node_id: int | str,
344
- file_ref: str = None,
345
- wait_on_completion: bool = True):
491
+ def __init__(
492
+ self,
493
+ left_df: pl.LazyFrame,
494
+ right_df: pl.LazyFrame,
495
+ fuzzy_maps: list[Any],
496
+ flow_id: int,
497
+ node_id: int | str,
498
+ file_ref: str = None,
499
+ wait_on_completion: bool = True,
500
+ ):
346
501
  super().__init__(file_ref=file_ref)
347
502
 
348
- r = trigger_fuzzy_match_operation(left_df=left_df, right_df=right_df, fuzzy_maps=fuzzy_maps,
349
- file_ref=file_ref, flow_id=flow_id, node_id=node_id)
503
+ r = trigger_fuzzy_match_operation(
504
+ left_df=left_df,
505
+ right_df=right_df,
506
+ fuzzy_maps=fuzzy_maps,
507
+ file_ref=file_ref,
508
+ flow_id=flow_id,
509
+ node_id=node_id,
510
+ )
350
511
  self.file_ref = r.background_task_id
351
- self.running = r.status == 'Processing'
512
+ self.running = r.status == "Processing"
352
513
  if wait_on_completion:
353
514
  _ = self.get_result()
354
515
 
355
516
 
356
517
  class ExternalCreateFetcher(BaseFetcher):
357
- def __init__(self, received_table: ReceivedTableCollection, node_id: int, flow_id: int,
358
- file_type: str = 'csv', wait_on_completion: bool = True):
359
- r = trigger_create_operation(received_table=received_table, file_type=file_type,
360
- node_id=node_id, flow_id=flow_id)
518
+ def __init__(
519
+ self,
520
+ received_table: ReceivedTable,
521
+ node_id: int,
522
+ flow_id: int,
523
+ file_type: str = "csv",
524
+ wait_on_completion: bool = True,
525
+ ):
526
+ r = trigger_create_operation(
527
+ received_table=received_table, file_type=file_type, node_id=node_id, flow_id=flow_id
528
+ )
361
529
  super().__init__(file_ref=r.background_task_id)
362
- self.running = r.status == 'Processing'
530
+ self.running = r.status == "Processing"
363
531
  if wait_on_completion:
364
532
  _ = self.get_result()
365
533
 
366
534
 
367
535
  class ExternalDatabaseFetcher(BaseFetcher):
368
- def __init__(self, database_external_read_settings: DatabaseExternalReadSettings,
369
- wait_on_completion: bool = True):
536
+ def __init__(self, database_external_read_settings: DatabaseExternalReadSettings, wait_on_completion: bool = True):
370
537
  r = trigger_database_read_collector(database_external_read_settings=database_external_read_settings)
371
538
  super().__init__(file_ref=r.background_task_id)
372
- self.running = r.status == 'Processing'
539
+ self.running = r.status == "Processing"
373
540
  if wait_on_completion:
374
541
  _ = self.get_result()
375
542
 
376
543
 
377
544
  class ExternalDatabaseWriter(BaseFetcher):
378
- def __init__(self, database_external_write_settings: DatabaseExternalWriteSettings,
379
- wait_on_completion: bool = True):
545
+ def __init__(
546
+ self, database_external_write_settings: DatabaseExternalWriteSettings, wait_on_completion: bool = True
547
+ ):
380
548
  r = trigger_database_write(database_external_write_settings=database_external_write_settings)
381
549
  super().__init__(file_ref=r.background_task_id)
382
- self.running = r.status == 'Processing'
550
+ self.running = r.status == "Processing"
383
551
  if wait_on_completion:
384
552
  _ = self.get_result()
385
553
 
386
554
 
387
555
  class ExternalCloudWriter(BaseFetcher):
388
-
389
- def __init__(self, cloud_storage_write_settings: CloudStorageWriteSettingsWorkerInterface,
390
- wait_on_completion: bool = True):
556
+ def __init__(
557
+ self, cloud_storage_write_settings: CloudStorageWriteSettingsWorkerInterface, wait_on_completion: bool = True
558
+ ):
391
559
  r = trigger_cloud_storage_write(database_external_write_settings=cloud_storage_write_settings)
392
560
  super().__init__(file_ref=r.background_task_id)
393
- self.running = r.status == 'Processing'
561
+ self.running = r.status == "Processing"
394
562
  if wait_on_completion:
395
563
  _ = self.get_result()
396
564
 
397
565
 
398
566
  class ExternalExecutorTracker:
399
- result: Optional[pl.LazyFrame]
567
+ result: pl.LazyFrame | None
400
568
  started: bool = False
401
569
  running: bool = False
402
570
  error_code: int = 0
403
- error_description: Optional[str] = None
571
+ error_description: str | None = None
404
572
  file_ref: str = None
405
573
 
406
574
  def __init__(self, initial_response: Status, wait_on_completion: bool = True):
@@ -409,7 +577,7 @@ class ExternalExecutorTracker:
409
577
  self.thread = threading.Thread(target=self._fetch_cached_df)
410
578
  self.result = None
411
579
  self.error_description = None
412
- self.running = initial_response.status == 'Processing'
580
+ self.running = initial_response.status == "Processing"
413
581
  self.condition = threading.Condition()
414
582
  if wait_on_completion:
415
583
  _ = self.get_result()
@@ -417,30 +585,32 @@ class ExternalExecutorTracker:
417
585
  def _fetch_cached_df(self):
418
586
  with self.condition:
419
587
  if self.running:
420
- logger.info('Already running the fetching')
588
+ logger.info("Already running the fetching")
421
589
  return
422
590
  sleep_time = 1
423
591
  self.running = True
424
592
  while not self.stop_event.is_set():
425
593
  try:
426
- r = requests.get(f'{WORKER_URL}/status/{self.file_ref}')
594
+ r = requests.get(f"{WORKER_URL}/status/{self.file_ref}")
427
595
  if r.status_code == 200:
428
596
  status = Status(**r.json())
429
- if status.status == 'Completed':
597
+ if status.status == "Completed":
430
598
  self.running = False
431
599
  self.condition.notify_all() # Notify all waiting threads
432
- if status.result_type == 'polars':
600
+ if status.result_type == "polars":
433
601
  self.result = get_df_result(status.results)
434
602
  else:
435
603
  self.result = status.results
436
604
  return
437
- elif status.status == 'Error':
605
+ elif status.status == "Error":
438
606
  self.error_code = 1
439
607
  self.error_description = status.error_message
440
608
  break
441
- elif status.status == 'Unknown Error':
609
+ elif status.status == "Unknown Error":
442
610
  self.error_code = -1
443
- self.error_description = 'There was an unknown error with the process, and the process got killed by the server'
611
+ self.error_description = (
612
+ "There was an unknown error with the process, and the process got killed by the server"
613
+ )
444
614
  break
445
615
  else:
446
616
  self.error_description = r.text
@@ -464,12 +634,12 @@ class ExternalExecutorTracker:
464
634
  def start(self):
465
635
  self.started = True
466
636
  if self.running:
467
- logger.info('Already running the fetching')
637
+ logger.info("Already running the fetching")
468
638
  return
469
639
  self.thread.start()
470
640
 
471
641
  def cancel(self):
472
- logger.warning('Cancelling the operation')
642
+ logger.warning("Cancelling the operation")
473
643
  self.thread.join()
474
644
 
475
645
  self.running = False
@@ -485,7 +655,7 @@ class ExternalExecutorTracker:
485
655
  return self.result
486
656
 
487
657
 
488
- def fetch_unique_values(lf: pl.LazyFrame) -> List[str]:
658
+ def fetch_unique_values(lf: pl.LazyFrame) -> list[str]:
489
659
  """
490
660
  Fetches unique values from a specified column in a LazyFrame, attempting first via an external fetcher
491
661
  and falling back to direct LazyFrame computation if that fails.
@@ -510,8 +680,7 @@ def fetch_unique_values(lf: pl.LazyFrame) -> List[str]:
510
680
  # Try external source first if lf is provided
511
681
  try:
512
682
  external_df_fetcher = ExternalDfFetcher(lf=lf, flow_id=1, node_id=-1)
513
- if external_df_fetcher.status.status == 'Completed':
514
-
683
+ if external_df_fetcher.status.status == "Completed":
515
684
  unique_values = read(external_df_fetcher.status.file_ref).column(0).to_pylist()
516
685
  if logger:
517
686
  logger.info(f"Got {len(unique_values)} unique values from external source")
@@ -520,10 +689,10 @@ def fetch_unique_values(lf: pl.LazyFrame) -> List[str]:
520
689
  if logger:
521
690
  logger.debug(f"Failed reading external file: {str(e)}")
522
691
 
523
- unique_values = (lf.unique().collect(engine="streaming")[:, 0].to_list())
692
+ unique_values = lf.unique().collect(engine="streaming")[:, 0].to_list()
524
693
 
525
694
  if not unique_values:
526
- raise ValueError(f"No unique values found in lazyframe")
695
+ raise ValueError("No unique values found in lazyframe")
527
696
 
528
697
  return unique_values
529
698