Flowfile 0.4.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (332)
  1. build_backends/main.py +25 -22
  2. build_backends/main_prd.py +10 -19
  3. flowfile/__init__.py +179 -73
  4. flowfile/__main__.py +10 -7
  5. flowfile/api.py +52 -59
  6. flowfile/web/__init__.py +14 -9
  7. flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
  8. flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
  9. flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
  10. flowfile/web/static/assets/{CloudConnectionManager-d3248f8d.js → CloudConnectionView-f13f202b.js} +11 -11
  11. flowfile/web/static/assets/{CloudStorageReader-d65bf041.js → CloudStorageReader-0023d4a5.js} +10 -8
  12. flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
  13. flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
  14. flowfile/web/static/assets/{CloudStorageWriter-e83be3ed.js → CloudStorageWriter-8e781e11.js} +10 -8
  15. flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
  16. flowfile/web/static/assets/{ColumnSelector-cce661cf.js → ColumnSelector-8ad68ea9.js} +3 -5
  17. flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
  18. flowfile/web/static/assets/{ContextMenu-11a4652a.js → ContextMenu-31ee57f0.js} +3 -3
  19. flowfile/web/static/assets/{ContextMenu-160afb08.js → ContextMenu-69a74055.js} +3 -3
  20. flowfile/web/static/assets/{ContextMenu-cf18d2cc.js → ContextMenu-8e2051c6.js} +3 -3
  21. flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
  22. flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
  23. flowfile/web/static/assets/{CrossJoin-d395d38c.js → CrossJoin-03df6938.js} +12 -10
  24. flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
  25. flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
  26. flowfile/web/static/assets/{CustomNode-b812dc0b.js → CustomNode-8479239b.js} +36 -24
  27. flowfile/web/static/assets/{DatabaseConnectionSettings-7000bf2c.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
  28. flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
  29. flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
  30. flowfile/web/static/assets/{DatabaseReader-4f035d0c.js → DatabaseReader-c58b9552.js} +25 -15
  31. flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
  32. flowfile/web/static/assets/{DatabaseManager-9662ec5b.js → DatabaseView-d26a9140.js} +11 -11
  33. flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
  34. flowfile/web/static/assets/{DatabaseWriter-f65dcd54.js → DatabaseWriter-4d05ddc7.js} +17 -10
  35. flowfile/web/static/assets/{designer-e3c150ec.css → DesignerView-a6d0ee84.css} +629 -538
  36. flowfile/web/static/assets/{designer-f3656d8c.js → DesignerView-e6f5c0e8.js} +1214 -3209
  37. flowfile/web/static/assets/{documentation-52b241e7.js → DocumentationView-2e78ef1b.js} +5 -5
  38. flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
  39. flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
  40. flowfile/web/static/assets/{ExploreData-94c43dfc.js → ExploreData-7b54caca.js} +18 -9
  41. flowfile/web/static/assets/{ExternalSource-ac04b3cc.js → ExternalSource-3fa399b2.js} +9 -7
  42. flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
  43. flowfile/web/static/assets/Filter-7494ea97.css +48 -0
  44. flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
  45. flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
  46. flowfile/web/static/assets/{Formula-71472193.js → Formula-aac42b1e.js} +13 -11
  47. flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
  48. flowfile/web/static/assets/{FuzzyMatch-b317f631.js → FuzzyMatch-cd9bbfca.js} +12 -10
  49. flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
  50. flowfile/web/static/assets/{GraphSolver-754a234f.js → GraphSolver-c7e6780e.js} +13 -11
  51. flowfile/web/static/assets/{GroupBy-6c6f9802.js → GroupBy-93c5d22b.js} +9 -7
  52. flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
  53. flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
  54. flowfile/web/static/assets/{Join-a1b800be.js → Join-a19b2de2.js} +13 -11
  55. flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
  56. flowfile/web/static/assets/LoginView-d325d632.css +172 -0
  57. flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
  58. flowfile/web/static/assets/{ManualInput-a9640276.js → ManualInput-8d3374b2.js} +170 -116
  59. flowfile/web/static/assets/{MultiSelect-97213888.js → MultiSelect-ad1b6243.js} +2 -2
  60. flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-6ffe088a.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
  61. flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
  62. flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
  63. flowfile/web/static/assets/{NumericInput-e638088a.js → NumericInput-7100234c.js} +2 -2
  64. flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-90eb2cba.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
  65. flowfile/web/static/assets/{Output-ddc9079f.css → Output-35e97000.css} +6 -6
  66. flowfile/web/static/assets/{Output-76750610.js → Output-f5efd2aa.js} +60 -38
  67. flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
  68. flowfile/web/static/assets/{Pivot-7814803f.js → Pivot-d981d23c.js} +11 -9
  69. flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
  70. flowfile/web/static/assets/{PivotValidation-f92137d2.js → PivotValidation-39386e95.js} +3 -3
  71. flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
  72. flowfile/web/static/assets/{PivotValidation-76dd431a.js → PivotValidation-63de1f73.js} +3 -3
  73. flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
  74. flowfile/web/static/assets/{PolarsCode-889c3008.js → PolarsCode-f9d69217.js} +18 -9
  75. flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
  76. flowfile/web/static/assets/PopOver-d96599db.css +33 -0
  77. flowfile/web/static/assets/{Read-6b17491f.css → Read-36e7bd51.css} +12 -12
  78. flowfile/web/static/assets/{Read-637b72a7.js → Read-aec2e377.js} +83 -105
  79. flowfile/web/static/assets/{RecordCount-2b050c41.js → RecordCount-78ed6845.js} +6 -4
  80. flowfile/web/static/assets/{RecordId-81df7784.js → RecordId-2156e890.js} +8 -6
  81. flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
  82. flowfile/web/static/assets/{SQLQueryComponent-88dcfe53.js → SQLQueryComponent-48c72f5b.js} +3 -3
  83. flowfile/web/static/assets/{Sample-258ad2a9.js → Sample-1352ca74.js} +6 -4
  84. flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
  85. flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
  86. flowfile/web/static/assets/{SecretManager-2a2cb7e2.js → SecretsView-17df66ee.js} +35 -36
  87. flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
  88. flowfile/web/static/assets/{Select-850215fd.js → Select-0aee4c54.js} +9 -7
  89. flowfile/web/static/assets/{SettingsSection-55bae608.js → SettingsSection-0784e157.js} +3 -3
  90. flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
  91. flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
  92. flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
  93. flowfile/web/static/assets/{SettingsSection-0e8d9123.js → SettingsSection-cd341bb6.js} +3 -3
  94. flowfile/web/static/assets/{SettingsSection-29b4fa6b.js → SettingsSection-f2002a6d.js} +3 -3
  95. flowfile/web/static/assets/{SingleSelect-bebd408b.js → SingleSelect-460cc0ea.js} +2 -2
  96. flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-6093741c.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
  97. flowfile/web/static/assets/{SliderInput-6a05ab61.js → SliderInput-5d926864.js} +7 -4
  98. flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
  99. flowfile/web/static/assets/{Sort-10ab48ed.js → Sort-3cdc971b.js} +9 -7
  100. flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
  101. flowfile/web/static/assets/{TextInput-df9d6259.js → TextInput-a2d0bfbd.js} +2 -2
  102. flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-000e1178.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
  103. flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
  104. flowfile/web/static/assets/{TextToRows-6c2d93d8.js → TextToRows-918945f7.js} +11 -10
  105. flowfile/web/static/assets/{ToggleSwitch-0ff7ac52.js → ToggleSwitch-f0ef5196.js} +2 -2
  106. flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-c6dc3029.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
  107. flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
  108. flowfile/web/static/assets/{UnavailableFields-1bab97cb.js → UnavailableFields-bdad6144.js} +4 -4
  109. flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
  110. flowfile/web/static/assets/{Union-b563478a.js → Union-e8ab8c86.js} +8 -6
  111. flowfile/web/static/assets/{Unique-f90db5db.js → Unique-8cd4f976.js} +13 -22
  112. flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
  113. flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
  114. flowfile/web/static/assets/{Unpivot-bcb0025f.js → Unpivot-8da14095.js} +10 -8
  115. flowfile/web/static/assets/{UnpivotValidation-c4e73b04.js → UnpivotValidation-6f7d89ff.js} +3 -3
  116. flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
  117. flowfile/web/static/assets/{VueGraphicWalker-bb8535e2.js → VueGraphicWalker-3fb312e1.js} +4 -4
  118. flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
  119. flowfile/web/static/assets/{api-4c8e3822.js → api-24483f0d.js} +1 -1
  120. flowfile/web/static/assets/{api-2d6adc4f.js → api-8b81fa73.js} +1 -1
  121. flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
  122. flowfile/web/static/assets/{dropDown-1bca8a74.js → dropDown-ac0fda9d.js} +3 -3
  123. flowfile/web/static/assets/{fullEditor-2985687e.js → fullEditor-5497a84a.js} +11 -10
  124. flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
  125. flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
  126. flowfile/web/static/assets/{genericNodeSettings-0476ba4e.js → genericNodeSettings-99014e1d.js} +5 -5
  127. flowfile/web/static/assets/index-07dda503.js +38 -0
  128. flowfile/web/static/assets/index-3ba44389.js +2696 -0
  129. flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
  130. flowfile/web/static/assets/{index-246f201c.js → index-fb6493ae.js} +41626 -40869
  131. flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
  132. flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
  133. flowfile/web/static/assets/{outputCsv-d686eeaf.js → outputCsv-8f8ba42d.js} +3 -3
  134. flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
  135. flowfile/web/static/assets/{outputExcel-8809ea2f.js → outputExcel-393f4fef.js} +3 -3
  136. flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
  137. flowfile/web/static/assets/{outputParquet-53ba645a.js → outputParquet-07c81f65.js} +4 -4
  138. flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
  139. flowfile/web/static/assets/{readCsv-053bf97b.js → readCsv-07f6d9ad.js} +21 -20
  140. flowfile/web/static/assets/{readCsv-bca3ed53.css → readCsv-3bfac4c3.css} +15 -15
  141. flowfile/web/static/assets/{readExcel-e1b381ea.css → readExcel-3db6b763.css} +13 -13
  142. flowfile/web/static/assets/{readExcel-ad531eab.js → readExcel-ed69bc8f.js} +10 -12
  143. flowfile/web/static/assets/{readParquet-cee068e2.css → readParquet-c5244ad5.css} +4 -4
  144. flowfile/web/static/assets/{readParquet-58e899a1.js → readParquet-e3ed4528.js} +4 -7
  145. flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
  146. flowfile/web/static/assets/{selectDynamic-b38de2ba.js → selectDynamic-80b92899.js} +5 -5
  147. flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
  148. flowfile/web/static/assets/{vue-codemirror.esm-db9b8936.js → vue-codemirror.esm-0965f39f.js} +31 -637
  149. flowfile/web/static/assets/{vue-content-loader.es-b5f3ac30.js → vue-content-loader.es-c506ad97.js} +1 -1
  150. flowfile/web/static/index.html +2 -2
  151. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +4 -4
  152. flowfile-0.5.3.dist-info/RECORD +402 -0
  153. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +1 -1
  154. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +1 -0
  155. flowfile_core/__init__.py +13 -3
  156. flowfile_core/auth/jwt.py +51 -16
  157. flowfile_core/auth/models.py +32 -7
  158. flowfile_core/auth/password.py +89 -0
  159. flowfile_core/auth/secrets.py +8 -6
  160. flowfile_core/configs/__init__.py +9 -7
  161. flowfile_core/configs/flow_logger.py +15 -14
  162. flowfile_core/configs/node_store/__init__.py +72 -4
  163. flowfile_core/configs/node_store/nodes.py +155 -172
  164. flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
  165. flowfile_core/configs/settings.py +28 -15
  166. flowfile_core/database/connection.py +7 -6
  167. flowfile_core/database/init_db.py +96 -2
  168. flowfile_core/database/models.py +3 -1
  169. flowfile_core/fileExplorer/__init__.py +17 -0
  170. flowfile_core/fileExplorer/funcs.py +123 -57
  171. flowfile_core/fileExplorer/utils.py +10 -11
  172. flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
  173. flowfile_core/flowfile/analytics/analytics_processor.py +27 -24
  174. flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
  175. flowfile_core/flowfile/analytics/utils.py +1 -1
  176. flowfile_core/flowfile/code_generator/code_generator.py +391 -279
  177. flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
  178. flowfile_core/flowfile/connection_manager/models.py +1 -1
  179. flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
  180. flowfile_core/flowfile/database_connection_manager/models.py +1 -1
  181. flowfile_core/flowfile/extensions.py +17 -12
  182. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
  183. flowfile_core/flowfile/flow_data_engine/create/funcs.py +152 -103
  184. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +526 -477
  185. flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
  186. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
  187. flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
  188. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
  189. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
  190. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +43 -32
  191. flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
  192. flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
  193. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +15 -11
  194. flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
  195. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
  196. flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
  197. flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
  198. flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
  199. flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
  200. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +360 -191
  201. flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
  202. flowfile_core/flowfile/flow_data_engine/utils.py +101 -67
  203. flowfile_core/flowfile/flow_graph.py +1011 -561
  204. flowfile_core/flowfile/flow_graph_utils.py +31 -49
  205. flowfile_core/flowfile/flow_node/flow_node.py +332 -232
  206. flowfile_core/flowfile/flow_node/models.py +54 -41
  207. flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
  208. flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
  209. flowfile_core/flowfile/handler.py +82 -32
  210. flowfile_core/flowfile/manage/compatibility_enhancements.py +493 -47
  211. flowfile_core/flowfile/manage/io_flowfile.py +391 -0
  212. flowfile_core/flowfile/node_designer/__init__.py +15 -13
  213. flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
  214. flowfile_core/flowfile/node_designer/custom_node.py +162 -36
  215. flowfile_core/flowfile/node_designer/ui_components.py +136 -35
  216. flowfile_core/flowfile/schema_callbacks.py +77 -54
  217. flowfile_core/flowfile/setting_generator/__init__.py +0 -1
  218. flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
  219. flowfile_core/flowfile/setting_generator/settings.py +72 -55
  220. flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
  221. flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
  222. flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
  223. flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
  224. flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
  225. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
  226. flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
  227. flowfile_core/flowfile/util/calculate_layout.py +9 -13
  228. flowfile_core/flowfile/util/execution_orderer.py +25 -17
  229. flowfile_core/flowfile/util/node_skipper.py +4 -4
  230. flowfile_core/flowfile/utils.py +19 -21
  231. flowfile_core/main.py +26 -19
  232. flowfile_core/routes/auth.py +284 -11
  233. flowfile_core/routes/cloud_connections.py +25 -25
  234. flowfile_core/routes/logs.py +21 -29
  235. flowfile_core/routes/public.py +3 -3
  236. flowfile_core/routes/routes.py +77 -43
  237. flowfile_core/routes/secrets.py +25 -27
  238. flowfile_core/routes/user_defined_components.py +483 -4
  239. flowfile_core/run_lock.py +0 -1
  240. flowfile_core/schemas/__init__.py +4 -6
  241. flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
  242. flowfile_core/schemas/cloud_storage_schemas.py +59 -55
  243. flowfile_core/schemas/input_schema.py +398 -154
  244. flowfile_core/schemas/output_model.py +50 -35
  245. flowfile_core/schemas/schemas.py +207 -67
  246. flowfile_core/schemas/transform_schema.py +1360 -435
  247. flowfile_core/schemas/yaml_types.py +117 -0
  248. flowfile_core/secret_manager/secret_manager.py +17 -13
  249. flowfile_core/{flowfile/node_designer/data_types.py → types.py} +33 -3
  250. flowfile_core/utils/arrow_reader.py +7 -6
  251. flowfile_core/utils/excel_file_manager.py +3 -3
  252. flowfile_core/utils/fileManager.py +7 -7
  253. flowfile_core/utils/fl_executor.py +8 -10
  254. flowfile_core/utils/utils.py +4 -4
  255. flowfile_core/utils/validate_setup.py +5 -4
  256. flowfile_frame/__init__.py +107 -50
  257. flowfile_frame/adapters.py +2 -9
  258. flowfile_frame/adding_expr.py +73 -32
  259. flowfile_frame/cloud_storage/frame_helpers.py +27 -23
  260. flowfile_frame/cloud_storage/secret_manager.py +12 -26
  261. flowfile_frame/config.py +2 -5
  262. flowfile_frame/expr.py +311 -218
  263. flowfile_frame/expr.pyi +160 -159
  264. flowfile_frame/expr_name.py +23 -23
  265. flowfile_frame/flow_frame.py +581 -489
  266. flowfile_frame/flow_frame.pyi +123 -104
  267. flowfile_frame/flow_frame_methods.py +236 -252
  268. flowfile_frame/group_frame.py +50 -20
  269. flowfile_frame/join.py +2 -2
  270. flowfile_frame/lazy.py +129 -87
  271. flowfile_frame/lazy_methods.py +83 -30
  272. flowfile_frame/list_name_space.py +55 -50
  273. flowfile_frame/selectors.py +148 -68
  274. flowfile_frame/series.py +9 -7
  275. flowfile_frame/utils.py +19 -21
  276. flowfile_worker/__init__.py +12 -4
  277. flowfile_worker/configs.py +11 -19
  278. flowfile_worker/create/__init__.py +14 -27
  279. flowfile_worker/create/funcs.py +143 -94
  280. flowfile_worker/create/models.py +139 -68
  281. flowfile_worker/create/pl_types.py +14 -15
  282. flowfile_worker/create/read_excel_tables.py +34 -41
  283. flowfile_worker/create/utils.py +22 -19
  284. flowfile_worker/external_sources/s3_source/main.py +18 -51
  285. flowfile_worker/external_sources/s3_source/models.py +34 -27
  286. flowfile_worker/external_sources/sql_source/main.py +8 -5
  287. flowfile_worker/external_sources/sql_source/models.py +13 -9
  288. flowfile_worker/flow_logger.py +10 -8
  289. flowfile_worker/funcs.py +214 -155
  290. flowfile_worker/main.py +11 -17
  291. flowfile_worker/models.py +35 -28
  292. flowfile_worker/process_manager.py +2 -3
  293. flowfile_worker/routes.py +121 -93
  294. flowfile_worker/secrets.py +9 -6
  295. flowfile_worker/spawner.py +80 -49
  296. flowfile_worker/utils.py +3 -2
  297. shared/__init__.py +2 -7
  298. shared/storage_config.py +25 -13
  299. test_utils/postgres/commands.py +3 -2
  300. test_utils/postgres/fixtures.py +9 -9
  301. test_utils/s3/commands.py +1 -1
  302. test_utils/s3/data_generator.py +3 -4
  303. test_utils/s3/demo_data_generator.py +4 -7
  304. test_utils/s3/fixtures.py +7 -5
  305. tools/migrate/README.md +56 -0
  306. tools/migrate/__init__.py +12 -0
  307. tools/migrate/__main__.py +118 -0
  308. tools/migrate/legacy_schemas.py +682 -0
  309. tools/migrate/migrate.py +610 -0
  310. tools/migrate/tests/__init__.py +0 -0
  311. tools/migrate/tests/conftest.py +21 -0
  312. tools/migrate/tests/test_migrate.py +622 -0
  313. tools/migrate/tests/test_migration_e2e.py +1009 -0
  314. tools/migrate/tests/test_node_migrations.py +843 -0
  315. flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
  316. flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
  317. flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
  318. flowfile/web/static/assets/Filter-812dcbca.js +0 -164
  319. flowfile/web/static/assets/Filter-f62091b3.css +0 -20
  320. flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
  321. flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
  322. flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
  323. flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
  324. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
  325. flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
  326. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
  327. flowfile/web/static/assets/secretApi-538058f3.js +0 -46
  328. flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
  329. flowfile-0.4.1.dist-info/RECORD +0 -376
  330. flowfile_core/flowfile/manage/open_flowfile.py +0 -143
  331. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
  332. /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
flowfile_worker/funcs.py CHANGED
@@ -1,43 +1,47 @@
1
- import polars as pl
2
1
  import io
3
- from typing import List, Dict, Callable
4
- from multiprocessing import Array, Value, Queue
2
+ import logging
3
+ import os
4
+ from base64 import encodebytes
5
+ from collections.abc import Callable
6
+ from logging import Logger
7
+ from multiprocessing import Array, Queue, Value
5
8
 
6
- from pl_fuzzy_frame_match import fuzzy_match_dfs, FuzzyMapping
9
+ import polars as pl
10
+ from pl_fuzzy_frame_match import FuzzyMapping, fuzzy_match_dfs
7
11
 
8
- from flowfile_worker.flow_logger import get_worker_logger
9
- from flowfile_worker.external_sources.sql_source.models import DatabaseWriteSettings
10
- from flowfile_worker.external_sources.sql_source.main import write_df_to_database
11
12
  from flowfile_worker.external_sources.s3_source.main import write_df_to_cloud
12
13
  from flowfile_worker.external_sources.s3_source.models import CloudStorageWriteSettings
13
- from base64 import encodebytes
14
- from logging import Logger
15
- import logging
16
- import os
14
+ from flowfile_worker.external_sources.sql_source.main import write_df_to_database
15
+ from flowfile_worker.external_sources.sql_source.models import DatabaseWriteSettings
16
+ from flowfile_worker.flow_logger import get_worker_logger
17
17
  from flowfile_worker.utils import collect_lazy_frame, collect_lazy_frame_and_get_streaming_info
18
18
 
19
-
20
19
  # 'store', 'calculate_schema', 'calculate_number_of_records', 'write_output', 'fuzzy', 'store_sample']
21
20
 
22
- logging.basicConfig(format='%(asctime)s: %(message)s')
23
- logger = logging.getLogger('Spawner')
21
+ logging.basicConfig(format="%(asctime)s: %(message)s")
22
+ logger = logging.getLogger("Spawner")
24
23
  logger.setLevel(logging.INFO)
25
24
 
26
25
 
27
- def fuzzy_join_task(left_serializable_object: bytes, right_serializable_object: bytes,
28
- fuzzy_maps: List[FuzzyMapping], error_message: Array, file_path: str,
29
- progress: Value,
30
- queue: Queue, flowfile_flow_id: int, flowfile_node_id: int | str,
31
- ):
26
+ def fuzzy_join_task(
27
+ left_serializable_object: bytes,
28
+ right_serializable_object: bytes,
29
+ fuzzy_maps: list[FuzzyMapping],
30
+ error_message: Array,
31
+ file_path: str,
32
+ progress: Value,
33
+ queue: Queue,
34
+ flowfile_flow_id: int,
35
+ flowfile_node_id: int | str,
36
+ ):
32
37
  flowfile_logger = get_worker_logger(flowfile_flow_id, flowfile_node_id)
33
38
  try:
34
39
  flowfile_logger.info("Starting fuzzy join operation")
35
40
  left_df = pl.LazyFrame.deserialize(io.BytesIO(left_serializable_object))
36
41
  right_df = pl.LazyFrame.deserialize(io.BytesIO(right_serializable_object))
37
- fuzzy_match_result = fuzzy_match_dfs(left_df=left_df,
38
- right_df=right_df,
39
- fuzzy_maps=fuzzy_maps,
40
- logger=flowfile_logger)
42
+ fuzzy_match_result = fuzzy_match_dfs(
43
+ left_df=left_df, right_df=right_df, fuzzy_maps=fuzzy_maps, logger=flowfile_logger
44
+ )
41
45
  flowfile_logger.info("Fuzzy join operation completed successfully")
42
46
  fuzzy_match_result.write_ipc(file_path)
43
47
  with progress.get_lock():
@@ -45,18 +49,23 @@ def fuzzy_join_task(left_serializable_object: bytes, right_serializable_object:
45
49
  except Exception as e:
46
50
  error_msg = str(e).encode()[:256]
47
51
  with error_message.get_lock():
48
- error_message[:len(error_msg)] = error_msg
52
+ error_message[: len(error_msg)] = error_msg
49
53
  with progress.get_lock():
50
54
  progress.value = -1
51
- flowfile_logger.error(f'Error during fuzzy join operation: {str(e)}')
55
+ flowfile_logger.error(f"Error during fuzzy join operation: {str(e)}")
52
56
  lf = pl.scan_ipc(file_path)
53
57
  number_of_records = collect_lazy_frame(lf.select(pl.len()))[0, 0]
54
- flowfile_logger.info(f'Number of records after fuzzy match: {number_of_records}')
58
+ flowfile_logger.info(f"Number of records after fuzzy match: {number_of_records}")
55
59
  queue.put(encodebytes(lf.serialize()))
56
60
 
57
61
 
58
- def process_and_cache(polars_serializable_object: io.BytesIO, progress: Value, error_message: Array,
59
- file_path: str, flowfile_logger: Logger) -> bytes:
62
+ def process_and_cache(
63
+ polars_serializable_object: io.BytesIO,
64
+ progress: Value,
65
+ error_message: Array,
66
+ file_path: str,
67
+ flowfile_logger: Logger,
68
+ ) -> bytes:
60
69
  try:
61
70
  lf = pl.LazyFrame.deserialize(polars_serializable_object)
62
71
  collect_lazy_frame(lf).write_ipc(file_path)
@@ -65,23 +74,24 @@ def process_and_cache(polars_serializable_object: io.BytesIO, progress: Value, e
65
74
  progress.value = 100
66
75
  except Exception as e:
67
76
  error_msg = str(e).encode()[:1024] # Limit error message length
68
- flowfile_logger.error(f'Error during process and cache operation: {str(e)}')
77
+ flowfile_logger.error(f"Error during process and cache operation: {str(e)}")
69
78
  with error_message.get_lock():
70
- error_message[:len(error_msg)] = error_msg
79
+ error_message[: len(error_msg)] = error_msg
71
80
  with progress.get_lock():
72
81
  progress.value = -1 # Indicate error
73
82
  return error_msg
74
83
 
75
84
 
76
- def store_sample(polars_serializable_object: bytes,
77
- progress: Value,
78
- error_message: Array,
79
- queue: Queue,
80
- file_path: str,
81
- sample_size: int,
82
- flowfile_flow_id: int,
83
- flowfile_node_id: int | str
84
- ):
85
+ def store_sample(
86
+ polars_serializable_object: bytes,
87
+ progress: Value,
88
+ error_message: Array,
89
+ queue: Queue,
90
+ file_path: str,
91
+ sample_size: int,
92
+ flowfile_flow_id: int,
93
+ flowfile_node_id: int | str,
94
+ ):
85
95
  flowfile_logger = get_worker_logger(flowfile_flow_id, flowfile_node_id)
86
96
  flowfile_logger.info("Starting store sample operation")
87
97
  try:
@@ -91,38 +101,46 @@ def store_sample(polars_serializable_object: bytes,
91
101
  with progress.get_lock():
92
102
  progress.value = 100
93
103
  except Exception as e:
94
- flowfile_logger.error(f'Error during store sample operation: {str(e)}')
104
+ flowfile_logger.error(f"Error during store sample operation: {str(e)}")
95
105
  error_msg = str(e).encode()[:1024] # Limit error message length
96
106
  with error_message.get_lock():
97
- error_message[:len(error_msg)] = error_msg
107
+ error_message[: len(error_msg)] = error_msg
98
108
  with progress.get_lock():
99
109
  progress.value = -1 # Indicate error
100
110
  return error_msg
101
111
 
102
112
 
103
- def store(polars_serializable_object: bytes, progress: Value, error_message: Array, queue: Queue, file_path: str,
104
- flowfile_flow_id: int, flowfile_node_id: int | str):
113
+ def store(
114
+ polars_serializable_object: bytes,
115
+ progress: Value,
116
+ error_message: Array,
117
+ queue: Queue,
118
+ file_path: str,
119
+ flowfile_flow_id: int,
120
+ flowfile_node_id: int | str,
121
+ ):
105
122
  flowfile_logger = get_worker_logger(flowfile_flow_id, flowfile_node_id)
106
123
  flowfile_logger.info("Starting store operation")
107
124
  polars_serializable_object_io = io.BytesIO(polars_serializable_object)
108
125
  process_and_cache(polars_serializable_object_io, progress, error_message, file_path, flowfile_logger)
109
126
  lf = pl.scan_ipc(file_path)
110
127
  number_of_records = collect_lazy_frame(lf.select(pl.len()))[0, 0]
111
- flowfile_logger.info(f'Number of records processed: {number_of_records}')
128
+ flowfile_logger.info(f"Number of records processed: {number_of_records}")
112
129
  queue.put(encodebytes(lf.serialize()))
113
130
 
114
131
 
115
- def calculate_schema_logic(df: pl.LazyFrame, optimize_memory: bool = True, flowfile_logger: Logger = None) -> List[Dict]:
132
+ def calculate_schema_logic(
133
+ df: pl.LazyFrame, optimize_memory: bool = True, flowfile_logger: Logger = None
134
+ ) -> list[dict]:
116
135
  if flowfile_logger is None:
117
- raise ValueError('flowfile_logger is required')
136
+ raise ValueError("flowfile_logger is required")
118
137
  schema = df.collect_schema()
119
- schema_stats = [dict(column_name=k, pl_datatype=str(v), col_index=i) for i, (k, v) in
120
- enumerate(schema.items())]
121
- flowfile_logger.info('Starting to calculate the number of records')
138
+ schema_stats = [dict(column_name=k, pl_datatype=str(v), col_index=i) for i, (k, v) in enumerate(schema.items())]
139
+ flowfile_logger.info("Starting to calculate the number of records")
122
140
  collected_streaming_info = collect_lazy_frame_and_get_streaming_info(df.select(pl.len()))
123
141
  n_records = collected_streaming_info.df[0, 0]
124
142
  if n_records < 10_000:
125
- flowfile_logger.info('Collecting the whole dataset')
143
+ flowfile_logger.info("Collecting the whole dataset")
126
144
  df = collect_lazy_frame(df).lazy()
127
145
  if optimize_memory and n_records > 1_000_000:
128
146
  df = df.head(1_000_000)
@@ -133,20 +151,27 @@ def calculate_schema_logic(df: pl.LazyFrame, optimize_memory: bool = True, flowf
133
151
  else:
134
152
  df = df.drop(null_cols)
135
153
  pl_stats = df.describe()
136
- n_unique_per_cols = list(df.select(pl.all().approx_n_unique()).collect(
137
- engine="streaming" if collected_streaming_info.streaming_collect_available else "auto").to_dicts()[0].values()
138
- )
139
- stats_headers = pl_stats.drop_in_place('statistic').to_list()
140
- stats = {v['column_name']: v for v in pl_stats.transpose(include_header=True, header_name='column_name',
141
- column_names=stats_headers).to_dicts()}
142
- for i, (col_stat, n_unique_values) in enumerate(zip(stats.values(), n_unique_per_cols)):
143
- col_stat['n_unique'] = n_unique_values
144
- col_stat['examples'] = ', '.join({str(col_stat['min']), str(col_stat['max'])})
145
- col_stat['null_count'] = int(float(col_stat['null_count']))
146
- col_stat['count'] = int(float(col_stat['count']))
154
+ n_unique_per_cols = list(
155
+ df.select(pl.all().approx_n_unique())
156
+ .collect(engine="streaming" if collected_streaming_info.streaming_collect_available else "auto")
157
+ .to_dicts()[0]
158
+ .values()
159
+ )
160
+ stats_headers = pl_stats.drop_in_place("statistic").to_list()
161
+ stats = {
162
+ v["column_name"]: v
163
+ for v in pl_stats.transpose(
164
+ include_header=True, header_name="column_name", column_names=stats_headers
165
+ ).to_dicts()
166
+ }
167
+ for i, (col_stat, n_unique_values) in enumerate(zip(stats.values(), n_unique_per_cols, strict=False)):
168
+ col_stat["n_unique"] = n_unique_values
169
+ col_stat["examples"] = ", ".join({str(col_stat["min"]), str(col_stat["max"])})
170
+ col_stat["null_count"] = int(float(col_stat["null_count"]))
171
+ col_stat["count"] = int(float(col_stat["count"]))
147
172
 
148
173
  for schema_stat in schema_stats:
149
- deep_stat = stats.get(schema_stat['column_name'])
174
+ deep_stat = stats.get(schema_stat["column_name"])
150
175
  if deep_stat:
151
176
  schema_stat.update(deep_stat)
152
177
  del df
@@ -155,30 +180,45 @@ def calculate_schema_logic(df: pl.LazyFrame, optimize_memory: bool = True, flowf
155
180
  return schema_stats
156
181
 
157
182
 
158
- def calculate_schema(polars_serializable_object: bytes, progress: Value, error_message: Array, queue: Queue,
159
- flowfile_flow_id: int, flowfile_node_id: int | str, *args, **kwargs):
183
+ def calculate_schema(
184
+ polars_serializable_object: bytes,
185
+ progress: Value,
186
+ error_message: Array,
187
+ queue: Queue,
188
+ flowfile_flow_id: int,
189
+ flowfile_node_id: int | str,
190
+ *args,
191
+ **kwargs,
192
+ ):
160
193
  polars_serializable_object_io = io.BytesIO(polars_serializable_object)
161
194
  flowfile_logger = get_worker_logger(flowfile_flow_id, flowfile_node_id)
162
195
  flowfile_logger.info("Starting schema calculation")
163
196
  try:
164
197
  lf = pl.LazyFrame.deserialize(polars_serializable_object_io)
165
198
  schema_stats = calculate_schema_logic(lf, flowfile_logger=flowfile_logger)
166
- flowfile_logger.info('schema_stats', schema_stats)
199
+ flowfile_logger.info("schema_stats", schema_stats)
167
200
  queue.put(schema_stats)
168
201
  flowfile_logger.info("Schema calculation completed successfully")
169
202
  with progress.get_lock():
170
203
  progress.value = 100
171
204
  except Exception as e:
172
205
  error_msg = str(e).encode()[:256] # Limit error message length
173
- flowfile_logger.error('error', e)
206
+ flowfile_logger.error("error", e)
174
207
  with error_message.get_lock():
175
- error_message[:len(error_msg)] = error_msg
208
+ error_message[: len(error_msg)] = error_msg
176
209
  with progress.get_lock():
177
210
  progress.value = -1 # Indicate error
178
211
 
179
212
 
180
- def calculate_number_of_records(polars_serializable_object: bytes, progress: Value, error_message: Array,
181
- queue: Queue, flowfile_flow_id: int, *args, **kwargs):
213
+ def calculate_number_of_records(
214
+ polars_serializable_object: bytes,
215
+ progress: Value,
216
+ error_message: Array,
217
+ queue: Queue,
218
+ flowfile_flow_id: int,
219
+ *args,
220
+ **kwargs,
221
+ ):
182
222
  flowfile_logger = get_worker_logger(flowfile_flow_id, -1)
183
223
  flowfile_logger.info("Starting number of records calculation")
184
224
  polars_serializable_object_io = io.BytesIO(polars_serializable_object)
@@ -187,47 +227,54 @@ def calculate_number_of_records(polars_serializable_object: bytes, progress: Val
187
227
  n_records = collect_lazy_frame(lf.select(pl.len()))[0, 0]
188
228
  queue.put(n_records)
189
229
  flowfile_logger.debug("Number of records calculation completed successfully")
190
- flowfile_logger.debug(f'n_records {n_records}')
230
+ flowfile_logger.debug(f"n_records {n_records}")
191
231
  with progress.get_lock():
192
232
  progress.value = 100
193
233
  except Exception as e:
194
- flowfile_logger.error('error', e)
234
+ flowfile_logger.error("error", e)
195
235
  error_msg = str(e).encode()[:256] # Limit error message length
196
236
  with error_message.get_lock():
197
- error_message[:len(error_msg)] = error_msg
237
+ error_message[: len(error_msg)] = error_msg
198
238
  with progress.get_lock():
199
239
  progress.value = -1 # Indicate error
200
- return b'error'
201
-
202
-
203
- def execute_write_method(write_method: Callable, path: str, data_type: str = None, sheet_name: str = None,
204
- delimiter: str = None,
205
- write_mode: str = 'create', flowfile_logger: Logger = None):
206
- flowfile_logger.info('executing write method')
207
- if data_type == 'excel':
208
- logger.info('Writing as excel file')
240
+ return b"error"
241
+
242
+
243
+ def execute_write_method(
244
+ write_method: Callable,
245
+ path: str,
246
+ data_type: str = None,
247
+ sheet_name: str = None,
248
+ delimiter: str = None,
249
+ write_mode: str = "create",
250
+ flowfile_logger: Logger = None,
251
+ ):
252
+ flowfile_logger.info("executing write method")
253
+ if data_type == "excel":
254
+ logger.info("Writing as excel file")
209
255
  write_method(path, worksheet=sheet_name)
210
- elif data_type == 'csv':
211
- logger.info('Writing as csv file')
212
- if write_mode == 'append':
213
- with open(path, 'ab') as f:
214
- write_method(f, separator=delimiter, quote_style='always')
256
+ elif data_type == "csv":
257
+ logger.info("Writing as csv file")
258
+ if write_mode == "append":
259
+ with open(path, "ab") as f:
260
+ write_method(f, separator=delimiter, quote_style="always")
215
261
  else:
216
- write_method(path, separator=delimiter, quote_style='always')
217
- elif data_type == 'parquet':
218
- logger.info('Writing as parquet file')
262
+ write_method(path, separator=delimiter, quote_style="always")
263
+ elif data_type == "parquet":
264
+ logger.info("Writing as parquet file")
219
265
  write_method(path)
220
266
 
221
267
 
222
- def write_to_database(polars_serializable_object: bytes,
223
- progress: Value,
224
- error_message: Array,
225
- queue: Queue,
226
- file_path: str,
227
- database_write_settings: DatabaseWriteSettings,
228
- flowfile_flow_id: int = -1,
229
- flowfile_node_id: int | str = -1
230
- ):
268
+ def write_to_database(
269
+ polars_serializable_object: bytes,
270
+ progress: Value,
271
+ error_message: Array,
272
+ queue: Queue,
273
+ file_path: str,
274
+ database_write_settings: DatabaseWriteSettings,
275
+ flowfile_flow_id: int = -1,
276
+ flowfile_node_id: int | str = -1,
277
+ ):
231
278
  """
232
279
  Writes a Polars DataFrame to a SQL database.
233
280
  """
@@ -242,22 +289,23 @@ def write_to_database(polars_serializable_object: bytes,
242
289
  progress.value = 100
243
290
  except Exception as e:
244
291
  error_msg = str(e).encode()[:1024]
245
- flowfile_logger.error(f'Error during write operation: {str(e)}')
292
+ flowfile_logger.error(f"Error during write operation: {str(e)}")
246
293
  with error_message.get_lock():
247
- error_message[:len(error_msg)] = error_msg
294
+ error_message[: len(error_msg)] = error_msg
248
295
  with progress.get_lock():
249
296
  progress.value = -1
250
297
 
251
298
 
252
- def write_to_cloud_storage(polars_serializable_object: bytes,
253
- progress: Value,
254
- error_message: Array,
255
- queue: Queue,
256
- file_path: str,
257
- cloud_write_settings: CloudStorageWriteSettings,
258
- flowfile_flow_id: int = -1,
259
- flowfile_node_id: int | str = -1
260
- ) -> None:
299
+ def write_to_cloud_storage(
300
+ polars_serializable_object: bytes,
301
+ progress: Value,
302
+ error_message: Array,
303
+ queue: Queue,
304
+ file_path: str,
305
+ cloud_write_settings: CloudStorageWriteSettings,
306
+ flowfile_flow_id: int = -1,
307
+ flowfile_node_id: int | str = -1,
308
+ ) -> None:
261
309
  """
262
310
  Writes a Polars DataFrame to cloud storage using the provided settings.
263
311
  Args:
@@ -276,8 +324,7 @@ def write_to_cloud_storage(polars_serializable_object: bytes,
276
324
  flowfile_logger = get_worker_logger(flowfile_flow_id, flowfile_node_id)
277
325
  flowfile_logger.info(f"Starting write operation to: {cloud_write_settings.write_settings.resource_path}")
278
326
  df = pl.LazyFrame.deserialize(io.BytesIO(polars_serializable_object))
279
- flowfile_logger.info(f"Starting to sync the data to cloud, execution plan: \n"
280
- f"{df.explain(format='plain')}")
327
+ flowfile_logger.info(f"Starting to sync the data to cloud, execution plan: \n" f"{df.explain(format='plain')}")
281
328
  try:
282
329
  write_df_to_cloud(df, cloud_write_settings, flowfile_logger)
283
330
  flowfile_logger.info("Write operation completed successfully")
@@ -285,26 +332,27 @@ def write_to_cloud_storage(polars_serializable_object: bytes,
285
332
  progress.value = 100
286
333
  except Exception as e:
287
334
  error_msg = str(e).encode()[:1024]
288
- flowfile_logger.error(f'Error during write operation: {str(e)}')
335
+ flowfile_logger.error(f"Error during write operation: {str(e)}")
289
336
  with error_message.get_lock():
290
- error_message[:len(error_msg)] = error_msg
337
+ error_message[: len(error_msg)] = error_msg
291
338
  with progress.get_lock():
292
339
  progress.value = -1
293
340
 
294
341
 
295
- def write_output(polars_serializable_object: bytes,
296
- progress: Value,
297
- error_message: Array,
298
- queue: Queue,
299
- file_path: str,
300
- data_type: str,
301
- path: str,
302
- write_mode: str,
303
- sheet_name: str = None,
304
- delimiter: str = None,
305
- flowfile_flow_id: int = -1,
306
- flowfile_node_id: int | str = -1
307
- ):
342
+ def write_output(
343
+ polars_serializable_object: bytes,
344
+ progress: Value,
345
+ error_message: Array,
346
+ queue: Queue,
347
+ file_path: str,
348
+ data_type: str,
349
+ path: str,
350
+ write_mode: str,
351
+ sheet_name: str = None,
352
+ delimiter: str = None,
353
+ flowfile_flow_id: int = -1,
354
+ flowfile_node_id: int | str = -1,
355
+ ):
308
356
  flowfile_logger = get_worker_logger(flowfile_flow_id, flowfile_node_id)
309
357
  flowfile_logger.info(f"Starting write operation to: {path}")
310
358
  try:
@@ -312,42 +360,53 @@ def write_output(polars_serializable_object: bytes,
312
360
  if isinstance(df, pl.LazyFrame):
313
361
  flowfile_logger.info(f'Execution plan explanation:\n{df.explain(format="plain")}')
314
362
  flowfile_logger.info("Successfully deserialized dataframe")
315
- sink_method_str = 'sink_'+data_type
316
- write_method_str = 'write_'+data_type
363
+ sink_method_str = "sink_" + data_type
364
+ write_method_str = "write_" + data_type
317
365
  has_sink_method = hasattr(df, sink_method_str)
318
366
  write_method = None
319
- if os.path.exists(path) and write_mode == 'create':
320
- raise Exception('File already exists')
321
- if has_sink_method and write_method != 'append':
322
- flowfile_logger.info(f'Using sink method: {sink_method_str}')
323
- write_method = getattr(df, 'sink_' + data_type)
367
+ if os.path.exists(path) and write_mode == "create":
368
+ raise Exception("File already exists")
369
+ if has_sink_method and write_method != "append":
370
+ flowfile_logger.info(f"Using sink method: {sink_method_str}")
371
+ write_method = getattr(df, "sink_" + data_type)
324
372
  elif not has_sink_method:
325
373
  if isinstance(df, pl.LazyFrame):
326
374
  df = collect_lazy_frame(df)
327
375
  write_method = getattr(df, write_method_str)
328
376
  if write_method is not None:
329
- execute_write_method(write_method, path=path, data_type=data_type, sheet_name=sheet_name,
330
- delimiter=delimiter, write_mode=write_mode, flowfile_logger=flowfile_logger)
331
- number_of_records_written = (collect_lazy_frame(df.select(pl.len()))[0, 0]
332
- if isinstance(df, pl.LazyFrame) else df.height)
333
- flowfile_logger.info(f'Number of records written: {number_of_records_written}')
377
+ execute_write_method(
378
+ write_method,
379
+ path=path,
380
+ data_type=data_type,
381
+ sheet_name=sheet_name,
382
+ delimiter=delimiter,
383
+ write_mode=write_mode,
384
+ flowfile_logger=flowfile_logger,
385
+ )
386
+ number_of_records_written = (
387
+ collect_lazy_frame(df.select(pl.len()))[0, 0] if isinstance(df, pl.LazyFrame) else df.height
388
+ )
389
+ flowfile_logger.info(f"Number of records written: {number_of_records_written}")
334
390
  else:
335
- raise Exception('Write method not found')
391
+ raise Exception("Write method not found")
336
392
  with progress.get_lock():
337
393
  progress.value = 100
338
394
  except Exception as e:
339
- logger.info(f'Error during write operation: {str(e)}')
340
- error_message[:len(str(e))] = str(e).encode()
341
-
342
-
343
- def generic_task(func: Callable,
344
- progress: Value,
345
- error_message: Array,
346
- queue: Queue,
347
- file_path: str,
348
- flowfile_flow_id: int,
349
- flowfile_node_id: int | str,
350
- *args, **kwargs):
395
+ logger.info(f"Error during write operation: {str(e)}")
396
+ error_message[: len(str(e))] = str(e).encode()
397
+
398
+
399
+ def generic_task(
400
+ func: Callable,
401
+ progress: Value,
402
+ error_message: Array,
403
+ queue: Queue,
404
+ file_path: str,
405
+ flowfile_flow_id: int,
406
+ flowfile_node_id: int | str,
407
+ *args,
408
+ **kwargs,
409
+ ):
351
410
  print(kwargs)
352
411
  flowfile_logger = get_worker_logger(flowfile_flow_id, flowfile_node_id)
353
412
  flowfile_logger.info("Starting generic task")
@@ -358,19 +417,19 @@ def generic_task(func: Callable,
358
417
  elif isinstance(df, pl.DataFrame):
359
418
  df.write_ipc(file_path)
360
419
  else:
361
- raise Exception('Returned object is not a DataFrame or LazyFrame')
420
+ raise Exception("Returned object is not a DataFrame or LazyFrame")
362
421
  with progress.get_lock():
363
422
  progress.value = 100
364
423
  flowfile_logger.info("Task completed successfully")
365
424
  except Exception as e:
366
- flowfile_logger.error(f'Error during task execution: {str(e)}')
425
+ flowfile_logger.error(f"Error during task execution: {str(e)}")
367
426
  error_msg = str(e).encode()[:1024]
368
427
  with error_message.get_lock():
369
- error_message[:len(error_msg)] = error_msg
428
+ error_message[: len(error_msg)] = error_msg
370
429
  with progress.get_lock():
371
430
  progress.value = -1
372
431
 
373
432
  lf = pl.scan_ipc(file_path)
374
433
  number_of_records = collect_lazy_frame(lf.select(pl.len()))[0, 0]
375
- flowfile_logger.info(f'Number of records processed: {number_of_records}')
434
+ flowfile_logger.info(f"Number of records processed: {number_of_records}")
376
435
  queue.put(encodebytes(lf.serialize()))
flowfile_worker/main.py CHANGED
@@ -1,16 +1,14 @@
1
1
  import asyncio
2
- import uvicorn
3
2
  import signal
4
-
5
3
  from contextlib import asynccontextmanager
6
- from fastapi import FastAPI
7
4
 
8
- from shared.storage_config import storage
5
+ import uvicorn
6
+ from fastapi import FastAPI
9
7
 
10
- from flowfile_worker.routes import router
11
8
  from flowfile_worker import mp_context
12
- from flowfile_worker.configs import logger, FLOWFILE_CORE_URI, SERVICE_HOST, SERVICE_PORT
13
-
9
+ from flowfile_worker.configs import FLOWFILE_CORE_URI, SERVICE_HOST, SERVICE_PORT, logger
10
+ from flowfile_worker.routes import router
11
+ from shared.storage_config import storage
14
12
 
15
13
  should_exit = False
16
14
  server_instance = None
@@ -19,11 +17,11 @@ server_instance = None
19
17
  @asynccontextmanager
20
18
  async def shutdown_handler(app: FastAPI):
21
19
  """Handle application startup and shutdown"""
22
- logger.info('Starting application...')
20
+ logger.info("Starting application...")
23
21
  try:
24
22
  yield
25
23
  finally:
26
- logger.info('Shutting down application...')
24
+ logger.info("Shutting down application...")
27
25
  logger.info("Cleaning up worker resources...")
28
26
  for p in mp_context.active_children():
29
27
  try:
@@ -84,17 +82,12 @@ def run(host: str = None, port: int = None):
84
82
  signal.signal(signal.SIGTERM, signal_handler)
85
83
  signal.signal(signal.SIGINT, signal_handler)
86
84
 
87
- config = uvicorn.Config(
88
- app,
89
- host=host,
90
- port=port,
91
- loop="asyncio"
92
- )
85
+ config = uvicorn.Config(app, host=host, port=port, loop="asyncio")
93
86
  server = uvicorn.Server(config)
94
87
  server_instance = server # Store server instance globally
95
88
 
96
- logger.info('Starting server...')
97
- logger.info('Server started')
89
+ logger.info("Starting server...")
90
+ logger.info("Server started")
98
91
 
99
92
  try:
100
93
  server.run()
@@ -107,5 +100,6 @@ def run(host: str = None, port: int = None):
107
100
 
108
101
  if __name__ == "__main__":
109
102
  import multiprocessing
103
+
110
104
  multiprocessing.freeze_support()
111
105
  run()