Flowfile 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (329)
  1. build_backends/main.py +25 -22
  2. build_backends/main_prd.py +10 -19
  3. flowfile/__init__.py +178 -74
  4. flowfile/__main__.py +10 -7
  5. flowfile/api.py +51 -57
  6. flowfile/web/__init__.py +14 -9
  7. flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
  8. flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
  9. flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
  10. flowfile/web/static/assets/{CloudConnectionManager-0dfba9f2.js → CloudConnectionView-f13f202b.js} +11 -11
  11. flowfile/web/static/assets/{CloudStorageReader-d5b1b6c9.js → CloudStorageReader-0023d4a5.js} +10 -8
  12. flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
  13. flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
  14. flowfile/web/static/assets/{CloudStorageWriter-00d87aad.js → CloudStorageWriter-8e781e11.js} +10 -8
  15. flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
  16. flowfile/web/static/assets/{ColumnSelector-4685e75d.js → ColumnSelector-8ad68ea9.js} +3 -5
  17. flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
  18. flowfile/web/static/assets/{ContextMenu-23e909da.js → ContextMenu-31ee57f0.js} +3 -3
  19. flowfile/web/static/assets/{ContextMenu-70ae0c79.js → ContextMenu-69a74055.js} +3 -3
  20. flowfile/web/static/assets/{ContextMenu-f149cf7c.js → ContextMenu-8e2051c6.js} +3 -3
  21. flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
  22. flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
  23. flowfile/web/static/assets/{CrossJoin-702a3edd.js → CrossJoin-03df6938.js} +12 -10
  24. flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
  25. flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
  26. flowfile/web/static/assets/{CustomNode-b1519993.js → CustomNode-8479239b.js} +36 -24
  27. flowfile/web/static/assets/{DatabaseConnectionSettings-6f3e4ea5.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
  28. flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
  29. flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
  30. flowfile/web/static/assets/{DatabaseReader-d38c7295.js → DatabaseReader-c58b9552.js} +25 -15
  31. flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
  32. flowfile/web/static/assets/{DatabaseManager-cf5ef661.js → DatabaseView-d26a9140.js} +11 -11
  33. flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
  34. flowfile/web/static/assets/{DatabaseWriter-b04ef46a.js → DatabaseWriter-4d05ddc7.js} +17 -10
  35. flowfile/web/static/assets/{designer-8da3ba3a.css → DesignerView-a6d0ee84.css} +614 -546
  36. flowfile/web/static/assets/{designer-9633482a.js → DesignerView-e6f5c0e8.js} +1107 -3170
  37. flowfile/web/static/assets/{documentation-ca400224.js → DocumentationView-2e78ef1b.js} +5 -5
  38. flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
  39. flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
  40. flowfile/web/static/assets/{ExploreData-5fa10ed8.js → ExploreData-7b54caca.js} +18 -9
  41. flowfile/web/static/assets/{ExternalSource-d39af878.js → ExternalSource-3fa399b2.js} +9 -7
  42. flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
  43. flowfile/web/static/assets/Filter-7494ea97.css +48 -0
  44. flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
  45. flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
  46. flowfile/web/static/assets/{Formula-6b04fb1d.js → Formula-aac42b1e.js} +13 -11
  47. flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
  48. flowfile/web/static/assets/{FuzzyMatch-999521f4.js → FuzzyMatch-cd9bbfca.js} +12 -10
  49. flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
  50. flowfile/web/static/assets/{GraphSolver-17dd2198.js → GraphSolver-c7e6780e.js} +13 -11
  51. flowfile/web/static/assets/{GroupBy-6b039e18.js → GroupBy-93c5d22b.js} +9 -7
  52. flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
  53. flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
  54. flowfile/web/static/assets/{Join-24d0f113.js → Join-a19b2de2.js} +13 -11
  55. flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
  56. flowfile/web/static/assets/LoginView-d325d632.css +172 -0
  57. flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
  58. flowfile/web/static/assets/{ManualInput-34639209.js → ManualInput-8d3374b2.js} +170 -116
  59. flowfile/web/static/assets/{MultiSelect-0e8724a3.js → MultiSelect-ad1b6243.js} +2 -2
  60. flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
  61. flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
  62. flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
  63. flowfile/web/static/assets/{NumericInput-3d63a470.js → NumericInput-7100234c.js} +2 -2
  64. flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
  65. flowfile/web/static/assets/{Output-283fe388.css → Output-35e97000.css} +6 -6
  66. flowfile/web/static/assets/{Output-edea9802.js → Output-f5efd2aa.js} +12 -9
  67. flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
  68. flowfile/web/static/assets/{Pivot-61d19301.js → Pivot-d981d23c.js} +11 -9
  69. flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
  70. flowfile/web/static/assets/{PivotValidation-f97fec5b.js → PivotValidation-39386e95.js} +3 -3
  71. flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
  72. flowfile/web/static/assets/{PivotValidation-de9f43fe.js → PivotValidation-63de1f73.js} +3 -3
  73. flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
  74. flowfile/web/static/assets/{PolarsCode-bc3c9984.js → PolarsCode-f9d69217.js} +18 -9
  75. flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
  76. flowfile/web/static/assets/PopOver-d96599db.css +33 -0
  77. flowfile/web/static/assets/{Read-e808b239.css → Read-36e7bd51.css} +12 -12
  78. flowfile/web/static/assets/{Read-64a3f259.js → Read-aec2e377.js} +14 -11
  79. flowfile/web/static/assets/{RecordCount-3d5039be.js → RecordCount-78ed6845.js} +6 -4
  80. flowfile/web/static/assets/{RecordId-597510e0.js → RecordId-2156e890.js} +8 -6
  81. flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
  82. flowfile/web/static/assets/{SQLQueryComponent-df51adbe.js → SQLQueryComponent-48c72f5b.js} +3 -3
  83. flowfile/web/static/assets/{Sample-4be0a507.js → Sample-1352ca74.js} +6 -4
  84. flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
  85. flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
  86. flowfile/web/static/assets/{SecretManager-4839be57.js → SecretsView-17df66ee.js} +35 -36
  87. flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
  88. flowfile/web/static/assets/{Select-9b72f201.js → Select-0aee4c54.js} +9 -7
  89. flowfile/web/static/assets/{SettingsSection-f0f75a42.js → SettingsSection-0784e157.js} +3 -3
  90. flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
  91. flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
  92. flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
  93. flowfile/web/static/assets/{SettingsSection-e1e9c953.js → SettingsSection-cd341bb6.js} +3 -3
  94. flowfile/web/static/assets/{SettingsSection-7ded385d.js → SettingsSection-f2002a6d.js} +3 -3
  95. flowfile/web/static/assets/{SingleSelect-6c777aac.js → SingleSelect-460cc0ea.js} +2 -2
  96. flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
  97. flowfile/web/static/assets/{SliderInput-7cb93e62.js → SliderInput-5d926864.js} +7 -4
  98. flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
  99. flowfile/web/static/assets/{Sort-6cbde21a.js → Sort-3cdc971b.js} +9 -7
  100. flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
  101. flowfile/web/static/assets/{TextInput-d9a40c11.js → TextInput-a2d0bfbd.js} +2 -2
  102. flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-5896c375.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
  103. flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
  104. flowfile/web/static/assets/{TextToRows-c4fcbf4d.js → TextToRows-918945f7.js} +11 -10
  105. flowfile/web/static/assets/{ToggleSwitch-4ef91d19.js → ToggleSwitch-f0ef5196.js} +2 -2
  106. flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
  107. flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
  108. flowfile/web/static/assets/{UnavailableFields-a03f512c.js → UnavailableFields-bdad6144.js} +4 -4
  109. flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
  110. flowfile/web/static/assets/{Union-bfe9b996.js → Union-e8ab8c86.js} +8 -6
  111. flowfile/web/static/assets/{Unique-5d023a27.js → Unique-8cd4f976.js} +13 -10
  112. flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
  113. flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
  114. flowfile/web/static/assets/{Unpivot-91cc5354.js → Unpivot-8da14095.js} +10 -8
  115. flowfile/web/static/assets/{UnpivotValidation-7ee2de44.js → UnpivotValidation-6f7d89ff.js} +3 -3
  116. flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
  117. flowfile/web/static/assets/{VueGraphicWalker-e51b9924.js → VueGraphicWalker-3fb312e1.js} +4 -4
  118. flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
  119. flowfile/web/static/assets/{api-cf1221f0.js → api-24483f0d.js} +1 -1
  120. flowfile/web/static/assets/{api-c1bad5ca.js → api-8b81fa73.js} +1 -1
  121. flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
  122. flowfile/web/static/assets/{dropDown-614b998d.js → dropDown-ac0fda9d.js} +3 -3
  123. flowfile/web/static/assets/{fullEditor-f7971590.js → fullEditor-5497a84a.js} +11 -10
  124. flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
  125. flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
  126. flowfile/web/static/assets/{genericNodeSettings-4fe5f36b.js → genericNodeSettings-99014e1d.js} +5 -5
  127. flowfile/web/static/assets/index-07dda503.js +38 -0
  128. flowfile/web/static/assets/index-3ba44389.js +2696 -0
  129. flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
  130. flowfile/web/static/assets/{index-5429bbf8.js → index-fb6493ae.js} +41626 -40867
  131. flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
  132. flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
  133. flowfile/web/static/assets/{outputCsv-076b85ab.js → outputCsv-8f8ba42d.js} +3 -3
  134. flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
  135. flowfile/web/static/assets/{outputExcel-0fd17dbe.js → outputExcel-393f4fef.js} +3 -3
  136. flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
  137. flowfile/web/static/assets/{outputParquet-b61e0847.js → outputParquet-07c81f65.js} +4 -4
  138. flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
  139. flowfile/web/static/assets/{readCsv-a8bb8b61.js → readCsv-07f6d9ad.js} +3 -3
  140. flowfile/web/static/assets/{readCsv-c767cb37.css → readCsv-3bfac4c3.css} +15 -15
  141. flowfile/web/static/assets/{readExcel-806d2826.css → readExcel-3db6b763.css} +13 -13
  142. flowfile/web/static/assets/{readExcel-67b4aee0.js → readExcel-ed69bc8f.js} +5 -5
  143. flowfile/web/static/assets/{readParquet-48c81530.css → readParquet-c5244ad5.css} +4 -4
  144. flowfile/web/static/assets/{readParquet-92ce1dbc.js → readParquet-e3ed4528.js} +3 -3
  145. flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
  146. flowfile/web/static/assets/{selectDynamic-92e25ee3.js → selectDynamic-80b92899.js} +5 -5
  147. flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
  148. flowfile/web/static/assets/{vue-codemirror.esm-41b0e0d7.js → vue-codemirror.esm-0965f39f.js} +31 -640
  149. flowfile/web/static/assets/{vue-content-loader.es-2c8e608f.js → vue-content-loader.es-c506ad97.js} +1 -1
  150. flowfile/web/static/index.html +2 -2
  151. {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +2 -3
  152. flowfile-0.5.3.dist-info/RECORD +402 -0
  153. flowfile_core/__init__.py +13 -6
  154. flowfile_core/auth/jwt.py +51 -16
  155. flowfile_core/auth/models.py +32 -7
  156. flowfile_core/auth/password.py +89 -0
  157. flowfile_core/auth/secrets.py +8 -6
  158. flowfile_core/configs/__init__.py +9 -7
  159. flowfile_core/configs/flow_logger.py +15 -14
  160. flowfile_core/configs/node_store/__init__.py +72 -4
  161. flowfile_core/configs/node_store/nodes.py +155 -172
  162. flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
  163. flowfile_core/configs/settings.py +28 -15
  164. flowfile_core/database/connection.py +7 -6
  165. flowfile_core/database/init_db.py +96 -2
  166. flowfile_core/database/models.py +3 -1
  167. flowfile_core/fileExplorer/__init__.py +17 -0
  168. flowfile_core/fileExplorer/funcs.py +123 -57
  169. flowfile_core/fileExplorer/utils.py +10 -11
  170. flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
  171. flowfile_core/flowfile/analytics/analytics_processor.py +26 -24
  172. flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
  173. flowfile_core/flowfile/analytics/utils.py +1 -1
  174. flowfile_core/flowfile/code_generator/code_generator.py +358 -244
  175. flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
  176. flowfile_core/flowfile/connection_manager/models.py +1 -1
  177. flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
  178. flowfile_core/flowfile/database_connection_manager/models.py +1 -1
  179. flowfile_core/flowfile/extensions.py +17 -12
  180. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
  181. flowfile_core/flowfile/flow_data_engine/create/funcs.py +115 -83
  182. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +481 -423
  183. flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
  184. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
  185. flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
  186. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
  187. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
  188. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +31 -20
  189. flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
  190. flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
  191. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +14 -15
  192. flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
  193. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
  194. flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
  195. flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
  196. flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
  197. flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
  198. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +190 -127
  199. flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
  200. flowfile_core/flowfile/flow_data_engine/utils.py +99 -67
  201. flowfile_core/flowfile/flow_graph.py +918 -571
  202. flowfile_core/flowfile/flow_graph_utils.py +31 -49
  203. flowfile_core/flowfile/flow_node/flow_node.py +330 -233
  204. flowfile_core/flowfile/flow_node/models.py +53 -41
  205. flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
  206. flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
  207. flowfile_core/flowfile/handler.py +80 -30
  208. flowfile_core/flowfile/manage/compatibility_enhancements.py +209 -126
  209. flowfile_core/flowfile/manage/io_flowfile.py +54 -57
  210. flowfile_core/flowfile/node_designer/__init__.py +15 -13
  211. flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
  212. flowfile_core/flowfile/node_designer/custom_node.py +162 -36
  213. flowfile_core/flowfile/node_designer/ui_components.py +135 -34
  214. flowfile_core/flowfile/schema_callbacks.py +71 -51
  215. flowfile_core/flowfile/setting_generator/__init__.py +0 -1
  216. flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
  217. flowfile_core/flowfile/setting_generator/settings.py +64 -53
  218. flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
  219. flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
  220. flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
  221. flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
  222. flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
  223. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
  224. flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
  225. flowfile_core/flowfile/util/calculate_layout.py +9 -13
  226. flowfile_core/flowfile/util/execution_orderer.py +25 -17
  227. flowfile_core/flowfile/util/node_skipper.py +4 -4
  228. flowfile_core/flowfile/utils.py +19 -21
  229. flowfile_core/main.py +26 -19
  230. flowfile_core/routes/auth.py +284 -11
  231. flowfile_core/routes/cloud_connections.py +25 -25
  232. flowfile_core/routes/logs.py +21 -29
  233. flowfile_core/routes/public.py +3 -3
  234. flowfile_core/routes/routes.py +70 -34
  235. flowfile_core/routes/secrets.py +25 -27
  236. flowfile_core/routes/user_defined_components.py +483 -4
  237. flowfile_core/run_lock.py +0 -1
  238. flowfile_core/schemas/__init__.py +4 -6
  239. flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
  240. flowfile_core/schemas/cloud_storage_schemas.py +59 -53
  241. flowfile_core/schemas/input_schema.py +231 -144
  242. flowfile_core/schemas/output_model.py +49 -34
  243. flowfile_core/schemas/schemas.py +116 -89
  244. flowfile_core/schemas/transform_schema.py +518 -263
  245. flowfile_core/schemas/yaml_types.py +21 -7
  246. flowfile_core/secret_manager/secret_manager.py +17 -13
  247. flowfile_core/types.py +29 -9
  248. flowfile_core/utils/arrow_reader.py +7 -6
  249. flowfile_core/utils/excel_file_manager.py +3 -3
  250. flowfile_core/utils/fileManager.py +7 -7
  251. flowfile_core/utils/fl_executor.py +8 -10
  252. flowfile_core/utils/utils.py +4 -4
  253. flowfile_core/utils/validate_setup.py +5 -4
  254. flowfile_frame/__init__.py +106 -51
  255. flowfile_frame/adapters.py +2 -9
  256. flowfile_frame/adding_expr.py +73 -32
  257. flowfile_frame/cloud_storage/frame_helpers.py +27 -23
  258. flowfile_frame/cloud_storage/secret_manager.py +12 -26
  259. flowfile_frame/config.py +2 -5
  260. flowfile_frame/expr.py +311 -218
  261. flowfile_frame/expr.pyi +160 -159
  262. flowfile_frame/expr_name.py +23 -23
  263. flowfile_frame/flow_frame.py +571 -476
  264. flowfile_frame/flow_frame.pyi +123 -104
  265. flowfile_frame/flow_frame_methods.py +227 -246
  266. flowfile_frame/group_frame.py +50 -20
  267. flowfile_frame/join.py +2 -2
  268. flowfile_frame/lazy.py +129 -87
  269. flowfile_frame/lazy_methods.py +83 -30
  270. flowfile_frame/list_name_space.py +55 -50
  271. flowfile_frame/selectors.py +148 -68
  272. flowfile_frame/series.py +9 -7
  273. flowfile_frame/utils.py +19 -21
  274. flowfile_worker/__init__.py +12 -7
  275. flowfile_worker/configs.py +11 -19
  276. flowfile_worker/create/__init__.py +14 -9
  277. flowfile_worker/create/funcs.py +114 -77
  278. flowfile_worker/create/models.py +46 -43
  279. flowfile_worker/create/pl_types.py +14 -15
  280. flowfile_worker/create/read_excel_tables.py +34 -41
  281. flowfile_worker/create/utils.py +22 -19
  282. flowfile_worker/external_sources/s3_source/main.py +18 -51
  283. flowfile_worker/external_sources/s3_source/models.py +34 -27
  284. flowfile_worker/external_sources/sql_source/main.py +8 -5
  285. flowfile_worker/external_sources/sql_source/models.py +13 -9
  286. flowfile_worker/flow_logger.py +10 -8
  287. flowfile_worker/funcs.py +214 -155
  288. flowfile_worker/main.py +11 -17
  289. flowfile_worker/models.py +35 -28
  290. flowfile_worker/process_manager.py +2 -3
  291. flowfile_worker/routes.py +121 -90
  292. flowfile_worker/secrets.py +9 -6
  293. flowfile_worker/spawner.py +80 -49
  294. flowfile_worker/utils.py +3 -2
  295. shared/__init__.py +2 -7
  296. shared/storage_config.py +25 -13
  297. test_utils/postgres/commands.py +3 -2
  298. test_utils/postgres/fixtures.py +9 -9
  299. test_utils/s3/commands.py +1 -1
  300. test_utils/s3/data_generator.py +3 -4
  301. test_utils/s3/demo_data_generator.py +4 -7
  302. test_utils/s3/fixtures.py +7 -5
  303. tools/migrate/__init__.py +1 -1
  304. tools/migrate/__main__.py +16 -29
  305. tools/migrate/legacy_schemas.py +251 -190
  306. tools/migrate/migrate.py +193 -181
  307. tools/migrate/tests/conftest.py +1 -3
  308. tools/migrate/tests/test_migrate.py +36 -41
  309. tools/migrate/tests/test_migration_e2e.py +28 -29
  310. tools/migrate/tests/test_node_migrations.py +50 -20
  311. flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
  312. flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
  313. flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
  314. flowfile/web/static/assets/Filter-9b6d08db.js +0 -164
  315. flowfile/web/static/assets/Filter-f62091b3.css +0 -20
  316. flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
  317. flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
  318. flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
  319. flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
  320. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
  321. flowfile/web/static/assets/nodeInput-5d0d6b79.js +0 -41
  322. flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
  323. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
  324. flowfile/web/static/assets/secretApi-68435402.js +0 -46
  325. flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
  326. flowfile-0.5.1.dist-info/RECORD +0 -388
  327. {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +0 -0
  328. {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +0 -0
  329. {flowfile-0.5.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,23 +1,17 @@
1
-
2
- from typing import List
3
-
4
- from polars import datatypes
5
1
  import polars as pl
6
-
7
2
  from pl_fuzzy_frame_match.output_column_name_utils import set_name_in_fuzzy_mappings
8
3
  from pl_fuzzy_frame_match.pre_process import rename_fuzzy_right_mapping
4
+ from polars import datatypes
9
5
 
10
- from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import fetch_unique_values
11
6
  from flowfile_core.configs.flow_logger import main_logger
12
7
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, PlType
13
- from flowfile_core.schemas import transform_schema
14
- from flowfile_core.schemas import input_schema
15
- from flowfile_core.schemas.transform_schema import FuzzyMatchInputManager
8
+ from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import fetch_unique_values
9
+ from flowfile_core.schemas import input_schema, transform_schema
16
10
 
17
11
 
18
- def _ensure_all_columns_have_select(left_cols: List[str],
19
- right_cols: List[str],
20
- fuzzy_match_input: transform_schema.FuzzyMatchInputManager):
12
+ def _ensure_all_columns_have_select(
13
+ left_cols: list[str], right_cols: list[str], fuzzy_match_input: transform_schema.FuzzyMatchInputManager
14
+ ):
21
15
  """
22
16
  Ensure that all columns in the left and right FlowDataEngines are included in the fuzzy match input's select
23
17
  statements.
@@ -33,13 +27,16 @@ def _ensure_all_columns_have_select(left_cols: List[str],
33
27
  left_cols_in_select = {c.old_name for c in fuzzy_match_input.left_select.renames}
34
28
 
35
29
  fuzzy_match_input.left_select.renames.extend(
36
- [transform_schema.SelectInput(col) for col in left_cols if col not in left_cols_in_select])
30
+ [transform_schema.SelectInput(col) for col in left_cols if col not in left_cols_in_select]
31
+ )
37
32
  fuzzy_match_input.right_select.renames.extend(
38
33
  [transform_schema.SelectInput(col) for col in right_cols if col not in right_cols_in_select]
39
34
  )
40
35
 
41
36
 
42
- def _order_join_inputs_based_on_col_order(col_order: List[str], join_inputs: transform_schema.JoinInputsManager) -> None:
37
+ def _order_join_inputs_based_on_col_order(
38
+ col_order: list[str], join_inputs: transform_schema.JoinInputsManager
39
+ ) -> None:
43
40
  """
44
41
  Ensure that the select columns in the fuzzy match input match the order of the incoming columns.
45
42
  This function modifies the join_inputs object in-place.
@@ -52,17 +49,23 @@ def _order_join_inputs_based_on_col_order(col_order: List[str], join_inputs: tra
52
49
  join_inputs.select_inputs.renames = ordered_renames
53
50
 
54
51
 
55
- def calculate_fuzzy_match_schema(fm_input: transform_schema.FuzzyMatchInputManager,
56
- left_schema: List[FlowfileColumn],
57
- right_schema: List[FlowfileColumn]):
58
- _ensure_all_columns_have_select(left_cols=[col.column_name for col in left_schema],
59
- right_cols=[col.column_name for col in right_schema],
60
- fuzzy_match_input=fm_input)
52
+ def calculate_fuzzy_match_schema(
53
+ fm_input: transform_schema.FuzzyMatchInputManager,
54
+ left_schema: list[FlowfileColumn],
55
+ right_schema: list[FlowfileColumn],
56
+ ):
57
+ _ensure_all_columns_have_select(
58
+ left_cols=[col.column_name for col in left_schema],
59
+ right_cols=[col.column_name for col in right_schema],
60
+ fuzzy_match_input=fm_input,
61
+ )
61
62
 
62
- _order_join_inputs_based_on_col_order(col_order=[col.column_name for col in left_schema],
63
- join_inputs=fm_input.left_select)
64
- _order_join_inputs_based_on_col_order(col_order=[col.column_name for col in right_schema],
65
- join_inputs=fm_input.right_select)
63
+ _order_join_inputs_based_on_col_order(
64
+ col_order=[col.column_name for col in left_schema], join_inputs=fm_input.left_select
65
+ )
66
+ _order_join_inputs_based_on_col_order(
67
+ col_order=[col.column_name for col in right_schema], join_inputs=fm_input.right_select
68
+ )
66
69
  for column in fm_input.left_select.renames:
67
70
  if column.join_key:
68
71
  column.keep = True
@@ -78,20 +81,27 @@ def calculate_fuzzy_match_schema(fm_input: transform_schema.FuzzyMatchInputManag
78
81
  for column in fm_input.left_select.renames:
79
82
  column_schema = left_schema_dict.get(column.old_name)
80
83
  if column_schema and (column.keep or column.join_key):
81
- output_schema.append(FlowfileColumn.from_input(column.new_name, column_schema.data_type,
82
- example_values=column_schema.example_values))
84
+ output_schema.append(
85
+ FlowfileColumn.from_input(
86
+ column.new_name, column_schema.data_type, example_values=column_schema.example_values
87
+ )
88
+ )
83
89
  for column in fm_input.right_select.renames:
84
90
  column_schema = right_schema_dict.get(column.old_name)
85
91
  if column_schema and (column.keep or column.join_key):
86
- output_schema.append(FlowfileColumn.from_input(column.new_name, column_schema.data_type,
87
- example_values=column_schema.example_values))
92
+ output_schema.append(
93
+ FlowfileColumn.from_input(
94
+ column.new_name, column_schema.data_type, example_values=column_schema.example_values
95
+ )
96
+ )
88
97
  set_name_in_fuzzy_mappings(new_join_mapping)
89
- output_schema.extend([FlowfileColumn.from_input(fuzzy_mapping.output_column_name, 'Float64')
90
- for fuzzy_mapping in new_join_mapping])
98
+ output_schema.extend(
99
+ [FlowfileColumn.from_input(fuzzy_mapping.output_column_name, "Float64") for fuzzy_mapping in new_join_mapping]
100
+ )
91
101
  return output_schema
92
102
 
93
103
 
94
- def get_schema_of_column(node_input_schema: List[FlowfileColumn], col_name: str) -> FlowfileColumn|None:
104
+ def get_schema_of_column(node_input_schema: list[FlowfileColumn], col_name: str) -> FlowfileColumn | None:
95
105
  for s in node_input_schema:
96
106
  if s.name == col_name:
97
107
  return s
@@ -99,44 +109,54 @@ def get_schema_of_column(node_input_schema: List[FlowfileColumn], col_name: str)
99
109
 
100
110
  class InvalidSetup(ValueError):
101
111
  """Error raised when pivot column has too many unique values."""
112
+
102
113
  pass
103
114
 
104
115
 
105
116
  def get_output_data_type_pivot(schema: FlowfileColumn, agg_type: str) -> datatypes:
106
- if agg_type in ('count', 'n_unique'):
117
+ if agg_type in ("count", "n_unique"):
107
118
  output_type = datatypes.Float64 # count is always float
108
- elif schema.generic_datatype() == 'numeric':
119
+ elif schema.generic_datatype() == "numeric":
109
120
  output_type = datatypes.Float64
110
- elif schema.generic_datatype() == 'string':
121
+ elif schema.generic_datatype() == "string":
111
122
  output_type = datatypes.Utf8
112
- elif schema.generic_datatype() == 'date':
123
+ elif schema.generic_datatype() == "date":
113
124
  output_type = datatypes.Datetime
114
125
  else:
115
126
  output_type = datatypes.Utf8
116
127
  return output_type
117
128
 
118
129
 
119
- def pre_calculate_pivot_schema(node_input_schema: List[FlowfileColumn],
120
- pivot_input: transform_schema.PivotInput,
121
- output_fields: List[input_schema.MinimalFieldInfo] = None,
122
- input_lf: pl.LazyFrame = None) -> List[FlowfileColumn]:
123
- index_columns_schema = [get_schema_of_column(node_input_schema, index_col) for index_col in
124
- pivot_input.index_columns]
130
+ def pre_calculate_pivot_schema(
131
+ node_input_schema: list[FlowfileColumn],
132
+ pivot_input: transform_schema.PivotInput,
133
+ output_fields: list[input_schema.MinimalFieldInfo] = None,
134
+ input_lf: pl.LazyFrame = None,
135
+ ) -> list[FlowfileColumn]:
136
+ index_columns_schema = [
137
+ get_schema_of_column(node_input_schema, index_col) for index_col in pivot_input.index_columns
138
+ ]
125
139
  val_column_schema = get_schema_of_column(node_input_schema, pivot_input.value_col)
126
140
  if output_fields is not None and len(output_fields) > 0:
127
- return index_columns_schema+[FlowfileColumn(PlType(column_name=output_field.name,
128
- pl_datatype=output_field.data_type)) for output_field in
129
- output_fields]
141
+ return index_columns_schema + [
142
+ FlowfileColumn(PlType(column_name=output_field.name, pl_datatype=output_field.data_type))
143
+ for output_field in output_fields
144
+ ]
130
145
 
131
146
  else:
132
147
  max_unique_vals = 200
133
- unique_vals = fetch_unique_values(input_lf.select(pivot_input.pivot_column)
134
- .unique()
135
- .sort(pivot_input.pivot_column)
136
- .limit(max_unique_vals).cast(pl.String))
148
+ unique_vals = fetch_unique_values(
149
+ input_lf.select(pivot_input.pivot_column)
150
+ .unique()
151
+ .sort(pivot_input.pivot_column)
152
+ .limit(max_unique_vals)
153
+ .cast(pl.String)
154
+ )
137
155
  if len(unique_vals) >= max_unique_vals:
138
- main_logger.warning('Pivot column has too many unique values. Please consider using a different column.'
139
- f' Max unique values: {max_unique_vals}')
156
+ main_logger.warning(
157
+ "Pivot column has too many unique values. Please consider using a different column."
158
+ f" Max unique values: {max_unique_vals}"
159
+ )
140
160
  pl_output_fields = []
141
161
  for val in unique_vals:
142
162
  if len(pivot_input.aggregations) == 1:
@@ -145,5 +165,5 @@ def pre_calculate_pivot_schema(node_input_schema: List[FlowfileColumn],
145
165
  else:
146
166
  for agg in pivot_input.aggregations:
147
167
  output_type = get_output_data_type_pivot(val_column_schema, agg)
148
- pl_output_fields.append(PlType(column_name=f'{val}_{agg}', pl_datatype=output_type))
168
+ pl_output_fields.append(PlType(column_name=f"{val}_{agg}", pl_datatype=output_type))
149
169
  return index_columns_schema + [FlowfileColumn(pl_output_field) for pl_output_field in pl_output_fields]
@@ -1,2 +1 @@
1
-
2
1
  from flowfile_core.flowfile.setting_generator.settings import setting_generator, setting_updator
@@ -1,5 +1,6 @@
1
+ from collections.abc import Callable
2
+
1
3
  from flowfile_core.configs import logger
2
- from typing import Callable
3
4
 
4
5
 
5
6
  class SettingGenerator:
@@ -13,10 +14,10 @@ class SettingGenerator:
13
14
  setattr(self, f.__name__, f)
14
15
 
15
16
  def get_setting_generator(self, node_type: str) -> Callable:
16
- logger.info('getting setting generator for ' + node_type)
17
+ logger.info("getting setting generator for " + node_type)
17
18
 
18
19
  if node_type in self.setting_generator_set:
19
- logger.info('setting generator found')
20
+ logger.info("setting generator found")
20
21
  return getattr(self, node_type)
21
22
  else:
22
23
  return lambda x: x
@@ -33,9 +34,9 @@ class SettingUpdator:
33
34
  setattr(self, f.__name__, f)
34
35
 
35
36
  def get_setting_updator(self, node_type: str) -> Callable:
36
- logger.info('getting setting updator for ' + node_type)
37
+ logger.info("getting setting updator for " + node_type)
37
38
  if node_type in self.setting_updator_set:
38
- logger.info('setting updator found')
39
+ logger.info("setting updator found")
39
40
  return getattr(self, node_type)
40
41
  else:
41
42
  return lambda x: x
@@ -1,10 +1,11 @@
1
+ from collections.abc import Callable, Iterable
2
+ from functools import wraps
3
+
4
+ from pl_fuzzy_frame_match.models import FuzzyMapping
1
5
 
6
+ from flowfile_core.flowfile.setting_generator.setting_generator import SettingGenerator, SettingUpdator
2
7
  from flowfile_core.schemas import input_schema, transform_schema
3
- from typing import Callable, Iterable
4
- from functools import wraps
5
8
  from flowfile_core.schemas.output_model import NodeData
6
- from flowfile_core.flowfile.setting_generator.setting_generator import SettingGenerator, SettingUpdator
7
- from pl_fuzzy_frame_match.models import FuzzyMapping
8
9
 
9
10
  setting_generator = SettingGenerator()
10
11
  setting_updator = SettingUpdator()
@@ -16,6 +17,7 @@ def setting_generator_method(f: callable) -> Callable:
16
17
  if node_data.setting_input is None or isinstance(node_data.setting_input, input_schema.NodePromise):
17
18
  f(node_data)
18
19
  return node_data
20
+
19
21
  setting_generator.add_setting_generator_func(inner)
20
22
  return inner
21
23
 
@@ -38,18 +40,19 @@ def join(node_data: "NodeData") -> NodeData:
38
40
  if len(overlapping_cols) > 0:
39
41
  join_key = overlapping_cols[0]
40
42
  else:
41
- join_key = ''
43
+ join_key = ""
42
44
  join_input_manager = transform_schema.JoinInputManager(
43
- transform_schema.JoinInput(join_mapping=join_key,
44
- left_select=node_data.main_input.columns,
45
- right_select=node_data.right_input.columns
46
- )
45
+ transform_schema.JoinInput(
46
+ join_mapping=join_key,
47
+ left_select=node_data.main_input.columns,
48
+ right_select=node_data.right_input.columns,
49
+ )
47
50
  )
48
51
  join_input_manager.auto_rename()
49
52
  ji = join_input_manager.to_join_input()
50
- node_data.setting_input = input_schema.NodeJoin(flow_id=node_data.flow_id,
51
- node_id=node_data.node_id,
52
- join_input=ji)
53
+ node_data.setting_input = input_schema.NodeJoin(
54
+ flow_id=node_data.flow_id, node_id=node_data.node_id, join_input=ji
55
+ )
53
56
  return node_data
54
57
 
55
58
 
@@ -57,24 +60,31 @@ def join(node_data: "NodeData") -> NodeData:
57
60
  def cross_join(node_data: "NodeData") -> NodeData:
58
61
  if node_data.right_input and node_data.main_input:
59
62
  cj_input_manager = transform_schema.CrossJoinInputManager(
60
- transform_schema.CrossJoinInput(left_select=node_data.main_input.columns,
61
- right_select=node_data.right_input.columns)
63
+ transform_schema.CrossJoinInput(
64
+ left_select=node_data.main_input.columns, right_select=node_data.right_input.columns
65
+ )
62
66
  )
63
67
  cj_input_manager.auto_rename()
64
68
  cj = cj_input_manager.to_cross_join_input()
65
- node_data.setting_input = input_schema.NodeCrossJoin(flow_id=node_data.flow_id,
66
- node_id=node_data.node_id,
67
- cross_join_input=cj)
69
+ node_data.setting_input = input_schema.NodeCrossJoin(
70
+ flow_id=node_data.flow_id, node_id=node_data.node_id, cross_join_input=cj
71
+ )
68
72
  return node_data
69
73
 
70
74
 
71
75
  @setting_generator_method
72
76
  def filter(node_data: "NodeData") -> NodeData:
73
77
  if node_data.main_input:
74
- fi = transform_schema.FilterInput(basic_filter=transform_schema.BasicFilter(), filter_type='advanced')
75
- node_data.setting_input = input_schema.NodeFilter(flow_id=node_data.flow_id,
76
- node_id=node_data.node_id,
77
- filter_input=fi)
78
+ # Default to basic mode with an empty basic filter
79
+ basic_filter = transform_schema.BasicFilter(
80
+ field="",
81
+ operator=transform_schema.FilterOperator.EQUALS,
82
+ value="",
83
+ )
84
+ fi = transform_schema.FilterInput(basic_filter=basic_filter, mode="basic")
85
+ node_data.setting_input = input_schema.NodeFilter(
86
+ flow_id=node_data.flow_id, node_id=node_data.node_id, filter_input=fi
87
+ )
78
88
  return node_data
79
89
 
80
90
 
@@ -86,26 +96,26 @@ def join(node_data: NodeData):
86
96
  right_columns = set(node_data.right_input.columns)
87
97
  left_select = setting_input.join_input.left_select
88
98
  right_select = setting_input.join_input.right_select
99
+ # Update is_available based on whether column exists in input
89
100
  for ls in left_select.renames:
90
- if ls.old_name not in right_columns:
91
- left_select.remove_select_input(ls.old_name)
101
+ ls.is_available = ls.old_name in left_columns
92
102
  for rs in right_select.renames:
93
- if rs.old_name not in right_columns:
94
- right_select.remove_select_input(rs.old_name)
95
- existing_columns_right = set(r.old_name for r in right_select.renames if r.is_available)
96
- existing_columns_left = set(r.old_name for r in left_select.renames if r.is_available)
103
+ rs.is_available = rs.old_name in right_columns
104
+ # Check ALL columns in renames to prevent duplicates
105
+ existing_columns_left = set(r.old_name for r in left_select.renames)
106
+ existing_columns_right = set(r.old_name for r in right_select.renames)
97
107
  missing_incoming_left_columns = [ilc for ilc in left_columns if ilc not in existing_columns_left]
98
108
  missing_incoming_right_columns = [irc for irc in right_columns if irc not in existing_columns_right]
99
- if not hasattr(setting_input, 'auto_keep_left'):
109
+ if not hasattr(setting_input, "auto_keep_left"):
100
110
  setting_input.auto_keep_left = False
101
- if not hasattr(setting_input, 'auto_keep_right'):
111
+ if not hasattr(setting_input, "auto_keep_right"):
102
112
  setting_input.auto_keep_right = False
103
113
  for milc in missing_incoming_left_columns:
104
114
  select_input = transform_schema.SelectInput(old_name=milc, keep=setting_input.auto_keep_left)
105
- setting_input.join_input.add_new_select_column(select_input, 'left')
115
+ setting_input.join_input.add_new_select_column(select_input, "left")
106
116
  for mirc in missing_incoming_right_columns:
107
117
  select_input = transform_schema.SelectInput(old_name=mirc, keep=setting_input.auto_keep_right)
108
- setting_input.join_input.add_new_select_column(select_input, 'right')
118
+ setting_input.join_input.add_new_select_column(select_input, "right")
109
119
  return node_data
110
120
 
111
121
 
@@ -117,31 +127,32 @@ def cross_join(node_data: NodeData):
117
127
  right_columns = set(node_data.right_input.columns)
118
128
  left_select = setting_input.cross_join_input.left_select
119
129
  right_select = setting_input.cross_join_input.right_select
130
+ # Update is_available based on whether column exists in input
120
131
  for ls in left_select.renames:
121
- if ls.old_name not in right_columns:
122
- left_select.remove_select_input(ls.old_name)
132
+ ls.is_available = ls.old_name in left_columns
123
133
  for rs in right_select.renames:
124
- if rs.old_name not in right_columns:
125
- right_select.remove_select_input(rs.old_name)
126
- existing_columns_right = set(r.old_name for r in right_select.renames if r.is_available)
127
- existing_columns_left = set(r.old_name for r in left_select.renames if r.is_available)
134
+ rs.is_available = rs.old_name in right_columns
135
+ # Check ALL columns in renames to prevent duplicates
136
+ existing_columns_left = set(r.old_name for r in left_select.renames)
137
+ existing_columns_right = set(r.old_name for r in right_select.renames)
128
138
  missing_incoming_left_columns = [ilc for ilc in left_columns if ilc not in existing_columns_left]
129
139
  missing_incoming_right_columns = [irc for irc in right_columns if irc not in existing_columns_right]
130
- if not hasattr(setting_input, 'auto_keep_left'):
140
+ if not hasattr(setting_input, "auto_keep_left"):
131
141
  setting_input.auto_keep_left = False
132
- if not hasattr(setting_input, 'auto_keep_right'):
142
+ if not hasattr(setting_input, "auto_keep_right"):
133
143
  setting_input.auto_keep_right = False
134
144
  for milc in missing_incoming_left_columns:
135
145
  select_input = transform_schema.SelectInput(old_name=milc, keep=setting_input.auto_keep_left)
136
- setting_input.cross_join_input.add_new_select_column(select_input, 'left')
146
+ setting_input.cross_join_input.add_new_select_column(select_input, "left")
137
147
  for mirc in missing_incoming_right_columns:
138
148
  select_input = transform_schema.SelectInput(old_name=mirc, keep=setting_input.auto_keep_right)
139
- setting_input.cross_join_input.add_new_select_column(select_input, 'right')
149
+ setting_input.cross_join_input.add_new_select_column(select_input, "right")
140
150
  return node_data
141
151
 
142
152
 
143
- def check_if_fuzzy_match_is_valid(left_columns: Iterable[str], right_columns: Iterable[str],
144
- fuzzy_map: FuzzyMapping) -> bool:
153
+ def check_if_fuzzy_match_is_valid(
154
+ left_columns: Iterable[str], right_columns: Iterable[str], fuzzy_map: FuzzyMapping
155
+ ) -> bool:
145
156
  if fuzzy_map.left_col not in left_columns:
146
157
  return False
147
158
  if fuzzy_map.right_col not in right_columns:
@@ -159,24 +170,24 @@ def fuzzy_match(node_data: NodeData):
159
170
  right_select = setting_input.join_input.right_select
160
171
  for fuzzy_map in setting_input.join_input.join_mapping:
161
172
  fuzzy_map.valid = check_if_fuzzy_match_is_valid(left_columns, right_columns, fuzzy_map)
173
+ # Update is_available based on whether column exists in input
162
174
  for ls in left_select.renames:
163
- if ls.old_name not in right_columns:
164
- left_select.remove_select_input(ls.old_name)
175
+ ls.is_available = ls.old_name in left_columns
165
176
  for rs in right_select.renames:
166
- if rs.old_name not in right_columns:
167
- right_select.remove_select_input(rs.old_name)
168
- existing_columns_right = set(r.old_name for r in right_select.renames if r.is_available)
169
- existing_columns_left = set(r.old_name for r in left_select.renames if r.is_available)
177
+ rs.is_available = rs.old_name in right_columns
178
+ # Check ALL columns in renames to prevent duplicates
179
+ existing_columns_left = set(r.old_name for r in left_select.renames)
180
+ existing_columns_right = set(r.old_name for r in right_select.renames)
170
181
  missing_incoming_left_columns = [ilc for ilc in left_columns if ilc not in existing_columns_left]
171
182
  missing_incoming_right_columns = [irc for irc in right_columns if irc not in existing_columns_right]
172
- if not hasattr(setting_input, 'auto_keep_left'):
183
+ if not hasattr(setting_input, "auto_keep_left"):
173
184
  setting_input.auto_keep_left = False
174
- if not hasattr(setting_input, 'auto_keep_right'):
185
+ if not hasattr(setting_input, "auto_keep_right"):
175
186
  setting_input.auto_keep_right = False
176
187
  for milc in missing_incoming_left_columns:
177
188
  select_input = transform_schema.SelectInput(old_name=milc, keep=setting_input.auto_keep_left)
178
- setting_input.join_input.add_new_select_column(select_input, 'left')
189
+ setting_input.join_input.add_new_select_column(select_input, "left")
179
190
  for mirc in missing_incoming_right_columns:
180
191
  select_input = transform_schema.SelectInput(old_name=mirc, keep=setting_input.auto_keep_right)
181
- setting_input.join_input.add_new_select_column(select_input, 'right')
192
+ setting_input.join_input.add_new_select_column(select_input, "right")
182
193
  return node_data
@@ -1,31 +1,34 @@
1
1
  from abc import ABC, abstractmethod
2
- from typing import Generator, Callable, List, Any, Optional, Dict
3
- from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
2
+ from collections.abc import Callable, Generator
3
+ from typing import Any
4
+
4
5
  import polars as pl
5
6
 
7
+ from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
8
+
6
9
 
7
10
  class ExternalDataSource(ABC):
8
- schema: Optional[List[FlowfileColumn]]
9
- data_getter: Optional[Callable]
11
+ schema: list[FlowfileColumn] | None
12
+ data_getter: Callable | None
10
13
  is_collected: bool
11
14
  cache_store: Any
12
15
  _type: str
13
- initial_data_getter: Optional[Callable]
16
+ initial_data_getter: Callable | None
14
17
 
15
18
  @abstractmethod
16
19
  def __init__(self):
17
20
  pass
18
21
 
19
22
  @abstractmethod
20
- def get_initial_data(self) -> List[Dict[str, Any]]:
23
+ def get_initial_data(self) -> list[dict[str, Any]]:
21
24
  pass
22
25
 
23
26
  @abstractmethod
24
- def get_iter(self) -> Generator[Dict[str, Any], None, None]:
27
+ def get_iter(self) -> Generator[dict[str, Any], None, None]:
25
28
  pass
26
29
 
27
30
  @abstractmethod
28
- def get_sample(self, n: int = 10000) -> Generator[Dict[str, Any], None, None]:
31
+ def get_sample(self, n: int = 10000) -> Generator[dict[str, Any], None, None]:
29
32
  pass
30
33
 
31
34
  @abstractmethod
@@ -34,6 +37,5 @@ class ExternalDataSource(ABC):
34
37
 
35
38
  @staticmethod
36
39
  @abstractmethod
37
- def parse_schema(*args, **kwargs) -> List[FlowfileColumn]:
40
+ def parse_schema(*args, **kwargs) -> list[FlowfileColumn]:
38
41
  pass
39
-
@@ -1,16 +1,19 @@
1
- from typing import Any, Dict, Generator, List, Optional, Callable
1
+ from collections.abc import Callable, Generator
2
+ from typing import Any
3
+
4
+ import polars as pl
5
+
2
6
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
3
- from flowfile_core.schemas import input_schema
4
7
  from flowfile_core.flowfile.sources.external_sources.base_class import ExternalDataSource
5
- import polars as pl
8
+ from flowfile_core.schemas import input_schema
6
9
 
7
10
 
8
11
  class CustomExternalSourceSettings:
9
12
  data_getter: Generator
10
- initial_data_getter: Optional[Callable] = None
11
- orientation: str = 'row'
13
+ initial_data_getter: Callable | None = None
14
+ orientation: str = "row"
12
15
 
13
- def __init__(self, data_getter: Generator, initial_data_getter: Optional[Callable] = None, orientation: str = 'row'):
16
+ def __init__(self, data_getter: Generator, initial_data_getter: Callable | None = None, orientation: str = "row"):
14
17
  self.data_getter = data_getter
15
18
  self.initial_data_getter = initial_data_getter
16
19
  self.orientation = orientation
@@ -18,15 +21,18 @@ class CustomExternalSourceSettings:
18
21
 
19
22
  class CustomExternalSource(ExternalDataSource):
20
23
  data_getter: Generator = None
21
- schema: Optional[List[FlowfileColumn]] = None
22
- cache_store: List = None
24
+ schema: list[FlowfileColumn] | None = None
25
+ cache_store: list = None
23
26
  is_collected: bool = False
24
27
 
25
- def __init__(self, data_getter: Generator[Any, None, None],
26
- initial_data_getter: Callable = None,
27
- orientation: str = 'row',
28
- schema: List = None,
29
- **kwargs):
28
+ def __init__(
29
+ self,
30
+ data_getter: Generator[Any, None, None],
31
+ initial_data_getter: Callable = None,
32
+ orientation: str = "row",
33
+ schema: list = None,
34
+ **kwargs,
35
+ ):
30
36
  self.cache_store = list()
31
37
  self.data_getter = data_getter
32
38
  self.collected = False
@@ -38,23 +44,27 @@ class CustomExternalSource(ExternalDataSource):
38
44
  else:
39
45
  self.schema = None
40
46
 
41
- if not initial_data_getter and orientation == 'row':
47
+ if not initial_data_getter and orientation == "row":
48
+
42
49
  def initial_data_getter():
43
50
  if len(self.cache_store) == 0:
44
51
  self.cache_store.append(next(data_getter, None))
45
52
  return self.cache_store
53
+
46
54
  self.initial_data_getter = initial_data_getter
47
55
  elif initial_data_getter:
48
56
  self.initial_data_getter = initial_data_getter
49
57
  elif self.schema:
58
+
50
59
  def initial_data_getter():
51
60
  return [{d.column_name: None for d in self.schema}]
61
+
52
62
  self.initial_data_getter = initial_data_getter
53
63
  else:
54
64
  self.initial_data_getter = None
55
65
 
56
66
  @staticmethod
57
- def parse_schema(schema: List[Any]) -> List[FlowfileColumn]:
67
+ def parse_schema(schema: list[Any]) -> list[FlowfileColumn]:
58
68
  if len(schema) == 0:
59
69
  return []
60
70
  first_col = schema[0]
@@ -63,7 +73,7 @@ class CustomExternalSource(ExternalDataSource):
63
73
  elif isinstance(first_col, (list, tuple)):
64
74
  return [FlowfileColumn.from_input(column_name=col[0], data_type=col[1]) for col in schema]
65
75
  elif isinstance(first_col, str):
66
- return [FlowfileColumn.from_input(column_name=col, data_type='varchar') for col in schema]
76
+ return [FlowfileColumn.from_input(column_name=col, data_type="varchar") for col in schema]
67
77
  elif isinstance(first_col, input_schema.MinimalFieldInfo):
68
78
  return [FlowfileColumn.from_input(column_name=col.name, data_type=col.data_type) for col in schema]
69
79
  elif isinstance(first_col, FlowfileColumn):
@@ -76,7 +86,7 @@ class CustomExternalSource(ExternalDataSource):
76
86
  return self.initial_data_getter()
77
87
  return []
78
88
 
79
- def get_iter(self) -> Generator[Dict[str, Any], None, None]:
89
+ def get_iter(self) -> Generator[dict[str, Any], None, None]:
80
90
  if self.collected:
81
91
  return
82
92
  for data in self.cache_store:
@@ -1,10 +1,13 @@
1
- from typing import Dict, Any, Generator
1
+ from collections.abc import Generator
2
2
  from time import sleep
3
- from flowfile_core.schemas.input_schema import SampleUsers
3
+ from typing import Any
4
+
4
5
  import requests
5
6
 
7
+ from flowfile_core.schemas.input_schema import SampleUsers
6
8
 
7
- def getter(data: SampleUsers) -> Generator[Dict[str, Any], None, None]:
9
+
10
+ def getter(data: SampleUsers) -> Generator[dict[str, Any], None, None]:
8
11
  """
9
12
  Sample users generator function. This is a minimal example of a generator function that yields user data and can
10
13
  be used in a flowfile. The function simulates a delay to mimic the behavior of an external data source.
@@ -17,13 +20,10 @@ def getter(data: SampleUsers) -> Generator[Dict[str, Any], None, None]:
17
20
  index_pos = 0
18
21
  for i in range(data.size):
19
22
  sleep(0.01)
20
- headers = {
21
- 'x-api-key': 'reqres-free-v1'
22
- }
23
+ headers = {"x-api-key": "reqres-free-v1"}
23
24
 
24
25
  response = requests.get("https://reqres.in/api/users", headers=headers).json()
25
- for v in response['data']:
26
- v['index'] = index_pos
26
+ for v in response["data"]:
27
+ v["index"] = index_pos
27
28
  index_pos += 1
28
29
  yield v
29
-
@@ -16,4 +16,3 @@ def data_source_factory(source_type: str, **kwargs) -> CustomExternalSource:
16
16
  return CustomExternalSource(**kwargs)
17
17
  else:
18
18
  raise ValueError(f"Unknown source type: {source_type}")
19
-