Flowfile 0.4.1__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (332)
  1. build_backends/main.py +25 -22
  2. build_backends/main_prd.py +10 -19
  3. flowfile/__init__.py +179 -73
  4. flowfile/__main__.py +10 -7
  5. flowfile/api.py +52 -59
  6. flowfile/web/__init__.py +14 -9
  7. flowfile/web/static/assets/AdminView-49392a9a.js +713 -0
  8. flowfile/web/static/assets/AdminView-f53bad23.css +129 -0
  9. flowfile/web/static/assets/CloudConnectionView-36bcd6df.css +72 -0
  10. flowfile/web/static/assets/{CloudConnectionManager-d3248f8d.js → CloudConnectionView-f13f202b.js} +11 -11
  11. flowfile/web/static/assets/{CloudStorageReader-d65bf041.js → CloudStorageReader-0023d4a5.js} +10 -8
  12. flowfile/web/static/assets/{CloudStorageReader-29d14fcc.css → CloudStorageReader-24c54524.css} +27 -27
  13. flowfile/web/static/assets/{CloudStorageWriter-b0ee067f.css → CloudStorageWriter-60547855.css} +26 -26
  14. flowfile/web/static/assets/{CloudStorageWriter-e83be3ed.js → CloudStorageWriter-8e781e11.js} +10 -8
  15. flowfile/web/static/assets/{ColumnSelector-47996a16.css → ColumnSelector-371637fb.css} +2 -2
  16. flowfile/web/static/assets/{ColumnSelector-cce661cf.js → ColumnSelector-8ad68ea9.js} +3 -5
  17. flowfile/web/static/assets/{ContextMenu-c13f91d0.css → ContextMenu-26d4dd27.css} +6 -6
  18. flowfile/web/static/assets/{ContextMenu-11a4652a.js → ContextMenu-31ee57f0.js} +3 -3
  19. flowfile/web/static/assets/{ContextMenu-160afb08.js → ContextMenu-69a74055.js} +3 -3
  20. flowfile/web/static/assets/{ContextMenu-cf18d2cc.js → ContextMenu-8e2051c6.js} +3 -3
  21. flowfile/web/static/assets/{ContextMenu-4c74eef1.css → ContextMenu-8ec1729e.css} +6 -6
  22. flowfile/web/static/assets/{ContextMenu-63cfa99b.css → ContextMenu-9b310c60.css} +6 -6
  23. flowfile/web/static/assets/{CrossJoin-d395d38c.js → CrossJoin-03df6938.js} +12 -10
  24. flowfile/web/static/assets/{CrossJoin-1119d18e.css → CrossJoin-71b4cc10.css} +20 -20
  25. flowfile/web/static/assets/CustomNode-59e99a86.css +32 -0
  26. flowfile/web/static/assets/{CustomNode-b812dc0b.js → CustomNode-8479239b.js} +36 -24
  27. flowfile/web/static/assets/{DatabaseConnectionSettings-7000bf2c.js → DatabaseConnectionSettings-869e3efd.js} +5 -4
  28. flowfile/web/static/assets/{DatabaseConnectionSettings-0c04b2e5.css → DatabaseConnectionSettings-e91df89a.css} +13 -13
  29. flowfile/web/static/assets/{DatabaseReader-ae61773c.css → DatabaseReader-36898a00.css} +24 -24
  30. flowfile/web/static/assets/{DatabaseReader-4f035d0c.js → DatabaseReader-c58b9552.js} +25 -15
  31. flowfile/web/static/assets/DatabaseView-6655afd6.css +57 -0
  32. flowfile/web/static/assets/{DatabaseManager-9662ec5b.js → DatabaseView-d26a9140.js} +11 -11
  33. flowfile/web/static/assets/{DatabaseWriter-2f570e53.css → DatabaseWriter-217a99f1.css} +19 -19
  34. flowfile/web/static/assets/{DatabaseWriter-f65dcd54.js → DatabaseWriter-4d05ddc7.js} +17 -10
  35. flowfile/web/static/assets/{designer-e3c150ec.css → DesignerView-a6d0ee84.css} +629 -538
  36. flowfile/web/static/assets/{designer-f3656d8c.js → DesignerView-e6f5c0e8.js} +1214 -3209
  37. flowfile/web/static/assets/{documentation-52b241e7.js → DocumentationView-2e78ef1b.js} +5 -5
  38. flowfile/web/static/assets/{documentation-12216a74.css → DocumentationView-fd46c656.css} +7 -7
  39. flowfile/web/static/assets/{ExploreData-2d0cf4db.css → ExploreData-10c5acc8.css} +13 -12
  40. flowfile/web/static/assets/{ExploreData-94c43dfc.js → ExploreData-7b54caca.js} +18 -9
  41. flowfile/web/static/assets/{ExternalSource-ac04b3cc.js → ExternalSource-3fa399b2.js} +9 -7
  42. flowfile/web/static/assets/{ExternalSource-e37b6275.css → ExternalSource-47ab05a3.css} +17 -17
  43. flowfile/web/static/assets/Filter-7494ea97.css +48 -0
  44. flowfile/web/static/assets/Filter-8cbbdbf3.js +287 -0
  45. flowfile/web/static/assets/{Formula-bb96803d.css → Formula-53d58c43.css} +7 -7
  46. flowfile/web/static/assets/{Formula-71472193.js → Formula-aac42b1e.js} +13 -11
  47. flowfile/web/static/assets/{FuzzyMatch-1010f966.css → FuzzyMatch-ad6361d6.css} +68 -69
  48. flowfile/web/static/assets/{FuzzyMatch-b317f631.js → FuzzyMatch-cd9bbfca.js} +12 -10
  49. flowfile/web/static/assets/{Pivot-cf333e3d.css → GraphSolver-c24dec17.css} +5 -5
  50. flowfile/web/static/assets/{GraphSolver-754a234f.js → GraphSolver-c7e6780e.js} +13 -11
  51. flowfile/web/static/assets/{GroupBy-6c6f9802.js → GroupBy-93c5d22b.js} +9 -7
  52. flowfile/web/static/assets/{GroupBy-b9505323.css → GroupBy-be7ac0bf.css} +10 -10
  53. flowfile/web/static/assets/{Join-fd79b451.css → Join-28b5e18f.css} +22 -22
  54. flowfile/web/static/assets/{Join-a1b800be.js → Join-a19b2de2.js} +13 -11
  55. flowfile/web/static/assets/LoginView-0df4ed0a.js +134 -0
  56. flowfile/web/static/assets/LoginView-d325d632.css +172 -0
  57. flowfile/web/static/assets/ManualInput-3702e677.css +293 -0
  58. flowfile/web/static/assets/{ManualInput-a9640276.js → ManualInput-8d3374b2.js} +170 -116
  59. flowfile/web/static/assets/{MultiSelect-97213888.js → MultiSelect-ad1b6243.js} +2 -2
  60. flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-6ffe088a.js → MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js} +1 -1
  61. flowfile/web/static/assets/NodeDesigner-40b647c9.js +2610 -0
  62. flowfile/web/static/assets/NodeDesigner-5f53be3f.css +1429 -0
  63. flowfile/web/static/assets/{NumericInput-e638088a.js → NumericInput-7100234c.js} +2 -2
  64. flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-90eb2cba.js → NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js} +5 -2
  65. flowfile/web/static/assets/{Output-ddc9079f.css → Output-35e97000.css} +6 -6
  66. flowfile/web/static/assets/{Output-76750610.js → Output-f5efd2aa.js} +60 -38
  67. flowfile/web/static/assets/{GraphSolver-f0cb7bfb.css → Pivot-0eda81b4.css} +5 -5
  68. flowfile/web/static/assets/{Pivot-7814803f.js → Pivot-d981d23c.js} +11 -9
  69. flowfile/web/static/assets/PivotValidation-0e905b1a.css +13 -0
  70. flowfile/web/static/assets/{PivotValidation-f92137d2.js → PivotValidation-39386e95.js} +3 -3
  71. flowfile/web/static/assets/PivotValidation-41b57ad6.css +13 -0
  72. flowfile/web/static/assets/{PivotValidation-76dd431a.js → PivotValidation-63de1f73.js} +3 -3
  73. flowfile/web/static/assets/{PolarsCode-650322d1.css → PolarsCode-2b1f1f23.css} +4 -4
  74. flowfile/web/static/assets/{PolarsCode-889c3008.js → PolarsCode-f9d69217.js} +18 -9
  75. flowfile/web/static/assets/PopOver-b22f049e.js +939 -0
  76. flowfile/web/static/assets/PopOver-d96599db.css +33 -0
  77. flowfile/web/static/assets/{Read-6b17491f.css → Read-36e7bd51.css} +12 -12
  78. flowfile/web/static/assets/{Read-637b72a7.js → Read-aec2e377.js} +83 -105
  79. flowfile/web/static/assets/{RecordCount-2b050c41.js → RecordCount-78ed6845.js} +6 -4
  80. flowfile/web/static/assets/{RecordId-81df7784.js → RecordId-2156e890.js} +8 -6
  81. flowfile/web/static/assets/{SQLQueryComponent-36cef432.css → SQLQueryComponent-1c2f26b4.css} +5 -5
  82. flowfile/web/static/assets/{SQLQueryComponent-88dcfe53.js → SQLQueryComponent-48c72f5b.js} +3 -3
  83. flowfile/web/static/assets/{Sample-258ad2a9.js → Sample-1352ca74.js} +6 -4
  84. flowfile/web/static/assets/SecretSelector-22b5ff89.js +113 -0
  85. flowfile/web/static/assets/SecretSelector-6329f743.css +43 -0
  86. flowfile/web/static/assets/{SecretManager-2a2cb7e2.js → SecretsView-17df66ee.js} +35 -36
  87. flowfile/web/static/assets/SecretsView-aa291340.css +38 -0
  88. flowfile/web/static/assets/{Select-850215fd.js → Select-0aee4c54.js} +9 -7
  89. flowfile/web/static/assets/{SettingsSection-55bae608.js → SettingsSection-0784e157.js} +3 -3
  90. flowfile/web/static/assets/{SettingsSection-71e6b7e3.css → SettingsSection-07fbbc39.css} +4 -4
  91. flowfile/web/static/assets/{SettingsSection-5c696bee.css → SettingsSection-26fe48d4.css} +4 -4
  92. flowfile/web/static/assets/{SettingsSection-2e4d03c4.css → SettingsSection-8f980839.css} +4 -4
  93. flowfile/web/static/assets/{SettingsSection-0e8d9123.js → SettingsSection-cd341bb6.js} +3 -3
  94. flowfile/web/static/assets/{SettingsSection-29b4fa6b.js → SettingsSection-f2002a6d.js} +3 -3
  95. flowfile/web/static/assets/{SingleSelect-bebd408b.js → SingleSelect-460cc0ea.js} +2 -2
  96. flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-6093741c.js → SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js} +1 -1
  97. flowfile/web/static/assets/{SliderInput-6a05ab61.js → SliderInput-5d926864.js} +7 -4
  98. flowfile/web/static/assets/SliderInput-f2e4f23c.css +4 -0
  99. flowfile/web/static/assets/{Sort-10ab48ed.js → Sort-3cdc971b.js} +9 -7
  100. flowfile/web/static/assets/{Unique-f9fb0809.css → Sort-8a871341.css} +10 -10
  101. flowfile/web/static/assets/{TextInput-df9d6259.js → TextInput-a2d0bfbd.js} +2 -2
  102. flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-000e1178.js → TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js} +5 -2
  103. flowfile/web/static/assets/{TextToRows-5d2c1190.css → TextToRows-12afb4f4.css} +10 -10
  104. flowfile/web/static/assets/{TextToRows-6c2d93d8.js → TextToRows-918945f7.js} +11 -10
  105. flowfile/web/static/assets/{ToggleSwitch-0ff7ac52.js → ToggleSwitch-f0ef5196.js} +2 -2
  106. flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-c6dc3029.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js} +1 -1
  107. flowfile/web/static/assets/{UnavailableFields-5edd5322.css → UnavailableFields-54d2f518.css} +6 -6
  108. flowfile/web/static/assets/{UnavailableFields-1bab97cb.js → UnavailableFields-bdad6144.js} +4 -4
  109. flowfile/web/static/assets/{Union-af6c3d9b.css → Union-d6a8d7d5.css} +7 -7
  110. flowfile/web/static/assets/{Union-b563478a.js → Union-e8ab8c86.js} +8 -6
  111. flowfile/web/static/assets/{Unique-f90db5db.js → Unique-8cd4f976.js} +13 -22
  112. flowfile/web/static/assets/{Sort-3643d625.css → Unique-9fb2f567.css} +10 -10
  113. flowfile/web/static/assets/{Unpivot-1e422df3.css → Unpivot-710a2948.css} +7 -7
  114. flowfile/web/static/assets/{Unpivot-bcb0025f.js → Unpivot-8da14095.js} +10 -8
  115. flowfile/web/static/assets/{UnpivotValidation-c4e73b04.js → UnpivotValidation-6f7d89ff.js} +3 -3
  116. flowfile/web/static/assets/UnpivotValidation-d5ca3b7b.css +13 -0
  117. flowfile/web/static/assets/{VueGraphicWalker-bb8535e2.js → VueGraphicWalker-3fb312e1.js} +4 -4
  118. flowfile/web/static/assets/{VueGraphicWalker-ed5ab88b.css → VueGraphicWalker-430f0b86.css} +1 -1
  119. flowfile/web/static/assets/{api-4c8e3822.js → api-24483f0d.js} +1 -1
  120. flowfile/web/static/assets/{api-2d6adc4f.js → api-8b81fa73.js} +1 -1
  121. flowfile/web/static/assets/{dropDown-35135ba8.css → dropDown-3d8dc5fa.css} +40 -40
  122. flowfile/web/static/assets/{dropDown-1bca8a74.js → dropDown-ac0fda9d.js} +3 -3
  123. flowfile/web/static/assets/{fullEditor-2985687e.js → fullEditor-5497a84a.js} +11 -10
  124. flowfile/web/static/assets/{fullEditor-178376bb.css → fullEditor-a0be62b3.css} +74 -62
  125. flowfile/web/static/assets/{genericNodeSettings-924759c7.css → genericNodeSettings-3b2507ea.css} +10 -10
  126. flowfile/web/static/assets/{genericNodeSettings-0476ba4e.js → genericNodeSettings-99014e1d.js} +5 -5
  127. flowfile/web/static/assets/index-07dda503.js +38 -0
  128. flowfile/web/static/assets/index-3ba44389.js +2696 -0
  129. flowfile/web/static/assets/{index-50508d4d.css → index-e6289dd0.css} +1945 -569
  130. flowfile/web/static/assets/{index-246f201c.js → index-fb6493ae.js} +41626 -40869
  131. flowfile/web/static/assets/node.types-2c15bb7e.js +82 -0
  132. flowfile/web/static/assets/nodeInput-0eb13f1a.js +2 -0
  133. flowfile/web/static/assets/{outputCsv-d686eeaf.js → outputCsv-8f8ba42d.js} +3 -3
  134. flowfile/web/static/assets/outputCsv-b9a072af.css +2499 -0
  135. flowfile/web/static/assets/{outputExcel-8809ea2f.js → outputExcel-393f4fef.js} +3 -3
  136. flowfile/web/static/assets/{outputExcel-b41305c0.css → outputExcel-f5d272b2.css} +26 -26
  137. flowfile/web/static/assets/{outputParquet-53ba645a.js → outputParquet-07c81f65.js} +4 -4
  138. flowfile/web/static/assets/outputParquet-54597c3c.css +4 -0
  139. flowfile/web/static/assets/{readCsv-053bf97b.js → readCsv-07f6d9ad.js} +21 -20
  140. flowfile/web/static/assets/{readCsv-bca3ed53.css → readCsv-3bfac4c3.css} +15 -15
  141. flowfile/web/static/assets/{readExcel-e1b381ea.css → readExcel-3db6b763.css} +13 -13
  142. flowfile/web/static/assets/{readExcel-ad531eab.js → readExcel-ed69bc8f.js} +10 -12
  143. flowfile/web/static/assets/{readParquet-cee068e2.css → readParquet-c5244ad5.css} +4 -4
  144. flowfile/web/static/assets/{readParquet-58e899a1.js → readParquet-e3ed4528.js} +4 -7
  145. flowfile/web/static/assets/secrets.api-002e7d7e.js +65 -0
  146. flowfile/web/static/assets/{selectDynamic-b38de2ba.js → selectDynamic-80b92899.js} +5 -5
  147. flowfile/web/static/assets/{selectDynamic-aa913ff4.css → selectDynamic-f2fb394f.css} +21 -20
  148. flowfile/web/static/assets/{vue-codemirror.esm-db9b8936.js → vue-codemirror.esm-0965f39f.js} +31 -637
  149. flowfile/web/static/assets/{vue-content-loader.es-b5f3ac30.js → vue-content-loader.es-c506ad97.js} +1 -1
  150. flowfile/web/static/index.html +2 -2
  151. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/METADATA +4 -4
  152. flowfile-0.5.3.dist-info/RECORD +402 -0
  153. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/WHEEL +1 -1
  154. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/entry_points.txt +1 -0
  155. flowfile_core/__init__.py +13 -3
  156. flowfile_core/auth/jwt.py +51 -16
  157. flowfile_core/auth/models.py +32 -7
  158. flowfile_core/auth/password.py +89 -0
  159. flowfile_core/auth/secrets.py +8 -6
  160. flowfile_core/configs/__init__.py +9 -7
  161. flowfile_core/configs/flow_logger.py +15 -14
  162. flowfile_core/configs/node_store/__init__.py +72 -4
  163. flowfile_core/configs/node_store/nodes.py +155 -172
  164. flowfile_core/configs/node_store/user_defined_node_registry.py +108 -27
  165. flowfile_core/configs/settings.py +28 -15
  166. flowfile_core/database/connection.py +7 -6
  167. flowfile_core/database/init_db.py +96 -2
  168. flowfile_core/database/models.py +3 -1
  169. flowfile_core/fileExplorer/__init__.py +17 -0
  170. flowfile_core/fileExplorer/funcs.py +123 -57
  171. flowfile_core/fileExplorer/utils.py +10 -11
  172. flowfile_core/flowfile/_extensions/real_time_interface.py +10 -8
  173. flowfile_core/flowfile/analytics/analytics_processor.py +27 -24
  174. flowfile_core/flowfile/analytics/graphic_walker.py +11 -12
  175. flowfile_core/flowfile/analytics/utils.py +1 -1
  176. flowfile_core/flowfile/code_generator/code_generator.py +391 -279
  177. flowfile_core/flowfile/connection_manager/_connection_manager.py +6 -5
  178. flowfile_core/flowfile/connection_manager/models.py +1 -1
  179. flowfile_core/flowfile/database_connection_manager/db_connections.py +60 -44
  180. flowfile_core/flowfile/database_connection_manager/models.py +1 -1
  181. flowfile_core/flowfile/extensions.py +17 -12
  182. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +34 -32
  183. flowfile_core/flowfile/flow_data_engine/create/funcs.py +152 -103
  184. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +526 -477
  185. flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +2 -2
  186. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +92 -52
  187. flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +12 -11
  188. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +6 -6
  189. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +26 -30
  190. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +43 -32
  191. flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -1
  192. flowfile_core/flowfile/flow_data_engine/join/utils.py +11 -9
  193. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +15 -11
  194. flowfile_core/flowfile/flow_data_engine/pivot_table.py +5 -7
  195. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +95 -82
  196. flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +66 -65
  197. flowfile_core/flowfile/flow_data_engine/sample_data.py +27 -21
  198. flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -1
  199. flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +13 -11
  200. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +360 -191
  201. flowfile_core/flowfile/flow_data_engine/threaded_processes.py +8 -8
  202. flowfile_core/flowfile/flow_data_engine/utils.py +101 -67
  203. flowfile_core/flowfile/flow_graph.py +1011 -561
  204. flowfile_core/flowfile/flow_graph_utils.py +31 -49
  205. flowfile_core/flowfile/flow_node/flow_node.py +332 -232
  206. flowfile_core/flowfile/flow_node/models.py +54 -41
  207. flowfile_core/flowfile/flow_node/schema_callback.py +14 -19
  208. flowfile_core/flowfile/graph_tree/graph_tree.py +41 -41
  209. flowfile_core/flowfile/handler.py +82 -32
  210. flowfile_core/flowfile/manage/compatibility_enhancements.py +493 -47
  211. flowfile_core/flowfile/manage/io_flowfile.py +391 -0
  212. flowfile_core/flowfile/node_designer/__init__.py +15 -13
  213. flowfile_core/flowfile/node_designer/_type_registry.py +34 -37
  214. flowfile_core/flowfile/node_designer/custom_node.py +162 -36
  215. flowfile_core/flowfile/node_designer/ui_components.py +136 -35
  216. flowfile_core/flowfile/schema_callbacks.py +77 -54
  217. flowfile_core/flowfile/setting_generator/__init__.py +0 -1
  218. flowfile_core/flowfile/setting_generator/setting_generator.py +6 -5
  219. flowfile_core/flowfile/setting_generator/settings.py +72 -55
  220. flowfile_core/flowfile/sources/external_sources/base_class.py +12 -10
  221. flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +27 -17
  222. flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +9 -9
  223. flowfile_core/flowfile/sources/external_sources/factory.py +0 -1
  224. flowfile_core/flowfile/sources/external_sources/sql_source/models.py +45 -31
  225. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +198 -73
  226. flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +250 -196
  227. flowfile_core/flowfile/util/calculate_layout.py +9 -13
  228. flowfile_core/flowfile/util/execution_orderer.py +25 -17
  229. flowfile_core/flowfile/util/node_skipper.py +4 -4
  230. flowfile_core/flowfile/utils.py +19 -21
  231. flowfile_core/main.py +26 -19
  232. flowfile_core/routes/auth.py +284 -11
  233. flowfile_core/routes/cloud_connections.py +25 -25
  234. flowfile_core/routes/logs.py +21 -29
  235. flowfile_core/routes/public.py +3 -3
  236. flowfile_core/routes/routes.py +77 -43
  237. flowfile_core/routes/secrets.py +25 -27
  238. flowfile_core/routes/user_defined_components.py +483 -4
  239. flowfile_core/run_lock.py +0 -1
  240. flowfile_core/schemas/__init__.py +4 -6
  241. flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +55 -55
  242. flowfile_core/schemas/cloud_storage_schemas.py +59 -55
  243. flowfile_core/schemas/input_schema.py +398 -154
  244. flowfile_core/schemas/output_model.py +50 -35
  245. flowfile_core/schemas/schemas.py +207 -67
  246. flowfile_core/schemas/transform_schema.py +1360 -435
  247. flowfile_core/schemas/yaml_types.py +117 -0
  248. flowfile_core/secret_manager/secret_manager.py +17 -13
  249. flowfile_core/{flowfile/node_designer/data_types.py → types.py} +33 -3
  250. flowfile_core/utils/arrow_reader.py +7 -6
  251. flowfile_core/utils/excel_file_manager.py +3 -3
  252. flowfile_core/utils/fileManager.py +7 -7
  253. flowfile_core/utils/fl_executor.py +8 -10
  254. flowfile_core/utils/utils.py +4 -4
  255. flowfile_core/utils/validate_setup.py +5 -4
  256. flowfile_frame/__init__.py +107 -50
  257. flowfile_frame/adapters.py +2 -9
  258. flowfile_frame/adding_expr.py +73 -32
  259. flowfile_frame/cloud_storage/frame_helpers.py +27 -23
  260. flowfile_frame/cloud_storage/secret_manager.py +12 -26
  261. flowfile_frame/config.py +2 -5
  262. flowfile_frame/expr.py +311 -218
  263. flowfile_frame/expr.pyi +160 -159
  264. flowfile_frame/expr_name.py +23 -23
  265. flowfile_frame/flow_frame.py +581 -489
  266. flowfile_frame/flow_frame.pyi +123 -104
  267. flowfile_frame/flow_frame_methods.py +236 -252
  268. flowfile_frame/group_frame.py +50 -20
  269. flowfile_frame/join.py +2 -2
  270. flowfile_frame/lazy.py +129 -87
  271. flowfile_frame/lazy_methods.py +83 -30
  272. flowfile_frame/list_name_space.py +55 -50
  273. flowfile_frame/selectors.py +148 -68
  274. flowfile_frame/series.py +9 -7
  275. flowfile_frame/utils.py +19 -21
  276. flowfile_worker/__init__.py +12 -4
  277. flowfile_worker/configs.py +11 -19
  278. flowfile_worker/create/__init__.py +14 -27
  279. flowfile_worker/create/funcs.py +143 -94
  280. flowfile_worker/create/models.py +139 -68
  281. flowfile_worker/create/pl_types.py +14 -15
  282. flowfile_worker/create/read_excel_tables.py +34 -41
  283. flowfile_worker/create/utils.py +22 -19
  284. flowfile_worker/external_sources/s3_source/main.py +18 -51
  285. flowfile_worker/external_sources/s3_source/models.py +34 -27
  286. flowfile_worker/external_sources/sql_source/main.py +8 -5
  287. flowfile_worker/external_sources/sql_source/models.py +13 -9
  288. flowfile_worker/flow_logger.py +10 -8
  289. flowfile_worker/funcs.py +214 -155
  290. flowfile_worker/main.py +11 -17
  291. flowfile_worker/models.py +35 -28
  292. flowfile_worker/process_manager.py +2 -3
  293. flowfile_worker/routes.py +121 -93
  294. flowfile_worker/secrets.py +9 -6
  295. flowfile_worker/spawner.py +80 -49
  296. flowfile_worker/utils.py +3 -2
  297. shared/__init__.py +2 -7
  298. shared/storage_config.py +25 -13
  299. test_utils/postgres/commands.py +3 -2
  300. test_utils/postgres/fixtures.py +9 -9
  301. test_utils/s3/commands.py +1 -1
  302. test_utils/s3/data_generator.py +3 -4
  303. test_utils/s3/demo_data_generator.py +4 -7
  304. test_utils/s3/fixtures.py +7 -5
  305. tools/migrate/README.md +56 -0
  306. tools/migrate/__init__.py +12 -0
  307. tools/migrate/__main__.py +118 -0
  308. tools/migrate/legacy_schemas.py +682 -0
  309. tools/migrate/migrate.py +610 -0
  310. tools/migrate/tests/__init__.py +0 -0
  311. tools/migrate/tests/conftest.py +21 -0
  312. tools/migrate/tests/test_migrate.py +622 -0
  313. tools/migrate/tests/test_migration_e2e.py +1009 -0
  314. tools/migrate/tests/test_node_migrations.py +843 -0
  315. flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +0 -86
  316. flowfile/web/static/assets/CustomNode-74a37f74.css +0 -32
  317. flowfile/web/static/assets/DatabaseManager-30fa27e5.css +0 -64
  318. flowfile/web/static/assets/Filter-812dcbca.js +0 -164
  319. flowfile/web/static/assets/Filter-f62091b3.css +0 -20
  320. flowfile/web/static/assets/ManualInput-3246a08d.css +0 -96
  321. flowfile/web/static/assets/PivotValidation-891ddfb0.css +0 -13
  322. flowfile/web/static/assets/PivotValidation-c46cd420.css +0 -13
  323. flowfile/web/static/assets/SliderInput-b8fb6a8c.css +0 -4
  324. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +0 -13
  325. flowfile/web/static/assets/outputCsv-9cc59e0b.css +0 -2499
  326. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +0 -4
  327. flowfile/web/static/assets/secretApi-538058f3.js +0 -46
  328. flowfile/web/static/assets/vue-codemirror-bccfde04.css +0 -32
  329. flowfile-0.4.1.dist-info/RECORD +0 -376
  330. flowfile_core/flowfile/manage/open_flowfile.py +0 -143
  331. {flowfile-0.4.1.dist-info → flowfile-0.5.3.dist-info}/licenses/LICENSE +0 -0
  332. /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
@@ -1,22 +1,17 @@
1
-
2
- from typing import List
3
-
4
- from polars import datatypes
5
1
  import polars as pl
6
-
7
2
  from pl_fuzzy_frame_match.output_column_name_utils import set_name_in_fuzzy_mappings
8
3
  from pl_fuzzy_frame_match.pre_process import rename_fuzzy_right_mapping
4
+ from polars import datatypes
9
5
 
10
- from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import fetch_unique_values
11
6
  from flowfile_core.configs.flow_logger import main_logger
12
7
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, PlType
13
- from flowfile_core.schemas import transform_schema
14
- from flowfile_core.schemas import input_schema
8
+ from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import fetch_unique_values
9
+ from flowfile_core.schemas import input_schema, transform_schema
15
10
 
16
11
 
17
- def _ensure_all_columns_have_select(left_cols: List[str],
18
- right_cols: List[str],
19
- fuzzy_match_input: transform_schema.FuzzyMatchInput):
12
+ def _ensure_all_columns_have_select(
13
+ left_cols: list[str], right_cols: list[str], fuzzy_match_input: transform_schema.FuzzyMatchInputManager
14
+ ):
20
15
  """
21
16
  Ensure that all columns in the left and right FlowDataEngines are included in the fuzzy match input's select
22
17
  statements.
@@ -32,13 +27,16 @@ def _ensure_all_columns_have_select(left_cols: List[str],
32
27
  left_cols_in_select = {c.old_name for c in fuzzy_match_input.left_select.renames}
33
28
 
34
29
  fuzzy_match_input.left_select.renames.extend(
35
- [transform_schema.SelectInput(col) for col in left_cols if col not in left_cols_in_select])
30
+ [transform_schema.SelectInput(col) for col in left_cols if col not in left_cols_in_select]
31
+ )
36
32
  fuzzy_match_input.right_select.renames.extend(
37
33
  [transform_schema.SelectInput(col) for col in right_cols if col not in right_cols_in_select]
38
34
  )
39
35
 
40
36
 
41
- def _order_join_inputs_based_on_col_order(col_order: List[str], join_inputs: transform_schema.JoinInputs) -> None:
37
+ def _order_join_inputs_based_on_col_order(
38
+ col_order: list[str], join_inputs: transform_schema.JoinInputsManager
39
+ ) -> None:
42
40
  """
43
41
  Ensure that the select columns in the fuzzy match input match the order of the incoming columns.
44
42
  This function modifies the join_inputs object in-place.
@@ -46,27 +44,35 @@ def _order_join_inputs_based_on_col_order(col_order: List[str], join_inputs: tra
46
44
  Returns:
47
45
  None
48
46
  """
49
- select_map = {select.new_name: select for select in join_inputs.renames}
47
+ select_map = {select.old_name: select for select in join_inputs.renames}
50
48
  ordered_renames = [select_map[col] for col in col_order if col in select_map]
51
- join_inputs.renames = ordered_renames
52
-
53
-
54
- def calculate_fuzzy_match_schema(fm_input: transform_schema.FuzzyMatchInput,
55
- left_schema: List[FlowfileColumn],
56
- right_schema: List[FlowfileColumn]):
57
- _ensure_all_columns_have_select(left_cols=[col.column_name for col in left_schema],
58
- right_cols=[col.column_name for col in right_schema],
59
- fuzzy_match_input=fm_input)
60
- _order_join_inputs_based_on_col_order(col_order=[col.column_name for col in left_schema],
61
- join_inputs=fm_input.left_select)
62
- _order_join_inputs_based_on_col_order(col_order=[col.column_name for col in right_schema],
63
- join_inputs=fm_input.right_select)
49
+ join_inputs.select_inputs.renames = ordered_renames
50
+
51
+
52
+ def calculate_fuzzy_match_schema(
53
+ fm_input: transform_schema.FuzzyMatchInputManager,
54
+ left_schema: list[FlowfileColumn],
55
+ right_schema: list[FlowfileColumn],
56
+ ):
57
+ _ensure_all_columns_have_select(
58
+ left_cols=[col.column_name for col in left_schema],
59
+ right_cols=[col.column_name for col in right_schema],
60
+ fuzzy_match_input=fm_input,
61
+ )
62
+
63
+ _order_join_inputs_based_on_col_order(
64
+ col_order=[col.column_name for col in left_schema], join_inputs=fm_input.left_select
65
+ )
66
+ _order_join_inputs_based_on_col_order(
67
+ col_order=[col.column_name for col in right_schema], join_inputs=fm_input.right_select
68
+ )
64
69
  for column in fm_input.left_select.renames:
65
70
  if column.join_key:
66
71
  column.keep = True
67
72
  for column in fm_input.right_select.renames:
68
73
  if column.join_key:
69
74
  column.keep = True
75
+
70
76
  left_schema_dict, right_schema_dict = ({ls.name: ls for ls in left_schema}, {rs.name: rs for rs in right_schema})
71
77
  fm_input.auto_rename()
72
78
  right_renames = {column.old_name: column.new_name for column in fm_input.right_select.renames}
@@ -75,20 +81,27 @@ def calculate_fuzzy_match_schema(fm_input: transform_schema.FuzzyMatchInput,
75
81
  for column in fm_input.left_select.renames:
76
82
  column_schema = left_schema_dict.get(column.old_name)
77
83
  if column_schema and (column.keep or column.join_key):
78
- output_schema.append(FlowfileColumn.from_input(column.new_name, column_schema.data_type,
79
- example_values=column_schema.example_values))
84
+ output_schema.append(
85
+ FlowfileColumn.from_input(
86
+ column.new_name, column_schema.data_type, example_values=column_schema.example_values
87
+ )
88
+ )
80
89
  for column in fm_input.right_select.renames:
81
90
  column_schema = right_schema_dict.get(column.old_name)
82
91
  if column_schema and (column.keep or column.join_key):
83
- output_schema.append(FlowfileColumn.from_input(column.new_name, column_schema.data_type,
84
- example_values=column_schema.example_values))
92
+ output_schema.append(
93
+ FlowfileColumn.from_input(
94
+ column.new_name, column_schema.data_type, example_values=column_schema.example_values
95
+ )
96
+ )
85
97
  set_name_in_fuzzy_mappings(new_join_mapping)
86
- output_schema.extend([FlowfileColumn.from_input(fuzzy_mapping.output_column_name, 'Float64')
87
- for fuzzy_mapping in new_join_mapping])
98
+ output_schema.extend(
99
+ [FlowfileColumn.from_input(fuzzy_mapping.output_column_name, "Float64") for fuzzy_mapping in new_join_mapping]
100
+ )
88
101
  return output_schema
89
102
 
90
103
 
91
- def get_schema_of_column(node_input_schema: List[FlowfileColumn], col_name: str) -> FlowfileColumn|None:
104
+ def get_schema_of_column(node_input_schema: list[FlowfileColumn], col_name: str) -> FlowfileColumn | None:
92
105
  for s in node_input_schema:
93
106
  if s.name == col_name:
94
107
  return s
@@ -96,44 +109,54 @@ def get_schema_of_column(node_input_schema: List[FlowfileColumn], col_name: str)
96
109
 
97
110
  class InvalidSetup(ValueError):
98
111
  """Error raised when pivot column has too many unique values."""
112
+
99
113
  pass
100
114
 
101
115
 
102
116
  def get_output_data_type_pivot(schema: FlowfileColumn, agg_type: str) -> datatypes:
103
- if agg_type in ('count', 'n_unique'):
117
+ if agg_type in ("count", "n_unique"):
104
118
  output_type = datatypes.Float64 # count is always float
105
- elif schema.generic_datatype() == 'numeric':
119
+ elif schema.generic_datatype() == "numeric":
106
120
  output_type = datatypes.Float64
107
- elif schema.generic_datatype() == 'string':
121
+ elif schema.generic_datatype() == "string":
108
122
  output_type = datatypes.Utf8
109
- elif schema.generic_datatype() == 'date':
123
+ elif schema.generic_datatype() == "date":
110
124
  output_type = datatypes.Datetime
111
125
  else:
112
126
  output_type = datatypes.Utf8
113
127
  return output_type
114
128
 
115
129
 
116
- def pre_calculate_pivot_schema(node_input_schema: List[FlowfileColumn],
117
- pivot_input: transform_schema.PivotInput,
118
- output_fields: List[input_schema.MinimalFieldInfo] = None,
119
- input_lf: pl.LazyFrame = None) -> List[FlowfileColumn]:
120
- index_columns_schema = [get_schema_of_column(node_input_schema, index_col) for index_col in
121
- pivot_input.index_columns]
130
+ def pre_calculate_pivot_schema(
131
+ node_input_schema: list[FlowfileColumn],
132
+ pivot_input: transform_schema.PivotInput,
133
+ output_fields: list[input_schema.MinimalFieldInfo] = None,
134
+ input_lf: pl.LazyFrame = None,
135
+ ) -> list[FlowfileColumn]:
136
+ index_columns_schema = [
137
+ get_schema_of_column(node_input_schema, index_col) for index_col in pivot_input.index_columns
138
+ ]
122
139
  val_column_schema = get_schema_of_column(node_input_schema, pivot_input.value_col)
123
140
  if output_fields is not None and len(output_fields) > 0:
124
- return index_columns_schema+[FlowfileColumn(PlType(column_name=output_field.name,
125
- pl_datatype=output_field.data_type)) for output_field in
126
- output_fields]
141
+ return index_columns_schema + [
142
+ FlowfileColumn(PlType(column_name=output_field.name, pl_datatype=output_field.data_type))
143
+ for output_field in output_fields
144
+ ]
127
145
 
128
146
  else:
129
147
  max_unique_vals = 200
130
- unique_vals = fetch_unique_values(input_lf.select(pivot_input.pivot_column)
131
- .unique()
132
- .sort(pivot_input.pivot_column)
133
- .limit(max_unique_vals).cast(pl.String))
148
+ unique_vals = fetch_unique_values(
149
+ input_lf.select(pivot_input.pivot_column)
150
+ .unique()
151
+ .sort(pivot_input.pivot_column)
152
+ .limit(max_unique_vals)
153
+ .cast(pl.String)
154
+ )
134
155
  if len(unique_vals) >= max_unique_vals:
135
- main_logger.warning('Pivot column has too many unique values. Please consider using a different column.'
136
- f' Max unique values: {max_unique_vals}')
156
+ main_logger.warning(
157
+ "Pivot column has too many unique values. Please consider using a different column."
158
+ f" Max unique values: {max_unique_vals}"
159
+ )
137
160
  pl_output_fields = []
138
161
  for val in unique_vals:
139
162
  if len(pivot_input.aggregations) == 1:
@@ -142,5 +165,5 @@ def pre_calculate_pivot_schema(node_input_schema: List[FlowfileColumn],
142
165
  else:
143
166
  for agg in pivot_input.aggregations:
144
167
  output_type = get_output_data_type_pivot(val_column_schema, agg)
145
- pl_output_fields.append(PlType(column_name=f'{val}_{agg}', pl_datatype=output_type))
168
+ pl_output_fields.append(PlType(column_name=f"{val}_{agg}", pl_datatype=output_type))
146
169
  return index_columns_schema + [FlowfileColumn(pl_output_field) for pl_output_field in pl_output_fields]
@@ -1,2 +1 @@
1
-
2
1
  from flowfile_core.flowfile.setting_generator.settings import setting_generator, setting_updator
@@ -1,5 +1,6 @@
1
+ from collections.abc import Callable
2
+
1
3
  from flowfile_core.configs import logger
2
- from typing import Callable
3
4
 
4
5
 
5
6
  class SettingGenerator:
@@ -13,10 +14,10 @@ class SettingGenerator:
13
14
  setattr(self, f.__name__, f)
14
15
 
15
16
  def get_setting_generator(self, node_type: str) -> Callable:
16
- logger.info('getting setting generator for ' + node_type)
17
+ logger.info("getting setting generator for " + node_type)
17
18
 
18
19
  if node_type in self.setting_generator_set:
19
- logger.info('setting generator found')
20
+ logger.info("setting generator found")
20
21
  return getattr(self, node_type)
21
22
  else:
22
23
  return lambda x: x
@@ -33,9 +34,9 @@ class SettingUpdator:
33
34
  setattr(self, f.__name__, f)
34
35
 
35
36
  def get_setting_updator(self, node_type: str) -> Callable:
36
- logger.info('getting setting updator for ' + node_type)
37
+ logger.info("getting setting updator for " + node_type)
37
38
  if node_type in self.setting_updator_set:
38
- logger.info('setting updator found')
39
+ logger.info("setting updator found")
39
40
  return getattr(self, node_type)
40
41
  else:
41
42
  return lambda x: x
@@ -1,10 +1,11 @@
1
+ from collections.abc import Callable, Iterable
2
+ from functools import wraps
3
+
4
+ from pl_fuzzy_frame_match.models import FuzzyMapping
1
5
 
6
+ from flowfile_core.flowfile.setting_generator.setting_generator import SettingGenerator, SettingUpdator
2
7
  from flowfile_core.schemas import input_schema, transform_schema
3
- from typing import Callable, Iterable
4
- from functools import wraps
5
8
  from flowfile_core.schemas.output_model import NodeData
6
- from flowfile_core.flowfile.setting_generator.setting_generator import SettingGenerator, SettingUpdator
7
- from pl_fuzzy_frame_match.models import FuzzyMapping
8
9
 
9
10
  setting_generator = SettingGenerator()
10
11
  setting_updator = SettingUpdator()
@@ -16,6 +17,7 @@ def setting_generator_method(f: callable) -> Callable:
16
17
  if node_data.setting_input is None or isinstance(node_data.setting_input, input_schema.NodePromise):
17
18
  f(node_data)
18
19
  return node_data
20
+
19
21
  setting_generator.add_setting_generator_func(inner)
20
22
  return inner
21
23
 
@@ -38,37 +40,51 @@ def join(node_data: "NodeData") -> NodeData:
38
40
  if len(overlapping_cols) > 0:
39
41
  join_key = overlapping_cols[0]
40
42
  else:
41
- join_key = ''
42
- ji = transform_schema.JoinInput(join_mapping=join_key,
43
- left_select=node_data.main_input.columns,
44
- right_select=node_data.right_input.columns
45
- )
46
- ji.auto_rename()
47
- node_data.setting_input = input_schema.NodeJoin(flow_id=node_data.flow_id,
48
- node_id=node_data.node_id,
49
- join_input=ji)
43
+ join_key = ""
44
+ join_input_manager = transform_schema.JoinInputManager(
45
+ transform_schema.JoinInput(
46
+ join_mapping=join_key,
47
+ left_select=node_data.main_input.columns,
48
+ right_select=node_data.right_input.columns,
49
+ )
50
+ )
51
+ join_input_manager.auto_rename()
52
+ ji = join_input_manager.to_join_input()
53
+ node_data.setting_input = input_schema.NodeJoin(
54
+ flow_id=node_data.flow_id, node_id=node_data.node_id, join_input=ji
55
+ )
50
56
  return node_data
51
57
 
52
58
 
53
59
  @setting_generator_method
54
60
  def cross_join(node_data: "NodeData") -> NodeData:
55
61
  if node_data.right_input and node_data.main_input:
56
- ji = transform_schema.CrossJoinInput(left_select=node_data.main_input.columns,
57
- right_select=node_data.right_input.columns)
58
- ji.auto_rename()
59
- node_data.setting_input = input_schema.NodeCrossJoin(flow_id=node_data.flow_id,
60
- node_id=node_data.node_id,
61
- cross_join_input=ji)
62
+ cj_input_manager = transform_schema.CrossJoinInputManager(
63
+ transform_schema.CrossJoinInput(
64
+ left_select=node_data.main_input.columns, right_select=node_data.right_input.columns
65
+ )
66
+ )
67
+ cj_input_manager.auto_rename()
68
+ cj = cj_input_manager.to_cross_join_input()
69
+ node_data.setting_input = input_schema.NodeCrossJoin(
70
+ flow_id=node_data.flow_id, node_id=node_data.node_id, cross_join_input=cj
71
+ )
62
72
  return node_data
63
73
 
64
74
 
65
75
  @setting_generator_method
66
76
  def filter(node_data: "NodeData") -> NodeData:
67
77
  if node_data.main_input:
68
- fi = transform_schema.FilterInput(basic_filter=transform_schema.BasicFilter(), filter_type='advanced')
69
- node_data.setting_input = input_schema.NodeFilter(flow_id=node_data.flow_id,
70
- node_id=node_data.node_id,
71
- filter_input=fi)
78
+ # Default to basic mode with an empty basic filter
79
+ basic_filter = transform_schema.BasicFilter(
80
+ field="",
81
+ operator=transform_schema.FilterOperator.EQUALS,
82
+ value="",
83
+ )
84
+ fi = transform_schema.FilterInput(basic_filter=basic_filter, mode="basic")
85
+ node_data.setting_input = input_schema.NodeFilter(
86
+ flow_id=node_data.flow_id, node_id=node_data.node_id, filter_input=fi
87
+ )
72
88
  return node_data
73
89
 
74
90
 
@@ -80,26 +96,26 @@ def join(node_data: NodeData):
80
96
  right_columns = set(node_data.right_input.columns)
81
97
  left_select = setting_input.join_input.left_select
82
98
  right_select = setting_input.join_input.right_select
99
+ # Update is_available based on whether column exists in input
83
100
  for ls in left_select.renames:
84
- if ls.old_name not in right_columns:
85
- left_select.remove_select_input(ls.old_name)
101
+ ls.is_available = ls.old_name in left_columns
86
102
  for rs in right_select.renames:
87
- if rs.old_name not in right_columns:
88
- right_select.remove_select_input(rs.old_name)
89
- existing_columns_right = set(r.old_name for r in right_select.renames if r.is_available)
90
- existing_columns_left = set(r.old_name for r in left_select.renames if r.is_available)
103
+ rs.is_available = rs.old_name in right_columns
104
+ # Check ALL columns in renames to prevent duplicates
105
+ existing_columns_left = set(r.old_name for r in left_select.renames)
106
+ existing_columns_right = set(r.old_name for r in right_select.renames)
91
107
  missing_incoming_left_columns = [ilc for ilc in left_columns if ilc not in existing_columns_left]
92
108
  missing_incoming_right_columns = [irc for irc in right_columns if irc not in existing_columns_right]
93
- if not hasattr(setting_input, 'auto_keep_left'):
109
+ if not hasattr(setting_input, "auto_keep_left"):
94
110
  setting_input.auto_keep_left = False
95
- if not hasattr(setting_input, 'auto_keep_right'):
111
+ if not hasattr(setting_input, "auto_keep_right"):
96
112
  setting_input.auto_keep_right = False
97
113
  for milc in missing_incoming_left_columns:
98
114
  select_input = transform_schema.SelectInput(old_name=milc, keep=setting_input.auto_keep_left)
99
- setting_input.join_input.add_new_select_column(select_input, 'left')
115
+ setting_input.join_input.add_new_select_column(select_input, "left")
100
116
  for mirc in missing_incoming_right_columns:
101
117
  select_input = transform_schema.SelectInput(old_name=mirc, keep=setting_input.auto_keep_right)
102
- setting_input.join_input.add_new_select_column(select_input, 'right')
118
+ setting_input.join_input.add_new_select_column(select_input, "right")
103
119
  return node_data
104
120
 
105
121
 
@@ -111,31 +127,32 @@ def cross_join(node_data: NodeData):
111
127
  right_columns = set(node_data.right_input.columns)
112
128
  left_select = setting_input.cross_join_input.left_select
113
129
  right_select = setting_input.cross_join_input.right_select
130
+ # Update is_available based on whether column exists in input
114
131
  for ls in left_select.renames:
115
- if ls.old_name not in right_columns:
116
- left_select.remove_select_input(ls.old_name)
132
+ ls.is_available = ls.old_name in left_columns
117
133
  for rs in right_select.renames:
118
- if rs.old_name not in right_columns:
119
- right_select.remove_select_input(rs.old_name)
120
- existing_columns_right = set(r.old_name for r in right_select.renames if r.is_available)
121
- existing_columns_left = set(r.old_name for r in left_select.renames if r.is_available)
134
+ rs.is_available = rs.old_name in right_columns
135
+ # Check ALL columns in renames to prevent duplicates
136
+ existing_columns_left = set(r.old_name for r in left_select.renames)
137
+ existing_columns_right = set(r.old_name for r in right_select.renames)
122
138
  missing_incoming_left_columns = [ilc for ilc in left_columns if ilc not in existing_columns_left]
123
139
  missing_incoming_right_columns = [irc for irc in right_columns if irc not in existing_columns_right]
124
- if not hasattr(setting_input, 'auto_keep_left'):
140
+ if not hasattr(setting_input, "auto_keep_left"):
125
141
  setting_input.auto_keep_left = False
126
- if not hasattr(setting_input, 'auto_keep_right'):
142
+ if not hasattr(setting_input, "auto_keep_right"):
127
143
  setting_input.auto_keep_right = False
128
144
  for milc in missing_incoming_left_columns:
129
145
  select_input = transform_schema.SelectInput(old_name=milc, keep=setting_input.auto_keep_left)
130
- setting_input.cross_join_input.add_new_select_column(select_input, 'left')
146
+ setting_input.cross_join_input.add_new_select_column(select_input, "left")
131
147
  for mirc in missing_incoming_right_columns:
132
148
  select_input = transform_schema.SelectInput(old_name=mirc, keep=setting_input.auto_keep_right)
133
- setting_input.cross_join_input.add_new_select_column(select_input, 'right')
149
+ setting_input.cross_join_input.add_new_select_column(select_input, "right")
134
150
  return node_data
135
151
 
136
152
 
137
- def check_if_fuzzy_match_is_valid(left_columns: Iterable[str], right_columns: Iterable[str],
138
- fuzzy_map: FuzzyMapping) -> bool:
153
+ def check_if_fuzzy_match_is_valid(
154
+ left_columns: Iterable[str], right_columns: Iterable[str], fuzzy_map: FuzzyMapping
155
+ ) -> bool:
139
156
  if fuzzy_map.left_col not in left_columns:
140
157
  return False
141
158
  if fuzzy_map.right_col not in right_columns:
@@ -153,24 +170,24 @@ def fuzzy_match(node_data: NodeData):
153
170
  right_select = setting_input.join_input.right_select
154
171
  for fuzzy_map in setting_input.join_input.join_mapping:
155
172
  fuzzy_map.valid = check_if_fuzzy_match_is_valid(left_columns, right_columns, fuzzy_map)
173
+ # Update is_available based on whether column exists in input
156
174
  for ls in left_select.renames:
157
- if ls.old_name not in right_columns:
158
- left_select.remove_select_input(ls.old_name)
175
+ ls.is_available = ls.old_name in left_columns
159
176
  for rs in right_select.renames:
160
- if rs.old_name not in right_columns:
161
- right_select.remove_select_input(rs.old_name)
162
- existing_columns_right = set(r.old_name for r in right_select.renames if r.is_available)
163
- existing_columns_left = set(r.old_name for r in left_select.renames if r.is_available)
177
+ rs.is_available = rs.old_name in right_columns
178
+ # Check ALL columns in renames to prevent duplicates
179
+ existing_columns_left = set(r.old_name for r in left_select.renames)
180
+ existing_columns_right = set(r.old_name for r in right_select.renames)
164
181
  missing_incoming_left_columns = [ilc for ilc in left_columns if ilc not in existing_columns_left]
165
182
  missing_incoming_right_columns = [irc for irc in right_columns if irc not in existing_columns_right]
166
- if not hasattr(setting_input, 'auto_keep_left'):
183
+ if not hasattr(setting_input, "auto_keep_left"):
167
184
  setting_input.auto_keep_left = False
168
- if not hasattr(setting_input, 'auto_keep_right'):
185
+ if not hasattr(setting_input, "auto_keep_right"):
169
186
  setting_input.auto_keep_right = False
170
187
  for milc in missing_incoming_left_columns:
171
188
  select_input = transform_schema.SelectInput(old_name=milc, keep=setting_input.auto_keep_left)
172
- setting_input.join_input.add_new_select_column(select_input, 'left')
189
+ setting_input.join_input.add_new_select_column(select_input, "left")
173
190
  for mirc in missing_incoming_right_columns:
174
191
  select_input = transform_schema.SelectInput(old_name=mirc, keep=setting_input.auto_keep_right)
175
- setting_input.join_input.add_new_select_column(select_input, 'right')
192
+ setting_input.join_input.add_new_select_column(select_input, "right")
176
193
  return node_data
@@ -1,31 +1,34 @@
1
1
  from abc import ABC, abstractmethod
2
- from typing import Generator, Callable, List, Any, Optional, Dict
3
- from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
2
+ from collections.abc import Callable, Generator
3
+ from typing import Any
4
+
4
5
  import polars as pl
5
6
 
7
+ from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
8
+
6
9
 
7
10
  class ExternalDataSource(ABC):
8
- schema: Optional[List[FlowfileColumn]]
9
- data_getter: Optional[Callable]
11
+ schema: list[FlowfileColumn] | None
12
+ data_getter: Callable | None
10
13
  is_collected: bool
11
14
  cache_store: Any
12
15
  _type: str
13
- initial_data_getter: Optional[Callable]
16
+ initial_data_getter: Callable | None
14
17
 
15
18
  @abstractmethod
16
19
  def __init__(self):
17
20
  pass
18
21
 
19
22
  @abstractmethod
20
- def get_initial_data(self) -> List[Dict[str, Any]]:
23
+ def get_initial_data(self) -> list[dict[str, Any]]:
21
24
  pass
22
25
 
23
26
  @abstractmethod
24
- def get_iter(self) -> Generator[Dict[str, Any], None, None]:
27
+ def get_iter(self) -> Generator[dict[str, Any], None, None]:
25
28
  pass
26
29
 
27
30
  @abstractmethod
28
- def get_sample(self, n: int = 10000) -> Generator[Dict[str, Any], None, None]:
31
+ def get_sample(self, n: int = 10000) -> Generator[dict[str, Any], None, None]:
29
32
  pass
30
33
 
31
34
  @abstractmethod
@@ -34,6 +37,5 @@ class ExternalDataSource(ABC):
34
37
 
35
38
  @staticmethod
36
39
  @abstractmethod
37
- def parse_schema(*args, **kwargs) -> List[FlowfileColumn]:
40
+ def parse_schema(*args, **kwargs) -> list[FlowfileColumn]:
38
41
  pass
39
-
@@ -1,16 +1,19 @@
1
- from typing import Any, Dict, Generator, List, Optional, Callable
1
+ from collections.abc import Callable, Generator
2
+ from typing import Any
3
+
4
+ import polars as pl
5
+
2
6
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
3
- from flowfile_core.schemas import input_schema
4
7
  from flowfile_core.flowfile.sources.external_sources.base_class import ExternalDataSource
5
- import polars as pl
8
+ from flowfile_core.schemas import input_schema
6
9
 
7
10
 
8
11
  class CustomExternalSourceSettings:
9
12
  data_getter: Generator
10
- initial_data_getter: Optional[Callable] = None
11
- orientation: str = 'row'
13
+ initial_data_getter: Callable | None = None
14
+ orientation: str = "row"
12
15
 
13
- def __init__(self, data_getter: Generator, initial_data_getter: Optional[Callable] = None, orientation: str = 'row'):
16
+ def __init__(self, data_getter: Generator, initial_data_getter: Callable | None = None, orientation: str = "row"):
14
17
  self.data_getter = data_getter
15
18
  self.initial_data_getter = initial_data_getter
16
19
  self.orientation = orientation
@@ -18,15 +21,18 @@ class CustomExternalSourceSettings:
18
21
 
19
22
  class CustomExternalSource(ExternalDataSource):
20
23
  data_getter: Generator = None
21
- schema: Optional[List[FlowfileColumn]] = None
22
- cache_store: List = None
24
+ schema: list[FlowfileColumn] | None = None
25
+ cache_store: list = None
23
26
  is_collected: bool = False
24
27
 
25
- def __init__(self, data_getter: Generator[Any, None, None],
26
- initial_data_getter: Callable = None,
27
- orientation: str = 'row',
28
- schema: List = None,
29
- **kwargs):
28
+ def __init__(
29
+ self,
30
+ data_getter: Generator[Any, None, None],
31
+ initial_data_getter: Callable = None,
32
+ orientation: str = "row",
33
+ schema: list = None,
34
+ **kwargs,
35
+ ):
30
36
  self.cache_store = list()
31
37
  self.data_getter = data_getter
32
38
  self.collected = False
@@ -38,23 +44,27 @@ class CustomExternalSource(ExternalDataSource):
38
44
  else:
39
45
  self.schema = None
40
46
 
41
- if not initial_data_getter and orientation == 'row':
47
+ if not initial_data_getter and orientation == "row":
48
+
42
49
  def initial_data_getter():
43
50
  if len(self.cache_store) == 0:
44
51
  self.cache_store.append(next(data_getter, None))
45
52
  return self.cache_store
53
+
46
54
  self.initial_data_getter = initial_data_getter
47
55
  elif initial_data_getter:
48
56
  self.initial_data_getter = initial_data_getter
49
57
  elif self.schema:
58
+
50
59
  def initial_data_getter():
51
60
  return [{d.column_name: None for d in self.schema}]
61
+
52
62
  self.initial_data_getter = initial_data_getter
53
63
  else:
54
64
  self.initial_data_getter = None
55
65
 
56
66
  @staticmethod
57
- def parse_schema(schema: List[Any]) -> List[FlowfileColumn]:
67
+ def parse_schema(schema: list[Any]) -> list[FlowfileColumn]:
58
68
  if len(schema) == 0:
59
69
  return []
60
70
  first_col = schema[0]
@@ -63,7 +73,7 @@ class CustomExternalSource(ExternalDataSource):
63
73
  elif isinstance(first_col, (list, tuple)):
64
74
  return [FlowfileColumn.from_input(column_name=col[0], data_type=col[1]) for col in schema]
65
75
  elif isinstance(first_col, str):
66
- return [FlowfileColumn.from_input(column_name=col, data_type='varchar') for col in schema]
76
+ return [FlowfileColumn.from_input(column_name=col, data_type="varchar") for col in schema]
67
77
  elif isinstance(first_col, input_schema.MinimalFieldInfo):
68
78
  return [FlowfileColumn.from_input(column_name=col.name, data_type=col.data_type) for col in schema]
69
79
  elif isinstance(first_col, FlowfileColumn):
@@ -76,7 +86,7 @@ class CustomExternalSource(ExternalDataSource):
76
86
  return self.initial_data_getter()
77
87
  return []
78
88
 
79
- def get_iter(self) -> Generator[Dict[str, Any], None, None]:
89
+ def get_iter(self) -> Generator[dict[str, Any], None, None]:
80
90
  if self.collected:
81
91
  return
82
92
  for data in self.cache_store:
@@ -1,10 +1,13 @@
1
- from typing import Dict, Any, Generator
1
+ from collections.abc import Generator
2
2
  from time import sleep
3
- from flowfile_core.schemas.input_schema import SampleUsers
3
+ from typing import Any
4
+
4
5
  import requests
5
6
 
7
+ from flowfile_core.schemas.input_schema import SampleUsers
6
8
 
7
- def getter(data: SampleUsers) -> Generator[Dict[str, Any], None, None]:
9
+
10
+ def getter(data: SampleUsers) -> Generator[dict[str, Any], None, None]:
8
11
  """
9
12
  Sample users generator function. This is a minimal example of a generator function that yields user data and can
10
13
  be used in a flowfile. The function simulates a delay to mimic the behavior of an external data source.
@@ -17,13 +20,10 @@ def getter(data: SampleUsers) -> Generator[Dict[str, Any], None, None]:
17
20
  index_pos = 0
18
21
  for i in range(data.size):
19
22
  sleep(0.01)
20
- headers = {
21
- 'x-api-key': 'reqres-free-v1'
22
- }
23
+ headers = {"x-api-key": "reqres-free-v1"}
23
24
 
24
25
  response = requests.get("https://reqres.in/api/users", headers=headers).json()
25
- for v in response['data']:
26
- v['index'] = index_pos
26
+ for v in response["data"]:
27
+ v["index"] = index_pos
27
28
  index_pos += 1
28
29
  yield v
29
-
@@ -16,4 +16,3 @@ def data_source_factory(source_type: str, **kwargs) -> CustomExternalSource:
16
16
  return CustomExternalSource(**kwargs)
17
17
  else:
18
18
  raise ValueError(f"Unknown source type: {source_type}")
19
-