Flowfile 0.3.9__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201)
  1. flowfile/__init__.py +8 -1
  2. flowfile/api.py +1 -3
  3. flowfile/web/static/assets/{CloudConnectionManager-c97c25f8.js → CloudConnectionManager-0dfba9f2.js} +2 -2
  4. flowfile/web/static/assets/{CloudStorageReader-f1ff509e.js → CloudStorageReader-d5b1b6c9.js} +11 -78
  5. flowfile/web/static/assets/{CloudStorageWriter-034f8b78.js → CloudStorageWriter-00d87aad.js} +12 -79
  6. flowfile/web/static/assets/{CloudStorageWriter-49c9a4b2.css → CloudStorageWriter-b0ee067f.css} +24 -24
  7. flowfile/web/static/assets/ColumnSelector-4685e75d.js +83 -0
  8. flowfile/web/static/assets/ColumnSelector-47996a16.css +10 -0
  9. flowfile/web/static/assets/ContextMenu-23e909da.js +41 -0
  10. flowfile/web/static/assets/{SettingsSection-9c836ecc.css → ContextMenu-4c74eef1.css} +0 -21
  11. flowfile/web/static/assets/ContextMenu-63cfa99b.css +26 -0
  12. flowfile/web/static/assets/ContextMenu-70ae0c79.js +41 -0
  13. flowfile/web/static/assets/ContextMenu-c13f91d0.css +26 -0
  14. flowfile/web/static/assets/ContextMenu-f149cf7c.js +41 -0
  15. flowfile/web/static/assets/{CrossJoin-41efa4cb.css → CrossJoin-1119d18e.css} +18 -18
  16. flowfile/web/static/assets/{CrossJoin-9e156ebe.js → CrossJoin-702a3edd.js} +14 -84
  17. flowfile/web/static/assets/CustomNode-74a37f74.css +32 -0
  18. flowfile/web/static/assets/CustomNode-b1519993.js +211 -0
  19. flowfile/web/static/assets/{DatabaseConnectionSettings-d5c625b3.js → DatabaseConnectionSettings-6f3e4ea5.js} +3 -3
  20. flowfile/web/static/assets/{DatabaseManager-265adc5e.js → DatabaseManager-cf5ef661.js} +2 -2
  21. flowfile/web/static/assets/{DatabaseReader-f50c6558.css → DatabaseReader-ae61773c.css} +0 -27
  22. flowfile/web/static/assets/{DatabaseReader-0b10551e.js → DatabaseReader-d38c7295.js} +14 -114
  23. flowfile/web/static/assets/{DatabaseWriter-c17c6916.js → DatabaseWriter-b04ef46a.js} +13 -74
  24. flowfile/web/static/assets/{ExploreData-5bdae813.css → ExploreData-2d0cf4db.css} +8 -14
  25. flowfile/web/static/assets/ExploreData-5fa10ed8.js +192 -0
  26. flowfile/web/static/assets/{ExternalSource-3a66556c.js → ExternalSource-d39af878.js} +8 -79
  27. flowfile/web/static/assets/{Filter-91ad87e7.js → Filter-9b6d08db.js} +12 -85
  28. flowfile/web/static/assets/{Filter-a9d08ba1.css → Filter-f62091b3.css} +3 -3
  29. flowfile/web/static/assets/{Formula-3c395ab1.js → Formula-6b04fb1d.js} +20 -87
  30. flowfile/web/static/assets/{Formula-29f19d21.css → Formula-bb96803d.css} +4 -4
  31. flowfile/web/static/assets/{FuzzyMatch-6857de82.css → FuzzyMatch-1010f966.css} +42 -42
  32. flowfile/web/static/assets/{FuzzyMatch-2df0d230.js → FuzzyMatch-999521f4.js} +16 -87
  33. flowfile/web/static/assets/{GraphSolver-d285877f.js → GraphSolver-17dd2198.js} +13 -159
  34. flowfile/web/static/assets/GraphSolver-f0cb7bfb.css +22 -0
  35. flowfile/web/static/assets/{GroupBy-0bd1cc6b.js → GroupBy-6b039e18.js} +12 -75
  36. flowfile/web/static/assets/{Unique-b5615727.css → GroupBy-b9505323.css} +8 -8
  37. flowfile/web/static/assets/{Join-5a78a203.js → Join-24d0f113.js} +15 -85
  38. flowfile/web/static/assets/{Join-f45eff22.css → Join-fd79b451.css} +20 -20
  39. flowfile/web/static/assets/{ManualInput-a71b52c6.css → ManualInput-3246a08d.css} +20 -20
  40. flowfile/web/static/assets/{ManualInput-93aef9d6.js → ManualInput-34639209.js} +11 -82
  41. flowfile/web/static/assets/MultiSelect-0e8724a3.js +5 -0
  42. flowfile/web/static/assets/MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js +63 -0
  43. flowfile/web/static/assets/NumericInput-3d63a470.js +5 -0
  44. flowfile/web/static/assets/NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js +35 -0
  45. flowfile/web/static/assets/Output-283fe388.css +37 -0
  46. flowfile/web/static/assets/{Output-411ecaee.js → Output-edea9802.js} +62 -273
  47. flowfile/web/static/assets/{Pivot-89db4b04.js → Pivot-61d19301.js} +14 -138
  48. flowfile/web/static/assets/Pivot-cf333e3d.css +22 -0
  49. flowfile/web/static/assets/PivotValidation-891ddfb0.css +13 -0
  50. flowfile/web/static/assets/PivotValidation-c46cd420.css +13 -0
  51. flowfile/web/static/assets/PivotValidation-de9f43fe.js +61 -0
  52. flowfile/web/static/assets/PivotValidation-f97fec5b.js +61 -0
  53. flowfile/web/static/assets/{PolarsCode-a9f974f8.js → PolarsCode-bc3c9984.js} +13 -80
  54. flowfile/web/static/assets/Read-64a3f259.js +218 -0
  55. flowfile/web/static/assets/Read-e808b239.css +62 -0
  56. flowfile/web/static/assets/RecordCount-3d5039be.js +53 -0
  57. flowfile/web/static/assets/{RecordId-55ae7d36.js → RecordId-597510e0.js} +8 -80
  58. flowfile/web/static/assets/SQLQueryComponent-36cef432.css +27 -0
  59. flowfile/web/static/assets/SQLQueryComponent-df51adbe.js +38 -0
  60. flowfile/web/static/assets/{Sample-b4a18476.js → Sample-4be0a507.js} +8 -77
  61. flowfile/web/static/assets/{SecretManager-b066d13a.js → SecretManager-4839be57.js} +2 -2
  62. flowfile/web/static/assets/{Select-727688dc.js → Select-9b72f201.js} +11 -85
  63. flowfile/web/static/assets/SettingsSection-2e4d03c4.css +21 -0
  64. flowfile/web/static/assets/SettingsSection-5c696bee.css +20 -0
  65. flowfile/web/static/assets/SettingsSection-71e6b7e3.css +21 -0
  66. flowfile/web/static/assets/SettingsSection-7ded385d.js +45 -0
  67. flowfile/web/static/assets/{SettingsSection-695ac487.js → SettingsSection-e1e9c953.js} +2 -40
  68. flowfile/web/static/assets/SettingsSection-f0f75a42.js +53 -0
  69. flowfile/web/static/assets/SingleSelect-6c777aac.js +5 -0
  70. flowfile/web/static/assets/SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js +62 -0
  71. flowfile/web/static/assets/SliderInput-7cb93e62.js +40 -0
  72. flowfile/web/static/assets/SliderInput-b8fb6a8c.css +4 -0
  73. flowfile/web/static/assets/{GroupBy-ab1ea74b.css → Sort-3643d625.css} +8 -8
  74. flowfile/web/static/assets/{Sort-be3339a8.js → Sort-6cbde21a.js} +12 -97
  75. flowfile/web/static/assets/TextInput-d9a40c11.js +5 -0
  76. flowfile/web/static/assets/TextInput.vue_vue_type_script_setup_true_lang-5896c375.js +32 -0
  77. flowfile/web/static/assets/{TextToRows-c92d1ec2.css → TextToRows-5d2c1190.css} +9 -9
  78. flowfile/web/static/assets/{TextToRows-7b8998da.js → TextToRows-c4fcbf4d.js} +14 -83
  79. flowfile/web/static/assets/ToggleSwitch-4ef91d19.js +5 -0
  80. flowfile/web/static/assets/ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js +31 -0
  81. flowfile/web/static/assets/{UnavailableFields-8b0cb48e.js → UnavailableFields-a03f512c.js} +2 -2
  82. flowfile/web/static/assets/{Union-8d9ac7f9.css → Union-af6c3d9b.css} +6 -6
  83. flowfile/web/static/assets/Union-bfe9b996.js +77 -0
  84. flowfile/web/static/assets/{Unique-af5a80b4.js → Unique-5d023a27.js} +23 -104
  85. flowfile/web/static/assets/{Sort-7ccfa0fe.css → Unique-f9fb0809.css} +8 -8
  86. flowfile/web/static/assets/Unpivot-1e422df3.css +30 -0
  87. flowfile/web/static/assets/{Unpivot-5195d411.js → Unpivot-91cc5354.js} +12 -166
  88. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +13 -0
  89. flowfile/web/static/assets/UnpivotValidation-7ee2de44.js +51 -0
  90. flowfile/web/static/assets/{ExploreData-18a4fe52.js → VueGraphicWalker-e51b9924.js} +4 -264
  91. flowfile/web/static/assets/VueGraphicWalker-ed5ab88b.css +6 -0
  92. flowfile/web/static/assets/{api-cb00cce6.js → api-c1bad5ca.js} +1 -1
  93. flowfile/web/static/assets/{api-023d1733.js → api-cf1221f0.js} +1 -1
  94. flowfile/web/static/assets/{designer-2197d782.css → designer-8da3ba3a.css} +859 -201
  95. flowfile/web/static/assets/{designer-6c322d8e.js → designer-9633482a.js} +2297 -733
  96. flowfile/web/static/assets/{documentation-4d1fafe1.js → documentation-ca400224.js} +1 -1
  97. flowfile/web/static/assets/{dropDown-0b46dd77.js → dropDown-614b998d.js} +1 -1
  98. flowfile/web/static/assets/{fullEditor-ec4e4f95.js → fullEditor-f7971590.js} +2 -2
  99. flowfile/web/static/assets/{genericNodeSettings-def5879b.js → genericNodeSettings-4fe5f36b.js} +3 -3
  100. flowfile/web/static/assets/{index-681a3ed0.css → index-50508d4d.css} +8 -0
  101. flowfile/web/static/assets/{index-683fc198.js → index-5429bbf8.js} +208 -31
  102. flowfile/web/static/assets/nodeInput-5d0d6b79.js +41 -0
  103. flowfile/web/static/assets/outputCsv-076b85ab.js +86 -0
  104. flowfile/web/static/assets/{Output-48f81019.css → outputCsv-9cc59e0b.css} +0 -143
  105. flowfile/web/static/assets/outputExcel-0fd17dbe.js +56 -0
  106. flowfile/web/static/assets/outputExcel-b41305c0.css +102 -0
  107. flowfile/web/static/assets/outputParquet-b61e0847.js +31 -0
  108. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +4 -0
  109. flowfile/web/static/assets/readCsv-a8bb8b61.js +179 -0
  110. flowfile/web/static/assets/readCsv-c767cb37.css +52 -0
  111. flowfile/web/static/assets/readExcel-67b4aee0.js +201 -0
  112. flowfile/web/static/assets/readExcel-806d2826.css +64 -0
  113. flowfile/web/static/assets/readParquet-48c81530.css +19 -0
  114. flowfile/web/static/assets/readParquet-92ce1dbc.js +23 -0
  115. flowfile/web/static/assets/{secretApi-baceb6f9.js → secretApi-68435402.js} +1 -1
  116. flowfile/web/static/assets/{selectDynamic-de91449a.js → selectDynamic-92e25ee3.js} +7 -7
  117. flowfile/web/static/assets/{selectDynamic-b062bc9b.css → selectDynamic-aa913ff4.css} +16 -16
  118. flowfile/web/static/assets/user-defined-icon-0ae16c90.png +0 -0
  119. flowfile/web/static/assets/{vue-codemirror.esm-dc5e3348.js → vue-codemirror.esm-41b0e0d7.js} +65 -36
  120. flowfile/web/static/assets/{vue-content-loader.es-ba94b82f.js → vue-content-loader.es-2c8e608f.js} +1 -1
  121. flowfile/web/static/index.html +2 -2
  122. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/METADATA +5 -3
  123. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/RECORD +191 -121
  124. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/WHEEL +1 -1
  125. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/entry_points.txt +1 -0
  126. flowfile_core/__init__.py +3 -0
  127. flowfile_core/configs/flow_logger.py +5 -13
  128. flowfile_core/configs/node_store/__init__.py +30 -0
  129. flowfile_core/configs/node_store/nodes.py +383 -99
  130. flowfile_core/configs/node_store/user_defined_node_registry.py +193 -0
  131. flowfile_core/configs/settings.py +2 -1
  132. flowfile_core/database/connection.py +5 -21
  133. flowfile_core/fileExplorer/funcs.py +239 -121
  134. flowfile_core/flowfile/analytics/analytics_processor.py +1 -0
  135. flowfile_core/flowfile/code_generator/code_generator.py +62 -64
  136. flowfile_core/flowfile/flow_data_engine/create/funcs.py +73 -56
  137. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +77 -86
  138. flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +4 -0
  139. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +19 -34
  140. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +36 -0
  141. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +23 -23
  142. flowfile_core/flowfile/flow_data_engine/join/utils.py +1 -1
  143. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +9 -4
  144. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +212 -86
  145. flowfile_core/flowfile/flow_data_engine/utils.py +2 -0
  146. flowfile_core/flowfile/flow_graph.py +240 -54
  147. flowfile_core/flowfile/flow_node/flow_node.py +48 -13
  148. flowfile_core/flowfile/flow_node/models.py +2 -1
  149. flowfile_core/flowfile/handler.py +24 -5
  150. flowfile_core/flowfile/manage/compatibility_enhancements.py +404 -41
  151. flowfile_core/flowfile/manage/io_flowfile.py +394 -0
  152. flowfile_core/flowfile/node_designer/__init__.py +47 -0
  153. flowfile_core/flowfile/node_designer/_type_registry.py +197 -0
  154. flowfile_core/flowfile/node_designer/custom_node.py +371 -0
  155. flowfile_core/flowfile/node_designer/ui_components.py +277 -0
  156. flowfile_core/flowfile/schema_callbacks.py +17 -10
  157. flowfile_core/flowfile/setting_generator/settings.py +15 -10
  158. flowfile_core/main.py +5 -1
  159. flowfile_core/routes/routes.py +73 -30
  160. flowfile_core/routes/user_defined_components.py +55 -0
  161. flowfile_core/schemas/cloud_storage_schemas.py +0 -2
  162. flowfile_core/schemas/input_schema.py +228 -65
  163. flowfile_core/schemas/output_model.py +5 -2
  164. flowfile_core/schemas/schemas.py +153 -35
  165. flowfile_core/schemas/transform_schema.py +1083 -412
  166. flowfile_core/schemas/yaml_types.py +103 -0
  167. flowfile_core/types.py +156 -0
  168. flowfile_core/utils/validate_setup.py +3 -1
  169. flowfile_frame/__init__.py +3 -1
  170. flowfile_frame/flow_frame.py +31 -24
  171. flowfile_frame/flow_frame_methods.py +12 -9
  172. flowfile_worker/__init__.py +9 -35
  173. flowfile_worker/create/__init__.py +3 -21
  174. flowfile_worker/create/funcs.py +68 -56
  175. flowfile_worker/create/models.py +130 -62
  176. flowfile_worker/main.py +5 -2
  177. flowfile_worker/routes.py +52 -13
  178. shared/__init__.py +15 -0
  179. shared/storage_config.py +258 -0
  180. tools/migrate/README.md +56 -0
  181. tools/migrate/__init__.py +12 -0
  182. tools/migrate/__main__.py +131 -0
  183. tools/migrate/legacy_schemas.py +621 -0
  184. tools/migrate/migrate.py +598 -0
  185. tools/migrate/tests/__init__.py +0 -0
  186. tools/migrate/tests/conftest.py +23 -0
  187. tools/migrate/tests/test_migrate.py +627 -0
  188. tools/migrate/tests/test_migration_e2e.py +1010 -0
  189. tools/migrate/tests/test_node_migrations.py +813 -0
  190. flowfile/web/static/assets/GraphSolver-17fd26db.css +0 -68
  191. flowfile/web/static/assets/Pivot-f415e85f.css +0 -35
  192. flowfile/web/static/assets/Read-80dc1675.css +0 -197
  193. flowfile/web/static/assets/Read-c3b1929c.js +0 -701
  194. flowfile/web/static/assets/RecordCount-4e95f98e.js +0 -122
  195. flowfile/web/static/assets/Union-89fd73dc.js +0 -146
  196. flowfile/web/static/assets/Unpivot-246e9bbd.css +0 -77
  197. flowfile/web/static/assets/nodeTitle-a16db7c3.js +0 -227
  198. flowfile/web/static/assets/nodeTitle-f4b12bcb.css +0 -134
  199. flowfile_core/flowfile/manage/open_flowfile.py +0 -135
  200. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info/licenses}/LICENSE +0 -0
  201. /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
@@ -67,7 +67,7 @@ from flowfile_core.flowfile.sources.external_sources.base_class import ExternalD
67
67
  T = TypeVar('T', pl.DataFrame, pl.LazyFrame)
68
68
 
69
69
 
70
- def _handle_duplication_join_keys(left_df: T, right_df: T, join_input: transform_schemas.JoinInput) -> Tuple[T, T, Dict[str, str]]:
70
+ def _handle_duplication_join_keys(left_df: T, right_df: T, join_manager: transform_schemas.JoinInputManager) -> Tuple[T, T, Dict[str, str]]:
71
71
  """Temporarily renames join keys to avoid conflicts during a join.
72
72
 
73
73
  This helper function checks the join type and renames the join key columns
@@ -86,20 +86,22 @@ def _handle_duplication_join_keys(left_df: T, right_df: T, join_input: transform
86
86
  - The (potentially modified) right DataFrame.
87
87
  - A dictionary mapping the temporary names back to their desired final names.
88
88
  """
89
+
89
90
  def _construct_temp_name(column_name: str) -> str:
90
91
  return "__FL_TEMP__"+column_name
91
- if join_input.how == 'right':
92
+
93
+ if join_manager.how == 'right':
92
94
  left_df = left_df.with_columns(pl.col(jk.new_name).alias(_construct_temp_name(jk.new_name))
93
- for jk in join_input.left_select.join_key_selects)
95
+ for jk in join_manager.left_manager.get_join_key_selects())
94
96
  reverse_actions = {
95
97
  _construct_temp_name(jk.new_name): transform_schemas.construct_join_key_name("left", jk.new_name)
96
- for jk in join_input.left_select.join_key_selects}
97
- elif join_input.how in ('left', 'inner'):
98
+ for jk in join_manager.left_manager.get_join_key_selects()}
99
+ elif join_manager.how in ('left', 'inner'):
98
100
  right_df = right_df.with_columns(pl.col(jk.new_name).alias(_construct_temp_name(jk.new_name))
99
- for jk in join_input.right_select.join_key_selects)
101
+ for jk in join_manager.right_manager.get_join_key_selects())
100
102
  reverse_actions = {
101
103
  _construct_temp_name(jk.new_name): transform_schemas.construct_join_key_name("right", jk.new_name)
102
- for jk in join_input.right_select.join_key_selects}
104
+ for jk in join_manager.right_manager.get_join_key_selects()}
103
105
  else:
104
106
  reverse_actions = {}
105
107
  return left_df, right_df, reverse_actions
@@ -193,7 +195,6 @@ class FlowDataEngine:
193
195
  _number_of_records_callback: Callable = None
194
196
  _data_callback: Callable = None
195
197
 
196
-
197
198
  def __init__(self,
198
199
  raw_data: Union[List[Dict], List[Any], Dict[str, Any], 'ParquetFile', pl.DataFrame, pl.LazyFrame, input_schema.RawData] = None,
199
200
  path_ref: str = None,
@@ -1147,7 +1148,7 @@ class FlowDataEngine:
1147
1148
  return cls(df, schema=schema, calculate_schema_stats=False, number_of_records=0)
1148
1149
 
1149
1150
  @classmethod
1150
- def create_from_path(cls, received_table: input_schema.ReceivedTableBase) -> "FlowDataEngine":
1151
+ def create_from_path(cls, received_table: input_schema.ReceivedTable) -> "FlowDataEngine":
1151
1152
  """Creates a FlowDataEngine from a local file path.
1152
1153
 
1153
1154
  Supports various file types like CSV, Parquet, and Excel.
@@ -1579,7 +1580,6 @@ class FlowDataEngine:
1579
1580
  A new `FlowDataEngine` instance containing the sampled data.
1580
1581
  """
1581
1582
  logging.info(f'Getting sample of {n_rows} rows')
1582
-
1583
1583
  if random:
1584
1584
  if self.lazy and self.external_source is not None:
1585
1585
  self.collect_external()
@@ -1657,9 +1657,12 @@ class FlowDataEngine:
1657
1657
  An `ExternalFuzzyMatchFetcher` object that can be used to track the
1658
1658
  progress and retrieve the result of the fuzzy join.
1659
1659
  """
1660
- left_df, right_df = prepare_for_fuzzy_match(left=self, right=other, fuzzy_match_input=fuzzy_match_input)
1660
+ fuzzy_match_input_manager = transform_schemas.FuzzyMatchInputManager(fuzzy_match_input)
1661
+ left_df, right_df = prepare_for_fuzzy_match(left=self, right=other,
1662
+ fuzzy_match_input_manager=fuzzy_match_input_manager)
1663
+
1661
1664
  return ExternalFuzzyMatchFetcher(left_df, right_df,
1662
- fuzzy_maps=fuzzy_match_input.fuzzy_maps,
1665
+ fuzzy_maps=fuzzy_match_input_manager.fuzzy_maps,
1663
1666
  file_ref=file_ref + '_fm',
1664
1667
  wait_on_completion=False,
1665
1668
  flow_id=flow_id,
@@ -1674,10 +1677,12 @@ class FlowDataEngine:
1674
1677
  ):
1675
1678
  if file_ref is None:
1676
1679
  file_ref = str(id(self)) + '_' + str(id(other))
1680
+ fuzzy_match_input_manager = transform_schemas.FuzzyMatchInputManager(fuzzy_match_input)
1677
1681
 
1678
- left_df, right_df = prepare_for_fuzzy_match(left=self, right=other, fuzzy_match_input=fuzzy_match_input)
1682
+ left_df, right_df = prepare_for_fuzzy_match(left=self, right=other,
1683
+ fuzzy_match_input_manager=fuzzy_match_input_manager)
1679
1684
  external_tracker = ExternalFuzzyMatchFetcher(left_df, right_df,
1680
- fuzzy_maps=fuzzy_match_input.fuzzy_maps,
1685
+ fuzzy_maps=fuzzy_match_input_manager.fuzzy_maps,
1681
1686
  file_ref=file_ref + '_fm',
1682
1687
  wait_on_completion=False,
1683
1688
  flow_id=flow_id,
@@ -1687,8 +1692,10 @@ class FlowDataEngine:
1687
1692
  def fuzzy_join(self, fuzzy_match_input: transform_schemas.FuzzyMatchInput,
1688
1693
  other: "FlowDataEngine",
1689
1694
  node_logger: NodeLogger = None) -> "FlowDataEngine":
1690
- left_df, right_df = prepare_for_fuzzy_match(left=self, right=other, fuzzy_match_input=fuzzy_match_input)
1691
- fuzzy_mappings = [FuzzyMapping(**fm.__dict__) for fm in fuzzy_match_input.fuzzy_maps]
1695
+ fuzzy_match_input_manager = transform_schemas.FuzzyMatchInputManager(fuzzy_match_input)
1696
+ left_df, right_df = prepare_for_fuzzy_match(left=self, right=other,
1697
+ fuzzy_match_input_manager=fuzzy_match_input_manager)
1698
+ fuzzy_mappings = [FuzzyMapping(**fm.__dict__) for fm in fuzzy_match_input_manager.fuzzy_maps]
1692
1699
  return FlowDataEngine(fuzzy_match_dfs(left_df, right_df, fuzzy_maps=fuzzy_mappings,
1693
1700
  logger=node_logger.logger if node_logger else logger)
1694
1701
  .lazy())
@@ -1713,24 +1720,22 @@ class FlowDataEngine:
1713
1720
  Exception: If `verify_integrity` is True and the join would result in
1714
1721
  an excessively large number of records.
1715
1722
  """
1716
-
1717
1723
  self.lazy = True
1718
-
1719
1724
  other.lazy = True
1720
-
1721
- verify_join_select_integrity(cross_join_input, left_columns=self.columns, right_columns=other.columns)
1722
- right_select = [v.old_name for v in cross_join_input.right_select.renames
1725
+ cross_join_input_manager = transform_schemas.CrossJoinInputManager(cross_join_input)
1726
+ verify_join_select_integrity(cross_join_input_manager.input, left_columns=self.columns, right_columns=other.columns)
1727
+ right_select = [v.old_name for v in cross_join_input_manager.right_select.renames
1723
1728
  if (v.keep or v.join_key) and v.is_available]
1724
- left_select = [v.old_name for v in cross_join_input.left_select.renames
1729
+ left_select = [v.old_name for v in cross_join_input_manager.left_select.renames
1725
1730
  if (v.keep or v.join_key) and v.is_available]
1726
-
1727
- left = self.data_frame.select(left_select).rename(cross_join_input.left_select.rename_table)
1728
- right = other.data_frame.select(right_select).rename(cross_join_input.right_select.rename_table)
1731
+ cross_join_input_manager.auto_rename(rename_mode="suffix")
1732
+ left = self.data_frame.select(left_select).rename(cross_join_input_manager.left_select.rename_table)
1733
+ right = other.data_frame.select(right_select).rename(cross_join_input_manager.right_select.rename_table)
1729
1734
 
1730
1735
  joined_df = left.join(right, how='cross')
1731
1736
 
1732
1737
  cols_to_delete_after = [col.new_name for col in
1733
- cross_join_input.left_select.renames + cross_join_input.left_select.renames
1738
+ cross_join_input_manager.left_select.renames + cross_join_input_manager.left_select.renames
1734
1739
  if col.join_key and not col.keep and col.is_available]
1735
1740
 
1736
1741
  fl = FlowDataEngine(joined_df.drop(cols_to_delete_after), calculate_schema_stats=False, streamable=False)
@@ -1738,76 +1743,60 @@ class FlowDataEngine:
1738
1743
 
1739
1744
  def join(self, join_input: transform_schemas.JoinInput, auto_generate_selection: bool,
1740
1745
  verify_integrity: bool, other: "FlowDataEngine") -> "FlowDataEngine":
1741
- """Performs a standard SQL-style join with another DataFrame.
1746
+ """Performs a standard SQL-style join with another DataFrame."""
1747
+ # Create manager from input
1748
+ join_manager = transform_schemas.JoinInputManager(join_input)
1749
+ ensure_right_unselect_for_semi_and_anti_joins(join_manager.input)
1750
+ for jk in join_manager.join_mapping:
1751
+ if jk.left_col not in {c.old_name for c in join_manager.left_select.renames}:
1752
+ join_manager.left_select.append(transform_schemas.SelectInput(jk.left_col, keep=False))
1753
+ if jk.right_col not in {c.old_name for c in join_manager.right_select.renames}:
1754
+ join_manager.right_select.append(transform_schemas.SelectInput(jk.right_col, keep=False))
1755
+ verify_join_select_integrity(join_manager.input, left_columns=self.columns, right_columns=other.columns)
1756
+ if not verify_join_map_integrity(join_manager.input, left_columns=self.schema, right_columns=other.schema):
1757
+ raise Exception('Join is not valid by the data fields')
1742
1758
 
1743
- Supports various join types like 'inner', 'left', 'right', 'outer', 'semi', and 'anti'.
1759
+ if auto_generate_selection:
1760
+ join_manager.auto_rename()
1744
1761
 
1745
- Args:
1746
- join_input: A `JoinInput` object defining the join keys, join type,
1747
- and column selections.
1748
- auto_generate_selection: If True, automatically handles column renaming.
1749
- verify_integrity: If True, performs checks to prevent excessively large joins.
1750
- other: The right `FlowDataEngine` to join with.
1762
+ # Use manager properties throughout
1763
+ left = self.data_frame.select(join_manager.left_manager.get_select_cols()).rename(join_manager.left_manager.get_rename_table())
1764
+ right = other.data_frame.select(join_manager.right_manager.get_select_cols()).rename(join_manager.right_manager.get_rename_table())
1751
1765
 
1752
- Returns:
1753
- A new `FlowDataEngine` with the joined data.
1754
-
1755
- Raises:
1756
- Exception: If the join configuration is invalid or if `verify_integrity`
1757
- is True and the join is predicted to be too large.
1758
- """
1759
- ensure_right_unselect_for_semi_and_anti_joins(join_input)
1760
- verify_join_select_integrity(join_input, left_columns=self.columns, right_columns=other.columns)
1761
- if not verify_join_map_integrity(join_input, left_columns=self.schema, right_columns=other.schema):
1762
- raise Exception('Join is not valid by the data fields')
1763
- if auto_generate_selection:
1764
- join_input.auto_rename()
1765
- left = self.data_frame.select(get_select_columns(join_input.left_select.renames)).rename(join_input.left_select.rename_table)
1766
- right = other.data_frame.select(get_select_columns(join_input.right_select.renames)).rename(join_input.right_select.rename_table)
1767
- if verify_integrity and join_input.how != 'right':
1768
- n_records = get_join_count(left, right, left_on_keys=join_input.left_join_keys,
1769
- right_on_keys=join_input.right_join_keys, how=join_input.how)
1770
- if n_records > 1_000_000_000:
1771
- raise Exception("Join will result in too many records, ending process")
1772
- else:
1773
- n_records = -1
1774
- left, right, reverse_join_key_mapping = _handle_duplication_join_keys(left, right, join_input)
1775
- left, right = rename_df_table_for_join(left, right, join_input.get_join_key_renames())
1776
- if join_input.how == 'right':
1766
+ left, right, reverse_join_key_mapping = _handle_duplication_join_keys(left, right, join_manager)
1767
+ left, right = rename_df_table_for_join(left, right, join_manager.get_join_key_renames())
1768
+ if join_manager.how == 'right':
1777
1769
  joined_df = right.join(
1778
1770
  other=left,
1779
- left_on=join_input.right_join_keys,
1780
- right_on=join_input.left_join_keys,
1771
+ left_on=join_manager.right_join_keys,
1772
+ right_on=join_manager.left_join_keys,
1781
1773
  how="left",
1782
1774
  suffix="").rename(reverse_join_key_mapping)
1783
1775
  else:
1784
1776
  joined_df = left.join(
1785
1777
  other=right,
1786
- left_on=join_input.left_join_keys,
1787
- right_on=join_input.right_join_keys,
1788
- how=join_input.how,
1778
+ left_on=join_manager.left_join_keys,
1779
+ right_on=join_manager.right_join_keys,
1780
+ how=join_manager.how,
1789
1781
  suffix="").rename(reverse_join_key_mapping)
1790
- left_cols_to_delete_after = [get_col_name_to_delete(col, 'left') for col in join_input.left_select.renames
1791
- if not col.keep
1792
- and col.is_available and col.join_key
1793
- ]
1794
- right_cols_to_delete_after = [get_col_name_to_delete(col, 'right') for col in join_input.right_select.renames
1795
- if not col.keep
1796
- and col.is_available and col.join_key
1797
- and join_input.how in ("left", "right", "inner", "cross", "outer")
1798
- ]
1782
+
1783
+ left_cols_to_delete_after = [get_col_name_to_delete(col, 'left')
1784
+ for col in join_manager.input.left_select.renames
1785
+ if not col.keep and col.is_available and col.join_key]
1786
+
1787
+ right_cols_to_delete_after = [get_col_name_to_delete(col, 'right')
1788
+ for col in join_manager.input.right_select.renames
1789
+ if not col.keep and col.is_available and col.join_key
1790
+ and join_manager.how in ("left", "right", "inner", "cross", "outer")]
1791
+
1799
1792
  if len(right_cols_to_delete_after + left_cols_to_delete_after) > 0:
1800
1793
  joined_df = joined_df.drop(left_cols_to_delete_after + right_cols_to_delete_after)
1801
- undo_join_key_remapping = get_undo_rename_mapping_join(join_input)
1794
+
1795
+ undo_join_key_remapping = get_undo_rename_mapping_join(join_manager)
1802
1796
  joined_df = joined_df.rename(undo_join_key_remapping)
1803
1797
 
1804
- if verify_integrity:
1805
- return FlowDataEngine(joined_df, calculate_schema_stats=True,
1806
- number_of_records=n_records, streamable=False)
1807
- else:
1808
- fl = FlowDataEngine(joined_df, calculate_schema_stats=False,
1809
- number_of_records=0, streamable=False)
1810
- return fl
1798
+ return FlowDataEngine(joined_df, calculate_schema_stats=False,
1799
+ number_of_records=0, streamable=False)
1811
1800
 
1812
1801
  def solve_graph(self, graph_solver_input: transform_schemas.GraphSolverInput) -> "FlowDataEngine":
1813
1802
  """Solves a graph problem represented by 'from' and 'to' columns.
@@ -2105,7 +2094,7 @@ class FlowDataEngine:
2105
2094
  A new `FlowDataEngine` instance with the applied formula.
2106
2095
  """
2107
2096
  expr = to_expr(func)
2108
- if output_data_type not in (None, "Auto"):
2097
+ if output_data_type not in (None, transform_schemas.AUTO_DATA_TYPE):
2109
2098
  df = self.data_frame.with_columns(expr.cast(output_data_type).alias(col_name))
2110
2099
  else:
2111
2100
  df = self.data_frame.with_columns(expr.alias(col_name))
@@ -2134,8 +2123,8 @@ class FlowDataEngine:
2134
2123
  data_type=output_fs.file_type,
2135
2124
  path=output_fs.abs_file_path,
2136
2125
  write_mode=output_fs.write_mode,
2137
- sheet_name=output_fs.output_excel_table.sheet_name,
2138
- delimiter=output_fs.output_csv_table.delimiter,
2126
+ sheet_name=output_fs.sheet_name,
2127
+ delimiter=output_fs.delimiter,
2139
2128
  flow_id=flow_id,
2140
2129
  node_id=node_id
2141
2130
  )
@@ -2149,8 +2138,8 @@ class FlowDataEngine:
2149
2138
  data_type=output_fs.file_type,
2150
2139
  path=output_fs.abs_file_path,
2151
2140
  write_mode=output_fs.write_mode,
2152
- sheet_name=output_fs.output_excel_table.sheet_name,
2153
- delimiter=output_fs.output_csv_table.delimiter,
2141
+ sheet_name=output_fs.sheet_name,
2142
+ delimiter=output_fs.delimiter,
2154
2143
  flow_id=flow_id,
2155
2144
  node_id=node_id,
2156
2145
  )
@@ -2239,6 +2228,7 @@ class FlowDataEngine:
2239
2228
  def _calculate_schema(self) -> List[Dict]:
2240
2229
  """Calculates schema statistics."""
2241
2230
  if self.external_source is not None:
2231
+
2242
2232
  self.collect_external()
2243
2233
  v = utils.calculate_schema(self.data_frame)
2244
2234
  return v
@@ -2256,6 +2246,7 @@ class FlowDataEngine:
2256
2246
  def create_from_path_worker(cls, received_table: input_schema.ReceivedTable, flow_id: int, node_id: int | str):
2257
2247
  """Creates a FlowDataEngine from a path in a worker process."""
2258
2248
  received_table.set_absolute_filepath()
2249
+
2259
2250
  external_fetcher = ExternalCreateFetcher(received_table=received_table,
2260
2251
  file_type=received_table.file_type, flow_id=flow_id, node_id=node_id)
2261
2252
  return cls(external_fetcher.get_result())
@@ -0,0 +1,4 @@
1
+ from typing import Literal
2
+
3
+ DataTypeGroup = Literal['numeric', 'str', 'date']
4
+ ReadableDataTypeGroup = Literal['Numeric', 'String', 'Date', 'Other', 'Boolean', 'Binary', 'Complex']
@@ -1,44 +1,13 @@
1
1
 
2
2
  from dataclasses import dataclass
3
- from typing import Optional, Any, List, Dict, Literal, Iterable
3
+ from typing import Optional, Any, List, Dict, Iterable
4
4
 
5
5
  from flowfile_core.schemas import input_schema
6
6
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import cast_str_to_polars_type
7
7
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.polars_type import PlType
8
+ from flowfile_core.flowfile.flow_data_engine.flow_file_column.interface import ReadableDataTypeGroup, DataTypeGroup
9
+ from flowfile_core.flowfile.flow_data_engine.flow_file_column.type_registry import convert_pl_type_to_string
8
10
  import polars as pl
9
- # TODO: rename flow_file_column to flowfile_column
10
- DataTypeGroup = Literal['numeric', 'str', 'date']
11
-
12
-
13
- def convert_pl_type_to_string(pl_type: pl.DataType, inner: bool = False) -> str:
14
- if isinstance(pl_type, pl.List):
15
- inner_str = convert_pl_type_to_string(pl_type.inner, inner=True)
16
- return f"pl.List({inner_str})"
17
- elif isinstance(pl_type, pl.Array):
18
- inner_str = convert_pl_type_to_string(pl_type.inner, inner=True)
19
- return f"pl.Array({inner_str})"
20
- elif isinstance(pl_type, pl.Decimal):
21
- precision = pl_type.precision if hasattr(pl_type, 'precision') else None
22
- scale = pl_type.scale if hasattr(pl_type, 'scale') else None
23
- if precision is not None and scale is not None:
24
- return f"pl.Decimal({precision}, {scale})"
25
- elif precision is not None:
26
- return f"pl.Decimal({precision})"
27
- else:
28
- return "pl.Decimal()"
29
- elif isinstance(pl_type, pl.Struct):
30
- # Handle Struct with field definitions
31
- fields = []
32
- if hasattr(pl_type, 'fields'):
33
- for field in pl_type.fields:
34
- field_name = field.name
35
- field_type = convert_pl_type_to_string(field.dtype, inner=True)
36
- fields.append(f'pl.Field("{field_name}", {field_type})')
37
- field_str = ", ".join(fields)
38
- return f"pl.Struct([{field_str}])"
39
- else:
40
- # For base types, we want the full pl.TypeName format
41
- return str(pl_type.base_type()) if not inner else f"pl.{pl_type}"
42
11
 
43
12
 
44
13
  @dataclass
@@ -52,6 +21,7 @@ class FlowfileColumn:
52
21
  number_of_empty_values: int
53
22
  number_of_unique_values: int
54
23
  example_values: str
24
+ data_type_group: ReadableDataTypeGroup
55
25
  __sql_type: Optional[Any]
56
26
  __is_unique: Optional[bool]
57
27
  __nullable: Optional[bool]
@@ -75,6 +45,7 @@ class FlowfileColumn:
75
45
  self.__is_unique = None
76
46
  self.__sql_type = None
77
47
  self.__perc_unique = None
48
+ self.data_type_group = self.get_readable_datatype_group()
78
49
 
79
50
  def __repr__(self):
80
51
  """
@@ -220,6 +191,20 @@ class FlowfileColumn:
220
191
  return 'numeric'
221
192
  elif self.data_type in ('datetime', 'date', 'Date', 'Datetime', 'Time'):
222
193
  return 'date'
194
+ else:
195
+ return 'str'
196
+
197
+ def get_readable_datatype_group(self) -> ReadableDataTypeGroup:
198
+ if self.data_type in ('Utf8', 'VARCHAR', 'CHAR', 'NVARCHAR', 'String'):
199
+ return 'String'
200
+ elif self.data_type in ('fixed_decimal', 'decimal', 'float', 'integer', 'boolean', 'double', 'Int16', 'Int32',
201
+ 'Int64', 'Float32', 'Float64', 'Decimal', 'Binary', 'Boolean', 'Uint8', 'Uint16',
202
+ 'Uint32', 'Uint64'):
203
+ return 'Numeric'
204
+ elif self.data_type in ('datetime', 'date', 'Date', 'Datetime', 'Time'):
205
+ return 'Date'
206
+ else:
207
+ return 'Other'
223
208
 
224
209
  def get_polars_type(self) -> PlType:
225
210
  pl_datatype = cast_str_to_polars_type(self.data_type)
@@ -0,0 +1,36 @@
1
+
2
+ from typing import Type, Literal, List, Dict, Union, Tuple
3
+ import polars as pl
4
+ DataTypeGroup = Literal['numeric', 'string', 'datetime', 'boolean', 'binary', 'complex', 'unknown']
5
+
6
+
7
+ def convert_pl_type_to_string(pl_type: pl.DataType, inner: bool = False) -> str:
8
+ if isinstance(pl_type, pl.List):
9
+ inner_str = convert_pl_type_to_string(pl_type.inner, inner=True)
10
+ return f"pl.List({inner_str})"
11
+ elif isinstance(pl_type, pl.Array):
12
+ inner_str = convert_pl_type_to_string(pl_type.inner, inner=True)
13
+ return f"pl.Array({inner_str})"
14
+ elif isinstance(pl_type, pl.Decimal):
15
+ precision = pl_type.precision if hasattr(pl_type, 'precision') else None
16
+ scale = pl_type.scale if hasattr(pl_type, 'scale') else None
17
+ if precision is not None and scale is not None:
18
+ return f"pl.Decimal({precision}, {scale})"
19
+ elif precision is not None:
20
+ return f"pl.Decimal({precision})"
21
+ else:
22
+ return "pl.Decimal()"
23
+ elif isinstance(pl_type, pl.Struct):
24
+ # Handle Struct with field definitions
25
+ fields = []
26
+ if hasattr(pl_type, 'fields'):
27
+ for field in pl_type.fields:
28
+ field_name = field.name
29
+ field_type = convert_pl_type_to_string(field.dtype, inner=True)
30
+ fields.append(f'pl.Field("{field_name}", {field_type})')
31
+ field_str = ", ".join(fields)
32
+ return f"pl.Struct([{field_str}])"
33
+ else:
34
+ # For base types, we want the full pl.TypeName format
35
+ return str(pl_type.base_type()) if not inner else f"pl.{pl_type}"
36
+
@@ -1,4 +1,4 @@
1
- from flowfile_core.schemas.transform_schema import FuzzyMatchInput, SelectInput, JoinInputs
1
+ from flowfile_core.schemas.transform_schema import FuzzyMatchInput, SelectInput, JoinInputs, FuzzyMatchInputManager
2
2
  from flowfile_core.flowfile.flow_data_engine.join import verify_join_select_integrity, verify_join_map_integrity
3
3
  import polars as pl
4
4
  from typing import TYPE_CHECKING, Tuple, List
@@ -15,37 +15,37 @@ def _order_join_inputs_based_on_col_order(col_order: List[str], join_inputs: Joi
15
15
  Returns:
16
16
  None
17
17
  """
18
- select_map = {select.new_name: select for select in join_inputs.renames}
18
+ select_map = {select.old_name: select for select in join_inputs.renames}
19
19
  ordered_renames = [select_map[col] for col in col_order if col in select_map]
20
20
  join_inputs.renames = ordered_renames
21
21
 
22
22
 
23
23
  def _ensure_all_columns_have_select(left: "FlowDataEngine",
24
24
  right: "FlowDataEngine",
25
- fuzzy_match_input: FuzzyMatchInput):
25
+ fuzzy_match_input_manager: FuzzyMatchInputManager):
26
26
  """
27
27
  Ensure that all columns in the left and right FlowDataEngines are included in the fuzzy match input's select
28
28
  statements.
29
29
  Args:
30
- left (FlowDataEngine):
31
- right (FlowDataEngine):
32
- fuzzy_match_input ():
30
+ left (FlowDataEngine): Left FlowDataEngine
31
+ right (FlowDataEngine): Right FlowDataEngine
32
+ fuzzy_match_input_manager (FuzzyMatchInputManager): Fuzzy match input manager
33
33
 
34
34
  Returns:
35
35
  None
36
36
  """
37
- right_cols_in_select = {c.old_name for c in fuzzy_match_input.right_select.renames}
38
- left_cols_in_select = {c.old_name for c in fuzzy_match_input.left_select.renames}
37
+ right_cols_in_select = {c.old_name for c in fuzzy_match_input_manager.right_select.renames}
38
+ left_cols_in_select = {c.old_name for c in fuzzy_match_input_manager.left_select.renames}
39
39
 
40
- fuzzy_match_input.left_select.renames.extend(
40
+ fuzzy_match_input_manager.left_select.renames.extend(
41
41
  [SelectInput(col) for col in left.columns if col not in left_cols_in_select])
42
- fuzzy_match_input.right_select.renames.extend(
42
+ fuzzy_match_input_manager.right_select.renames.extend(
43
43
  [SelectInput(col) for col in right.columns if col not in right_cols_in_select]
44
44
  )
45
45
 
46
46
 
47
47
  def prepare_for_fuzzy_match(left: "FlowDataEngine", right: "FlowDataEngine",
48
- fuzzy_match_input: FuzzyMatchInput) -> Tuple[pl.LazyFrame, pl.LazyFrame]:
48
+ fuzzy_match_input_manager: FuzzyMatchInputManager) -> Tuple[pl.LazyFrame, pl.LazyFrame]:
49
49
  """
50
50
  Prepare two FlowDataEngines for fuzzy matching.
51
51
 
@@ -58,22 +58,22 @@ def prepare_for_fuzzy_match(left: "FlowDataEngine", right: "FlowDataEngine",
58
58
  """
59
59
  left.lazy = True
60
60
  right.lazy = True
61
- _ensure_all_columns_have_select(left, right, fuzzy_match_input)
62
- _order_join_inputs_based_on_col_order(left.columns, fuzzy_match_input.left_select)
63
- _order_join_inputs_based_on_col_order(right.columns, fuzzy_match_input.right_select)
64
-
65
- verify_join_select_integrity(fuzzy_match_input, left_columns=left.columns, right_columns=right.columns)
66
- if not verify_join_map_integrity(fuzzy_match_input, left_columns=left.schema, right_columns=right.schema):
61
+ _ensure_all_columns_have_select(left, right, fuzzy_match_input_manager)
62
+ _order_join_inputs_based_on_col_order(left.columns, fuzzy_match_input_manager.left_select.join_inputs)
63
+ _order_join_inputs_based_on_col_order(right.columns, fuzzy_match_input_manager.right_select.join_inputs)
64
+ verify_join_select_integrity(fuzzy_match_input_manager.fuzzy_input, left_columns=left.columns, right_columns=right.columns)
65
+ if not verify_join_map_integrity(fuzzy_match_input_manager.fuzzy_input, left_columns=left.schema,
66
+ right_columns=right.schema):
67
67
  raise Exception('Join is not valid by the data fields')
68
- fuzzy_match_input = fuzzy_match_input
69
- fuzzy_match_input.auto_rename()
70
68
 
71
- right_select = [v.old_name for v in fuzzy_match_input.right_select.renames if
69
+ fuzzy_match_input_manager.auto_rename()
70
+
71
+ right_select = [v.old_name for v in fuzzy_match_input_manager.right_select.renames if
72
72
  (v.keep or v.join_key) and v.is_available]
73
- left_select = [v.old_name for v in fuzzy_match_input.left_select.renames if
73
+ left_select = [v.old_name for v in fuzzy_match_input_manager.left_select.renames if
74
74
  (v.keep or v.join_key) and v.is_available]
75
75
  left_df: pl.LazyFrame | pl.DataFrame = left.data_frame.select(left_select).rename(
76
- fuzzy_match_input.left_select.rename_table)
76
+ fuzzy_match_input_manager.left_select.rename_table)
77
77
  right_df: pl.LazyFrame | pl.DataFrame = right.data_frame.select(right_select).rename(
78
- fuzzy_match_input.right_select.rename_table)
78
+ fuzzy_match_input_manager.right_select.rename_table)
79
79
  return left_df, right_df
@@ -16,7 +16,7 @@ def rename_df_table_for_join(left_df: T, right_df: T, join_key_rename: transfor
16
16
  right_df.rename({r[0]: r[1] for r in join_key_rename.right.join_key_renames}))
17
17
 
18
18
 
19
- def get_undo_rename_mapping_join(join_input: transform_schemas.JoinInput) -> Dict[str, str]:
19
+ def get_undo_rename_mapping_join(join_input: transform_schemas.JoinInputManager) -> Dict[str, str]:
20
20
  join_key_rename = join_input.get_join_key_renames(True)
21
21
  return {r[1]: r[0] for r in join_key_rename.right.join_key_renames + join_key_rename.left.join_key_renames}
22
22
 
@@ -4,9 +4,14 @@ from flowfile_core.schemas import transform_schema
4
4
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
5
5
 
6
6
 
7
- def verify_join_select_integrity(join_input: transform_schema.JoinInput | transform_schema.CrossJoinInput,
8
- left_columns: List[str],
9
- right_columns: List[str]):
7
+ def verify_join_select_integrity(
8
+ join_input:
9
+ transform_schema.JoinInput |
10
+ transform_schema.CrossJoinInput |
11
+ transform_schema.FuzzyMatchInput |
12
+ transform_schema.JoinInputsManager,
13
+ left_columns: List[str],
14
+ right_columns: List[str]):
10
15
  """
11
16
  Verify column availability for join selection and update availability flags.
12
17
 
@@ -27,7 +32,7 @@ def verify_join_select_integrity(join_input: transform_schema.JoinInput | transf
27
32
  c.is_available = True
28
33
 
29
34
 
30
- def verify_join_map_integrity(join_input: transform_schema.JoinInput,
35
+ def verify_join_map_integrity(join_input: transform_schema.JoinInput | transform_schema.FuzzyMatchInput | transform_schema.JoinInputManager,
31
36
  left_columns: List[FlowfileColumn],
32
37
  right_columns: List[FlowfileColumn]
33
38
  ):