Flowfile 0.4.1__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. flowfile/__init__.py +3 -1
  2. flowfile/api.py +1 -2
  3. flowfile/web/static/assets/{CloudConnectionManager-d3248f8d.js → CloudConnectionManager-0dfba9f2.js} +2 -2
  4. flowfile/web/static/assets/{CloudStorageReader-d65bf041.js → CloudStorageReader-d5b1b6c9.js} +6 -6
  5. flowfile/web/static/assets/{CloudStorageWriter-e83be3ed.js → CloudStorageWriter-00d87aad.js} +6 -6
  6. flowfile/web/static/assets/{ColumnSelector-cce661cf.js → ColumnSelector-4685e75d.js} +1 -1
  7. flowfile/web/static/assets/{ContextMenu-cf18d2cc.js → ContextMenu-23e909da.js} +1 -1
  8. flowfile/web/static/assets/{ContextMenu-160afb08.js → ContextMenu-70ae0c79.js} +1 -1
  9. flowfile/web/static/assets/{ContextMenu-11a4652a.js → ContextMenu-f149cf7c.js} +1 -1
  10. flowfile/web/static/assets/{CrossJoin-d395d38c.js → CrossJoin-702a3edd.js} +7 -7
  11. flowfile/web/static/assets/{CustomNode-b812dc0b.js → CustomNode-b1519993.js} +11 -11
  12. flowfile/web/static/assets/{DatabaseConnectionSettings-7000bf2c.js → DatabaseConnectionSettings-6f3e4ea5.js} +2 -2
  13. flowfile/web/static/assets/{DatabaseManager-9662ec5b.js → DatabaseManager-cf5ef661.js} +2 -2
  14. flowfile/web/static/assets/{DatabaseReader-4f035d0c.js → DatabaseReader-d38c7295.js} +9 -9
  15. flowfile/web/static/assets/{DatabaseWriter-f65dcd54.js → DatabaseWriter-b04ef46a.js} +8 -8
  16. flowfile/web/static/assets/{ExploreData-94c43dfc.js → ExploreData-5fa10ed8.js} +5 -5
  17. flowfile/web/static/assets/{ExternalSource-ac04b3cc.js → ExternalSource-d39af878.js} +5 -5
  18. flowfile/web/static/assets/{Filter-812dcbca.js → Filter-9b6d08db.js} +7 -7
  19. flowfile/web/static/assets/{Formula-71472193.js → Formula-6b04fb1d.js} +7 -7
  20. flowfile/web/static/assets/{FuzzyMatch-b317f631.js → FuzzyMatch-999521f4.js} +8 -8
  21. flowfile/web/static/assets/{GraphSolver-754a234f.js → GraphSolver-17dd2198.js} +6 -6
  22. flowfile/web/static/assets/{GroupBy-6c6f9802.js → GroupBy-6b039e18.js} +5 -5
  23. flowfile/web/static/assets/{Join-a1b800be.js → Join-24d0f113.js} +8 -8
  24. flowfile/web/static/assets/{ManualInput-a9640276.js → ManualInput-34639209.js} +4 -4
  25. flowfile/web/static/assets/{MultiSelect-97213888.js → MultiSelect-0e8724a3.js} +2 -2
  26. flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-6ffe088a.js → MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js} +1 -1
  27. flowfile/web/static/assets/{NumericInput-e638088a.js → NumericInput-3d63a470.js} +2 -2
  28. flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-90eb2cba.js → NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js} +1 -1
  29. flowfile/web/static/assets/{Output-ddc9079f.css → Output-283fe388.css} +5 -5
  30. flowfile/web/static/assets/{Output-76750610.js → Output-edea9802.js} +57 -38
  31. flowfile/web/static/assets/{Pivot-7814803f.js → Pivot-61d19301.js} +7 -7
  32. flowfile/web/static/assets/{PivotValidation-f92137d2.js → PivotValidation-de9f43fe.js} +1 -1
  33. flowfile/web/static/assets/{PivotValidation-76dd431a.js → PivotValidation-f97fec5b.js} +1 -1
  34. flowfile/web/static/assets/{PolarsCode-889c3008.js → PolarsCode-bc3c9984.js} +5 -5
  35. flowfile/web/static/assets/{Read-637b72a7.js → Read-64a3f259.js} +80 -105
  36. flowfile/web/static/assets/{Read-6b17491f.css → Read-e808b239.css} +10 -10
  37. flowfile/web/static/assets/{RecordCount-2b050c41.js → RecordCount-3d5039be.js} +4 -4
  38. flowfile/web/static/assets/{RecordId-81df7784.js → RecordId-597510e0.js} +6 -6
  39. flowfile/web/static/assets/{SQLQueryComponent-88dcfe53.js → SQLQueryComponent-df51adbe.js} +1 -1
  40. flowfile/web/static/assets/{Sample-258ad2a9.js → Sample-4be0a507.js} +4 -4
  41. flowfile/web/static/assets/{SecretManager-2a2cb7e2.js → SecretManager-4839be57.js} +2 -2
  42. flowfile/web/static/assets/{Select-850215fd.js → Select-9b72f201.js} +7 -7
  43. flowfile/web/static/assets/{SettingsSection-29b4fa6b.js → SettingsSection-7ded385d.js} +1 -1
  44. flowfile/web/static/assets/{SettingsSection-0e8d9123.js → SettingsSection-e1e9c953.js} +1 -1
  45. flowfile/web/static/assets/{SettingsSection-55bae608.js → SettingsSection-f0f75a42.js} +1 -1
  46. flowfile/web/static/assets/{SingleSelect-bebd408b.js → SingleSelect-6c777aac.js} +2 -2
  47. flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-6093741c.js → SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js} +1 -1
  48. flowfile/web/static/assets/{SliderInput-6a05ab61.js → SliderInput-7cb93e62.js} +1 -1
  49. flowfile/web/static/assets/{Sort-10ab48ed.js → Sort-6cbde21a.js} +5 -5
  50. flowfile/web/static/assets/{TextInput-df9d6259.js → TextInput-d9a40c11.js} +2 -2
  51. flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-000e1178.js → TextInput.vue_vue_type_script_setup_true_lang-5896c375.js} +1 -1
  52. flowfile/web/static/assets/{TextToRows-6c2d93d8.js → TextToRows-c4fcbf4d.js} +7 -7
  53. flowfile/web/static/assets/{ToggleSwitch-0ff7ac52.js → ToggleSwitch-4ef91d19.js} +2 -2
  54. flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-c6dc3029.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js} +1 -1
  55. flowfile/web/static/assets/{UnavailableFields-1bab97cb.js → UnavailableFields-a03f512c.js} +2 -2
  56. flowfile/web/static/assets/{Union-b563478a.js → Union-bfe9b996.js} +4 -4
  57. flowfile/web/static/assets/{Unique-f90db5db.js → Unique-5d023a27.js} +8 -20
  58. flowfile/web/static/assets/{Unpivot-bcb0025f.js → Unpivot-91cc5354.js} +6 -6
  59. flowfile/web/static/assets/{UnpivotValidation-c4e73b04.js → UnpivotValidation-7ee2de44.js} +1 -1
  60. flowfile/web/static/assets/{VueGraphicWalker-bb8535e2.js → VueGraphicWalker-e51b9924.js} +1 -1
  61. flowfile/web/static/assets/{api-2d6adc4f.js → api-c1bad5ca.js} +1 -1
  62. flowfile/web/static/assets/{api-4c8e3822.js → api-cf1221f0.js} +1 -1
  63. flowfile/web/static/assets/{designer-e3c150ec.css → designer-8da3ba3a.css} +90 -67
  64. flowfile/web/static/assets/{designer-f3656d8c.js → designer-9633482a.js} +119 -51
  65. flowfile/web/static/assets/{documentation-52b241e7.js → documentation-ca400224.js} +1 -1
  66. flowfile/web/static/assets/{dropDown-1bca8a74.js → dropDown-614b998d.js} +1 -1
  67. flowfile/web/static/assets/{fullEditor-2985687e.js → fullEditor-f7971590.js} +2 -2
  68. flowfile/web/static/assets/{genericNodeSettings-0476ba4e.js → genericNodeSettings-4fe5f36b.js} +3 -3
  69. flowfile/web/static/assets/{index-246f201c.js → index-5429bbf8.js} +6 -8
  70. flowfile/web/static/assets/nodeInput-5d0d6b79.js +41 -0
  71. flowfile/web/static/assets/{outputCsv-d686eeaf.js → outputCsv-076b85ab.js} +1 -1
  72. flowfile/web/static/assets/{outputExcel-8809ea2f.js → outputExcel-0fd17dbe.js} +1 -1
  73. flowfile/web/static/assets/{outputParquet-53ba645a.js → outputParquet-b61e0847.js} +1 -1
  74. flowfile/web/static/assets/{readCsv-053bf97b.js → readCsv-a8bb8b61.js} +21 -20
  75. flowfile/web/static/assets/{readCsv-bca3ed53.css → readCsv-c767cb37.css} +13 -13
  76. flowfile/web/static/assets/{readExcel-ad531eab.js → readExcel-67b4aee0.js} +10 -12
  77. flowfile/web/static/assets/{readExcel-e1b381ea.css → readExcel-806d2826.css} +12 -12
  78. flowfile/web/static/assets/{readParquet-cee068e2.css → readParquet-48c81530.css} +3 -3
  79. flowfile/web/static/assets/{readParquet-58e899a1.js → readParquet-92ce1dbc.js} +4 -7
  80. flowfile/web/static/assets/{secretApi-538058f3.js → secretApi-68435402.js} +1 -1
  81. flowfile/web/static/assets/{selectDynamic-b38de2ba.js → selectDynamic-92e25ee3.js} +3 -3
  82. flowfile/web/static/assets/{vue-codemirror.esm-db9b8936.js → vue-codemirror.esm-41b0e0d7.js} +7 -4
  83. flowfile/web/static/assets/{vue-content-loader.es-b5f3ac30.js → vue-content-loader.es-2c8e608f.js} +1 -1
  84. flowfile/web/static/index.html +1 -1
  85. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/METADATA +3 -2
  86. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/RECORD +138 -126
  87. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/WHEEL +1 -1
  88. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/entry_points.txt +1 -0
  89. flowfile_core/__init__.py +3 -0
  90. flowfile_core/flowfile/analytics/analytics_processor.py +1 -0
  91. flowfile_core/flowfile/code_generator/code_generator.py +62 -64
  92. flowfile_core/flowfile/flow_data_engine/create/funcs.py +73 -56
  93. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +77 -86
  94. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +23 -23
  95. flowfile_core/flowfile/flow_data_engine/join/utils.py +1 -1
  96. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +9 -4
  97. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +184 -78
  98. flowfile_core/flowfile/flow_data_engine/utils.py +2 -0
  99. flowfile_core/flowfile/flow_graph.py +129 -26
  100. flowfile_core/flowfile/flow_node/flow_node.py +3 -0
  101. flowfile_core/flowfile/flow_node/models.py +2 -1
  102. flowfile_core/flowfile/handler.py +5 -5
  103. flowfile_core/flowfile/manage/compatibility_enhancements.py +404 -41
  104. flowfile_core/flowfile/manage/io_flowfile.py +394 -0
  105. flowfile_core/flowfile/node_designer/__init__.py +1 -1
  106. flowfile_core/flowfile/node_designer/_type_registry.py +2 -2
  107. flowfile_core/flowfile/node_designer/custom_node.py +1 -1
  108. flowfile_core/flowfile/node_designer/ui_components.py +1 -1
  109. flowfile_core/flowfile/schema_callbacks.py +8 -5
  110. flowfile_core/flowfile/setting_generator/settings.py +15 -9
  111. flowfile_core/routes/routes.py +8 -10
  112. flowfile_core/schemas/cloud_storage_schemas.py +0 -2
  113. flowfile_core/schemas/input_schema.py +222 -65
  114. flowfile_core/schemas/output_model.py +1 -1
  115. flowfile_core/schemas/schemas.py +145 -32
  116. flowfile_core/schemas/transform_schema.py +1083 -413
  117. flowfile_core/schemas/yaml_types.py +103 -0
  118. flowfile_core/{flowfile/node_designer/data_types.py → types.py} +11 -1
  119. flowfile_frame/__init__.py +3 -1
  120. flowfile_frame/flow_frame.py +15 -18
  121. flowfile_frame/flow_frame_methods.py +12 -9
  122. flowfile_worker/__init__.py +3 -0
  123. flowfile_worker/create/__init__.py +3 -21
  124. flowfile_worker/create/funcs.py +68 -56
  125. flowfile_worker/create/models.py +130 -62
  126. flowfile_worker/routes.py +5 -8
  127. tools/migrate/README.md +56 -0
  128. tools/migrate/__init__.py +12 -0
  129. tools/migrate/__main__.py +131 -0
  130. tools/migrate/legacy_schemas.py +621 -0
  131. tools/migrate/migrate.py +598 -0
  132. tools/migrate/tests/__init__.py +0 -0
  133. tools/migrate/tests/conftest.py +23 -0
  134. tools/migrate/tests/test_migrate.py +627 -0
  135. tools/migrate/tests/test_migration_e2e.py +1010 -0
  136. tools/migrate/tests/test_node_migrations.py +813 -0
  137. flowfile_core/flowfile/manage/open_flowfile.py +0 -143
  138. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/licenses/LICENSE +0 -0
  139. /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
@@ -23,15 +23,10 @@ from flowfile_core.flowfile.sources.external_sources.sql_source.models import (D
23
23
  DatabaseExternalWriteSettings)
24
24
  from flowfile_core.schemas.cloud_storage_schemas import CloudStorageWriteSettingsWorkerInterface
25
25
  from flowfile_core.schemas.input_schema import (
26
- ReceivedCsvTable,
27
- ReceivedExcelTable,
28
- ReceivedJsonTable,
29
- ReceivedParquetTable
26
+ ReceivedTable
30
27
  )
31
28
  from flowfile_core.utils.arrow_reader import read
32
29
 
33
- ReceivedTableCollection = ReceivedCsvTable | ReceivedParquetTable | ReceivedJsonTable | ReceivedExcelTable
34
-
35
30
 
36
31
  def trigger_df_operation(flow_id: int, node_id: int | str, lf: pl.LazyFrame, file_ref: str, operation_type: OperationType = 'store') -> Status:
37
32
  encoded_operation = encodebytes(lf.serialize()).decode()
@@ -74,7 +69,7 @@ def trigger_fuzzy_match_operation(left_df: pl.LazyFrame, right_df: pl.LazyFrame,
74
69
  return Status(**v.json())
75
70
 
76
71
 
77
- def trigger_create_operation(flow_id: int, node_id: int | str, received_table: ReceivedTableCollection,
72
+ def trigger_create_operation(flow_id: int, node_id: int | str, received_table: ReceivedTable,
78
73
  file_type: str = Literal['csv', 'parquet', 'json', 'excel']):
79
74
  f = requests.post(url=f'{WORKER_URL}/create_table/{file_type}', data=received_table.model_dump_json(),
80
75
  params={'flowfile_flow_id': flow_id, 'flowfile_node_id': node_id})
@@ -194,87 +189,161 @@ def cancel_task(file_ref: str) -> bool:
194
189
 
195
190
 
196
191
  class BaseFetcher:
197
- result: Optional[Any] = None
198
- started: bool = False
199
- running: bool = False
200
- error_code: int = 0
201
- error_description: Optional[str] = None
202
- file_ref: Optional[str] = None
192
+ """
193
+ Thread-safe fetcher for polling worker status and retrieving results.
194
+ """
203
195
 
204
196
  def __init__(self, file_ref: str = None):
205
197
  self.file_ref = file_ref if file_ref else str(uuid4())
206
- self.stop_event = threading.Event()
207
- self.thread = threading.Thread(target=self._fetch_cached_df)
208
- self.result = None
209
- self.error_description = None
210
- self.running = False
211
- self.started = False
212
- self.condition = threading.Condition()
213
- self.error_code = 0
198
+
199
+ # Thread synchronization
200
+ self._lock = threading.Lock()
201
+ self._condition = threading.Condition(self._lock)
202
+ self._stop_event = threading.Event()
203
+ self._thread = None
204
+
205
+ # State variables - use properties for thread-safe access
206
+ self._result: Optional[Any] = None
207
+ self._started: bool = False
208
+ self._running: bool = False
209
+ self._error_code: int = 0
210
+ self._error_description: Optional[str] = None
211
+
212
+ # Public properties for compatibility with subclasses
213
+ @property
214
+ def result(self) -> Optional[Any]:
215
+ with self._lock:
216
+ return self._result
217
+
218
+ @property
219
+ def started(self) -> bool:
220
+ with self._lock:
221
+ return self._started
222
+
223
+ @property
224
+ def running(self) -> bool:
225
+ with self._lock:
226
+ return self._running
227
+
228
+ @running.setter
229
+ def running(self, value: bool):
230
+ """Allow subclasses to set running status and auto-start if needed."""
231
+ with self._lock:
232
+ self._running = value
233
+ # If subclass sets running=True, auto-start the thread
234
+ if value and not self._started:
235
+ self._start_thread()
236
+
237
+ @property
238
+ def error_code(self) -> int:
239
+ with self._lock:
240
+ return self._error_code
241
+
242
+ @property
243
+ def error_description(self) -> Optional[str]:
244
+ with self._lock:
245
+ return self._error_description
246
+
247
+ def _start_thread(self):
248
+ """Internal method to start thread (must be called under lock)."""
249
+ if not self._started:
250
+ self._thread = threading.Thread(target=self._fetch_cached_df, daemon=True)
251
+ self._thread.start()
252
+ self._started = True
214
253
 
215
254
  def _fetch_cached_df(self):
216
- with self.condition:
217
- if self.running:
218
- logger.info('Already running the fetching')
219
- return
255
+ """Background thread that polls for results."""
256
+ sleep_time = 0.5
220
257
 
221
- sleep_time = .5
222
- self.running = True
223
- while not self.stop_event.is_set():
258
+ # Don't check _running here - subclasses already set it
259
+ try:
260
+ while not self._stop_event.is_set():
224
261
  try:
225
- r = requests.get(f'{WORKER_URL}/status/{self.file_ref}')
262
+ r = requests.get(f'{WORKER_URL}/status/{self.file_ref}', timeout=10)
263
+
226
264
  if r.status_code == 200:
227
265
  status = Status(**r.json())
266
+
228
267
  if status.status == 'Completed':
229
268
  self._handle_completion(status)
230
269
  return
231
270
  elif status.status == 'Error':
232
271
  self._handle_error(1, status.error_message)
233
- break
272
+ return
234
273
  elif status.status == 'Unknown Error':
235
- self._handle_error(-1,
236
- 'There was an unknown error with the process, '
237
- 'and the process got killed by the server')
238
- break
274
+ self._handle_error(
275
+ -1,
276
+ 'There was an unknown error with the process, '
277
+ 'and the process got killed by the server'
278
+ )
279
+ return
239
280
  else:
240
- self._handle_error(2, r.text)
241
- break
281
+ self._handle_error(2, f"HTTP {r.status_code}: {r.text}")
282
+ return
283
+
242
284
  except requests.RequestException as e:
243
285
  self._handle_error(2, f"Request failed: {e}")
244
- break
286
+ return
245
287
 
246
- sleep(sleep_time)
288
+ # Sleep without holding the lock
289
+ if not self._stop_event.wait(timeout=sleep_time):
290
+ continue
291
+ else:
292
+ break
247
293
 
294
+ # Only reached if stop_event was set
248
295
  self._handle_cancellation()
249
296
 
297
+ except Exception as e:
298
+ # Catch any unexpected errors
299
+ logger.exception("Unexpected error in fetch thread")
300
+ self._handle_error(-1, f"Unexpected error: {e}")
301
+
250
302
  def _handle_completion(self, status):
251
- self.running = False
252
- self.condition.notify_all()
253
- if status.result_type == 'polars':
254
- self.result = get_df_result(status.results)
255
- else:
256
- self.result = status.results
257
-
258
- def _handle_error(self, code, description):
259
- self.error_code = code
260
- self.error_description = description
261
- self.running = False
262
- self.condition.notify_all()
303
+ """Handle successful completion. Must be called from fetch thread."""
304
+ with self._condition:
305
+ try:
306
+ if status.result_type == 'polars':
307
+ self._result = get_df_result(status.results)
308
+ else:
309
+ self._result = status.results
310
+ except Exception as e:
311
+ logger.exception("Error processing result")
312
+ self._error_code = -1
313
+ self._error_description = f"Error processing result: {e}"
314
+ finally:
315
+ self._running = False
316
+ self._condition.notify_all()
317
+
318
+ def _handle_error(self, code: int, description: str):
319
+ """Handle error state. Must be called from fetch thread."""
320
+ with self._condition:
321
+ self._error_code = code
322
+ self._error_description = description
323
+ self._running = False
324
+ self._condition.notify_all()
263
325
 
264
326
  def _handle_cancellation(self):
265
- logger.warning("Fetch operation cancelled")
266
- if self.error_description is not None:
267
- logger.warning(self.error_description)
268
- self.running = False
269
- self.condition.notify_all()
327
+ """Handle cancellation. Must be called from fetch thread."""
328
+ with self._condition:
329
+ if self._error_description is None:
330
+ self._error_description = "Task cancelled"
331
+ logger.warning(f"Fetch operation cancelled: {self._error_description}")
332
+ self._running = False
333
+ self._condition.notify_all()
270
334
 
271
335
  def start(self):
272
- if self.running:
273
- logger.info('Already running the fetching')
274
- return
275
- if not self.started:
276
- self.thread.start()
277
- self.started = True
336
+ """Start the background fetch thread."""
337
+ with self._lock:
338
+ if self._started:
339
+ logger.info('Fetcher already started')
340
+ return
341
+ if self._running:
342
+ logger.info('Already running the fetching')
343
+ return
344
+
345
+ self._running = True
346
+ self._start_thread()
278
347
 
279
348
  def cancel(self):
280
349
  """
@@ -282,30 +351,67 @@ class BaseFetcher:
282
351
  Also cleans up any resources being used.
283
352
  """
284
353
  logger.warning('Cancelling the operation')
354
+
355
+ # Cancel on the worker side
285
356
  try:
286
357
  cancel_task(self.file_ref)
287
358
  except Exception as e:
288
359
  logger.error(f'Failed to cancel task on worker: {str(e)}')
289
360
 
290
- # Then stop the local monitoring thread
291
- self.stop_event.set()
292
- self.thread.join()
361
+ # Signal the thread to stop
362
+ self._stop_event.set()
293
363
 
294
- # Update local state
295
- with self.condition:
296
- self.running = False
297
- self.error_description = "Task cancelled by user"
298
- self.condition.notify_all()
364
+ # Wait for thread to finish
365
+ if self._thread and self._thread.is_alive():
366
+ self._thread.join(timeout=5.0)
367
+ if self._thread.is_alive():
368
+ logger.warning("Fetch thread did not stop within timeout")
299
369
 
300
370
  def get_result(self) -> Optional[Any]:
301
- if not self.started:
302
- self.start()
303
- with self.condition:
304
- while self.running and self.result is None:
305
- self.condition.wait() # Wait until notified
306
- if self.error_description is not None:
307
- raise Exception(self.error_description)
308
- return self.result
371
+ """
372
+ Get the result, blocking until it's available.
373
+
374
+ Returns:
375
+ The fetched result.
376
+
377
+ Raises:
378
+ Exception: If an error occurred during fetching.
379
+ """
380
+ # Start if not already started (for manual usage)
381
+ with self._lock:
382
+ if not self._started:
383
+ if not self._running:
384
+ self._running = True
385
+ self._start_thread()
386
+
387
+ # Wait for completion
388
+ with self._condition:
389
+ while self._running:
390
+ self._condition.wait()
391
+
392
+ # Check for errors
393
+ with self._lock:
394
+ if self._error_description is not None:
395
+ raise Exception(self._error_description)
396
+ return self._result
397
+
398
+ @property
399
+ def is_running(self) -> bool:
400
+ """Check if the fetcher is currently running."""
401
+ with self._lock:
402
+ return self._running
403
+
404
+ @property
405
+ def has_error(self) -> bool:
406
+ """Check if the fetcher encountered an error."""
407
+ with self._lock:
408
+ return self._error_description is not None
409
+
410
+ @property
411
+ def error_info(self) -> tuple[int, Optional[str]]:
412
+ """Get error code and description."""
413
+ with self._lock:
414
+ return self._error_code, self._error_description
309
415
 
310
416
 
311
417
  class ExternalDfFetcher(BaseFetcher):
@@ -354,7 +460,7 @@ class ExternalFuzzyMatchFetcher(BaseFetcher):
354
460
 
355
461
 
356
462
  class ExternalCreateFetcher(BaseFetcher):
357
- def __init__(self, received_table: ReceivedTableCollection, node_id: int, flow_id: int,
463
+ def __init__(self, received_table: ReceivedTable, node_id: int, flow_id: int,
358
464
  file_type: str = 'csv', wait_on_completion: bool = True):
359
465
  r = trigger_create_operation(received_table=received_table, file_type=file_type,
360
466
  node_id=node_id, flow_id=flow_id)
@@ -23,8 +23,10 @@ def get_data_type(vals: Iterable[Any]):
23
23
  def calculate_schema(lf: pl.LazyFrame) -> List[Dict]:
24
24
  r = ExternalDfFetcher(lf=lf, operation_type='calculate_schema', wait_on_completion=False, flow_id=-1, node_id=-1)
25
25
  schema_stats: List[Dict] = r.get_result()
26
+
26
27
  for schema_stat in schema_stats:
27
28
  schema_stat['pl_datatype'] = getattr(pl.datatypes, schema_stat['pl_datatype'])
29
+
28
30
  return schema_stats
29
31
 
30
32
 
@@ -1,9 +1,11 @@
1
1
  import datetime
2
- import pickle
3
2
 
4
3
  import os
4
+ import yaml
5
+ import json
5
6
 
6
7
  import polars as pl
8
+ from pathlib import Path
7
9
 
8
10
  import fastexcel
9
11
  from fastapi.exceptions import HTTPException
@@ -19,6 +21,7 @@ from flowfile_core.flowfile.sources.external_sources.factory import data_source_
19
21
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, cast_str_to_polars_type
20
22
 
21
23
  from flowfile_core.flowfile.flow_data_engine.cloud_storage_reader import CloudStorageReader
24
+ from flowfile_core.schemas.transform_schema import FuzzyMatchInputManager
22
25
  from flowfile_core.utils.arrow_reader import get_read_top_n
23
26
  from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine, execute_polars_code
24
27
  from flowfile_core.flowfile.flow_data_engine.read_excel_tables import (get_open_xlsx_datatypes,
@@ -52,6 +55,22 @@ from flowfile_core.flowfile.database_connection_manager.db_connections import (g
52
55
  get_local_cloud_connection)
53
56
  from flowfile_core.flowfile.util.calculate_layout import calculate_layered_layout
54
57
  from flowfile_core.flowfile.node_designer.custom_node import CustomNodeBase
58
+ from importlib.metadata import version, PackageNotFoundError
59
+
60
+ try:
61
+ __version__ = version("Flowfile")
62
+ except PackageNotFoundError:
63
+ __version__ = "0.0.0-dev"
64
+
65
+
66
+ def represent_list_json(dumper, data):
67
+ """Use inline style for short simple lists, block style for complex ones."""
68
+ if len(data) <= 10 and all(isinstance(item, (int, str, float, bool, type(None))) for item in data):
69
+ return dumper.represent_sequence('tag:yaml.org,2002:seq', data, flow_style=True)
70
+ return dumper.represent_sequence('tag:yaml.org,2002:seq', data, flow_style=False)
71
+
72
+
73
+ yaml.add_representer(list, represent_list_json)
55
74
 
56
75
 
57
76
  def get_xlsx_schema(engine: str, file_path: str, sheet_name: str, start_row: int, start_column: int,
@@ -152,7 +171,7 @@ def get_cloud_connection_settings(connection_name: str,
152
171
  HTTPException: If the connection settings cannot be found.
153
172
  """
154
173
  cloud_connection_settings = get_local_cloud_connection(connection_name, user_id)
155
- if cloud_connection_settings is None and auth_mode in ("env_vars", "auto"):
174
+ if cloud_connection_settings is None and auth_mode in ("env_vars", transform_schema.AUTO_DATA_TYPE):
156
175
  # If the auth mode is aws-cli, we do not need connection settings
157
176
  cloud_connection_settings = FullCloudStorageConnection(storage_type="s3", auth_method="env_vars")
158
177
  elif cloud_connection_settings is None and auth_mode == "aws-cli":
@@ -223,7 +242,7 @@ class FlowGraph:
223
242
  self._node_ids = []
224
243
  self._node_db = {}
225
244
  self.cache_results = cache_results
226
- self.__name__ = name if name else id(self)
245
+ self.__name__ = name if name else "flow_" + str(id(self))
227
246
  self.depends_on = {}
228
247
  if path_ref is not None:
229
248
  self.add_datasource(input_schema.NodeDatasource(file_path=path_ref))
@@ -733,11 +752,11 @@ class FlowGraph:
733
752
  """
734
753
 
735
754
  error = ""
736
- if function_settings.function.field.data_type not in (None, "Auto"):
755
+ if function_settings.function.field.data_type not in (None, transform_schema.AUTO_DATA_TYPE):
737
756
  output_type = cast_str_to_polars_type(function_settings.function.field.data_type)
738
757
  else:
739
758
  output_type = None
740
- if output_type not in (None, "Auto"):
759
+ if output_type not in (None, transform_schema.AUTO_DATA_TYPE):
741
760
  new_col = [FlowfileColumn.from_input(column_name=function_settings.function.field.name,
742
761
  data_type=str(output_type))]
743
762
  else:
@@ -755,6 +774,7 @@ class FlowGraph:
755
774
  setting_input=function_settings,
756
775
  input_node_ids=[function_settings.depending_on_id]
757
776
  )
777
+ # TODO: Add validation here
758
778
  if error != "":
759
779
  node = self.get_node(function_settings.node_id)
760
780
  node.results.errors = error
@@ -771,13 +791,11 @@ class FlowGraph:
771
791
  Returns:
772
792
  The `FlowGraph` instance for method chaining.
773
793
  """
774
-
775
794
  def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
776
795
  for left_select in cross_join_settings.cross_join_input.left_select.renames:
777
796
  left_select.is_available = True if left_select.old_name in main.schema else False
778
797
  for right_select in cross_join_settings.cross_join_input.right_select.renames:
779
798
  right_select.is_available = True if right_select.old_name in right.schema else False
780
-
781
799
  return main.do_cross_join(cross_join_input=cross_join_settings.cross_join_input,
782
800
  auto_generate_selection=cross_join_settings.auto_generate_selection,
783
801
  verify_integrity=False,
@@ -800,13 +818,11 @@ class FlowGraph:
800
818
  Returns:
801
819
  The `FlowGraph` instance for method chaining.
802
820
  """
803
-
804
821
  def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
805
822
  for left_select in join_settings.join_input.left_select.renames:
806
823
  left_select.is_available = True if left_select.old_name in main.schema else False
807
824
  for right_select in join_settings.join_input.right_select.renames:
808
825
  right_select.is_available = True if right_select.old_name in right.schema else False
809
-
810
826
  return main.join(join_input=join_settings.join_input,
811
827
  auto_generate_selection=join_settings.auto_generate_selection,
812
828
  verify_integrity=False,
@@ -844,7 +860,7 @@ class FlowGraph:
844
860
  return FlowDataEngine(f.get_result())
845
861
 
846
862
  def schema_callback():
847
- fm_input_copy = deepcopy(fuzzy_settings.join_input) # Deepcopy create an unique object per func
863
+ fm_input_copy = FuzzyMatchInputManager(fuzzy_settings.join_input) # Deepcopy create an unique object per func
848
864
  node = self.get_node(node_id=fuzzy_settings.node_id)
849
865
  return calculate_fuzzy_match_schema(fm_input_copy,
850
866
  left_schema=node.node_inputs.main_inputs[0].schema,
@@ -1131,7 +1147,6 @@ class FlowGraph:
1131
1147
  """
1132
1148
 
1133
1149
  def _func(df: FlowDataEngine):
1134
- output_file.output_settings.populate_abs_file_path()
1135
1150
  execute_remote = self.execution_location != 'local'
1136
1151
  df.output(output_fs=output_file.output_settings, flow_id=self.flow_id, node_id=output_file.node_id,
1137
1152
  execute_remote=execute_remote)
@@ -1451,10 +1466,10 @@ class FlowGraph:
1451
1466
  Args:
1452
1467
  input_file: The settings for the read operation.
1453
1468
  """
1454
-
1455
- if input_file.received_file.file_type in ('xlsx', 'excel') and input_file.received_file.sheet_name == '':
1469
+ if (input_file.received_file.file_type in ('xlsx', 'excel') and
1470
+ input_file.received_file.table_settings.sheet_name == ''):
1456
1471
  sheet_name = fastexcel.read_excel(input_file.received_file.path).sheet_names[0]
1457
- input_file.received_file.sheet_name = sheet_name
1472
+ input_file.received_file.table_settings.sheet_name = sheet_name
1458
1473
 
1459
1474
  received_file = input_file.received_file
1460
1475
  input_file.received_file.set_absolute_filepath()
@@ -1463,7 +1478,7 @@ class FlowGraph:
1463
1478
  input_file.received_file.set_absolute_filepath()
1464
1479
  if input_file.received_file.file_type == 'parquet':
1465
1480
  input_data = FlowDataEngine.create_from_path(input_file.received_file)
1466
- elif input_file.received_file.file_type == 'csv' and 'utf' in input_file.received_file.encoding:
1481
+ elif input_file.received_file.file_type == 'csv' and 'utf' in input_file.received_file.table_settings.encoding:
1467
1482
  input_data = FlowDataEngine.create_from_path(input_file.received_file)
1468
1483
  else:
1469
1484
  input_data = FlowDataEngine.create_from_path_worker(input_file.received_file,
@@ -1500,12 +1515,12 @@ class FlowGraph:
1500
1515
  # If the file is an Excel file, we need to use the openpyxl engine to read the schema
1501
1516
  schema_callback = get_xlsx_schema_callback(engine='openpyxl',
1502
1517
  file_path=received_file.file_path,
1503
- sheet_name=received_file.sheet_name,
1504
- start_row=received_file.start_row,
1505
- end_row=received_file.end_row,
1506
- start_column=received_file.start_column,
1507
- end_column=received_file.end_column,
1508
- has_headers=received_file.has_headers)
1518
+ sheet_name=received_file.table_settings.sheet_name,
1519
+ start_row=received_file.table_settings.start_row,
1520
+ end_row=received_file.table_settings.end_row,
1521
+ start_column=received_file.table_settings.start_column,
1522
+ end_column=received_file.table_settings.end_column,
1523
+ has_headers=received_file.table_settings.has_headers)
1509
1524
  else:
1510
1525
  schema_callback = None
1511
1526
  else:
@@ -1636,6 +1651,13 @@ class FlowGraph:
1636
1651
  run_type=run_type
1637
1652
  )
1638
1653
 
1654
+ def create_empty_run_information(self) -> RunInformation:
1655
+ return RunInformation(
1656
+ flow_id=self.flow_id, start_time=None, end_time=None,
1657
+ success=None, number_of_nodes=0, node_step_result=[],
1658
+ run_type="init"
1659
+ )
1660
+
1639
1661
  def trigger_fetch_node(self, node_id: int) -> RunInformation | None:
1640
1662
  """Executes a specific node in the graph by its ID."""
1641
1663
  if self.flow_settings.is_running:
@@ -1746,6 +1768,7 @@ class FlowGraph:
1746
1768
  skip_nodes.extend(list(node.get_all_dependent_nodes()))
1747
1769
  node_logger.info(f'Completed node with success: {node_result.success}')
1748
1770
  self.latest_run_info.nodes_completed += 1
1771
+ self.latest_run_info.end_time = datetime.datetime.now()
1749
1772
  self.flow_logger.info('Flow completed!')
1750
1773
  self.end_datetime = datetime.datetime.now()
1751
1774
  self.flow_settings.is_running = False
@@ -1757,7 +1780,7 @@ class FlowGraph:
1757
1780
  finally:
1758
1781
  self.flow_settings.is_running = False
1759
1782
 
1760
- def get_run_info(self) -> RunInformation | None:
1783
+ def get_run_info(self) -> RunInformation:
1761
1784
  """Gets a summary of the most recent graph execution.
1762
1785
 
1763
1786
  Returns:
@@ -1765,7 +1788,7 @@ class FlowGraph:
1765
1788
  """
1766
1789
  is_running = self.flow_settings.is_running
1767
1790
  if self.latest_run_info is None:
1768
- return
1791
+ return self.create_empty_run_information()
1769
1792
 
1770
1793
  elif not is_running and self.latest_run_info.success is not None:
1771
1794
  return self.latest_run_info
@@ -1806,6 +1829,42 @@ class FlowGraph:
1806
1829
  node = self._node_db[node_id]
1807
1830
  return node.get_node_data(flow_id=self.flow_id, include_example=include_example)
1808
1831
 
1832
+ def get_flowfile_data(self) -> schemas.FlowfileData:
1833
+ start_node_ids = {v.node_id for v in self._flow_starts}
1834
+
1835
+ nodes = []
1836
+ for node in self.nodes:
1837
+ node_info = node.get_node_information()
1838
+ flowfile_node = schemas.FlowfileNode(
1839
+ id=node_info.id,
1840
+ type=node_info.type,
1841
+ is_start_node=node.node_id in start_node_ids,
1842
+ description=node_info.description,
1843
+ x_position=int(node_info.x_position),
1844
+ y_position=int(node_info.y_position),
1845
+ left_input_id=node_info.left_input_id,
1846
+ right_input_id=node_info.right_input_id,
1847
+ input_ids=node_info.input_ids,
1848
+ outputs=node_info.outputs,
1849
+ setting_input=node_info.setting_input,
1850
+ )
1851
+ nodes.append(flowfile_node)
1852
+
1853
+ settings = schemas.FlowfileSettings(
1854
+ description=self.flow_settings.description,
1855
+ execution_mode=self.flow_settings.execution_mode,
1856
+ execution_location=self.flow_settings.execution_location,
1857
+ auto_save=self.flow_settings.auto_save,
1858
+ show_detailed_progress=self.flow_settings.show_detailed_progress,
1859
+ )
1860
+ return schemas.FlowfileData(
1861
+ flowfile_version=__version__,
1862
+ flowfile_id=self.flow_id,
1863
+ flowfile_name=self.__name__,
1864
+ flowfile_settings=settings,
1865
+ nodes=nodes,
1866
+ )
1867
+
1809
1868
  def get_node_storage(self) -> schemas.FlowInformation:
1810
1869
  """Serializes the entire graph's state into a storable format.
1811
1870
 
@@ -1838,19 +1897,63 @@ class FlowGraph:
1838
1897
  for node in self.nodes:
1839
1898
  node.remove_cache()
1840
1899
 
1900
+ def _handle_flow_renaming(self, new_name: str, new_path: Path):
1901
+ """
1902
+ Handle the rename of a flow when it is being saved.
1903
+ """
1904
+ if self.flow_settings and self.flow_settings.path and Path(self.flow_settings.path).absolute() != new_path.absolute():
1905
+ self.__name__ = new_name
1906
+ self.flow_settings.save_location = str(new_path.absolute())
1907
+ self.flow_settings.name = new_name
1908
+ if self.flow_settings and not self.flow_settings.save_location:
1909
+ self.flow_settings.save_location = str(new_path.absolute())
1910
+ self.__name__ = new_name
1911
+ self.flow_settings.name = new_name
1912
+
1841
1913
  def save_flow(self, flow_path: str):
1842
1914
  """Saves the current state of the flow graph to a file.
1843
1915
 
1916
+ Supports multiple formats based on file extension:
1917
+ - .yaml / .yml: New YAML format
1918
+ - .json: JSON format
1919
+
1844
1920
  Args:
1845
1921
  flow_path: The path where the flow file will be saved.
1846
1922
  """
1847
1923
  logger.info("Saving flow to %s", flow_path)
1848
- os.makedirs(os.path.dirname(flow_path), exist_ok=True)
1924
+ path = Path(flow_path)
1925
+ os.makedirs(path.parent, exist_ok=True)
1926
+ suffix = path.suffix.lower()
1927
+ new_flow_name = path.name.replace(suffix, "")
1928
+ self._handle_flow_renaming(new_flow_name, path)
1929
+ self.flow_settings.modified_on = datetime.datetime.now().timestamp()
1849
1930
  try:
1850
- with open(flow_path, 'wb') as f:
1851
- pickle.dump(self.get_node_storage(), f)
1931
+ if suffix == '.flowfile':
1932
+ raise DeprecationWarning(
1933
+ f"The .flowfile format is deprecated. Please use .yaml or .json formats.\n\n"
1934
+ "Or stay on v0.4.1 if you still need .flowfile support.\n\n"
1935
+ )
1936
+ elif suffix in ('.yaml', '.yml'):
1937
+ flowfile_data = self.get_flowfile_data()
1938
+ data = flowfile_data.model_dump(mode='json')
1939
+ with open(flow_path, 'w', encoding='utf-8') as f:
1940
+ yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
1941
+ elif suffix == '.json':
1942
+ flowfile_data = self.get_flowfile_data()
1943
+ data = flowfile_data.model_dump(mode='json')
1944
+ with open(flow_path, 'w', encoding='utf-8') as f:
1945
+ json.dump(data, f, indent=2, ensure_ascii=False)
1946
+
1947
+ else:
1948
+ flowfile_data = self.get_flowfile_data()
1949
+ logger.warning(f"Unknown file extension {suffix}. Defaulting to YAML format.")
1950
+ data = flowfile_data.model_dump(mode='json')
1951
+ with open(flow_path, 'w', encoding='utf-8') as f:
1952
+ yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
1953
+
1852
1954
  except Exception as e:
1853
1955
  logger.error(f"Error saving flow: {e}")
1956
+ raise
1854
1957
 
1855
1958
  self.flow_settings.path = flow_path
1856
1959