Flowfile 0.3.9__py3-none-any.whl → 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile might be problematic. Click here for more details.

Files changed (138)
  1. flowfile/__init__.py +1 -1
  2. flowfile/api.py +0 -1
  3. flowfile/web/static/assets/{CloudConnectionManager-c97c25f8.js → CloudConnectionManager-d7c2c028.js} +2 -2
  4. flowfile/web/static/assets/{CloudStorageReader-f1ff509e.js → CloudStorageReader-d467329f.js} +11 -78
  5. flowfile/web/static/assets/{CloudStorageWriter-034f8b78.js → CloudStorageWriter-071b8b00.js} +12 -79
  6. flowfile/web/static/assets/{CloudStorageWriter-49c9a4b2.css → CloudStorageWriter-b0ee067f.css} +24 -24
  7. flowfile/web/static/assets/ContextMenu-2dea5e27.js +41 -0
  8. flowfile/web/static/assets/{SettingsSection-9c836ecc.css → ContextMenu-4c74eef1.css} +0 -21
  9. flowfile/web/static/assets/ContextMenu-63cfa99b.css +26 -0
  10. flowfile/web/static/assets/ContextMenu-785554c4.js +41 -0
  11. flowfile/web/static/assets/ContextMenu-a51e19ea.js +41 -0
  12. flowfile/web/static/assets/ContextMenu-c13f91d0.css +26 -0
  13. flowfile/web/static/assets/{CrossJoin-41efa4cb.css → CrossJoin-1119d18e.css} +18 -18
  14. flowfile/web/static/assets/{CrossJoin-9e156ebe.js → CrossJoin-cf68ec7a.js} +14 -84
  15. flowfile/web/static/assets/{DatabaseConnectionSettings-d5c625b3.js → DatabaseConnectionSettings-435c5dd8.js} +3 -3
  16. flowfile/web/static/assets/{DatabaseManager-265adc5e.js → DatabaseManager-349e33a8.js} +2 -2
  17. flowfile/web/static/assets/{DatabaseReader-0b10551e.js → DatabaseReader-8075bd28.js} +14 -114
  18. flowfile/web/static/assets/{DatabaseReader-f50c6558.css → DatabaseReader-ae61773c.css} +0 -27
  19. flowfile/web/static/assets/{DatabaseWriter-c17c6916.js → DatabaseWriter-3e2dda89.js} +13 -74
  20. flowfile/web/static/assets/{ExploreData-5bdae813.css → ExploreData-2d0cf4db.css} +8 -14
  21. flowfile/web/static/assets/ExploreData-76ec698c.js +192 -0
  22. flowfile/web/static/assets/{ExternalSource-3a66556c.js → ExternalSource-609a265c.js} +8 -79
  23. flowfile/web/static/assets/{Filter-91ad87e7.js → Filter-97cff793.js} +12 -85
  24. flowfile/web/static/assets/{Filter-a9d08ba1.css → Filter-f62091b3.css} +3 -3
  25. flowfile/web/static/assets/{Formula-3c395ab1.js → Formula-09de0ec9.js} +18 -85
  26. flowfile/web/static/assets/{Formula-29f19d21.css → Formula-bb96803d.css} +4 -4
  27. flowfile/web/static/assets/{FuzzyMatch-6857de82.css → FuzzyMatch-1010f966.css} +42 -42
  28. flowfile/web/static/assets/{FuzzyMatch-2df0d230.js → FuzzyMatch-bdf70248.js} +16 -87
  29. flowfile/web/static/assets/{GraphSolver-d285877f.js → GraphSolver-0b5a0e05.js} +13 -159
  30. flowfile/web/static/assets/GraphSolver-f0cb7bfb.css +22 -0
  31. flowfile/web/static/assets/{Unique-b5615727.css → GroupBy-b9505323.css} +8 -8
  32. flowfile/web/static/assets/{GroupBy-0bd1cc6b.js → GroupBy-eaddadde.js} +12 -75
  33. flowfile/web/static/assets/{Join-5a78a203.js → Join-3313371b.js} +15 -85
  34. flowfile/web/static/assets/{Join-f45eff22.css → Join-fd79b451.css} +20 -20
  35. flowfile/web/static/assets/{ManualInput-a71b52c6.css → ManualInput-3246a08d.css} +20 -20
  36. flowfile/web/static/assets/{ManualInput-93aef9d6.js → ManualInput-e8bfc0be.js} +11 -82
  37. flowfile/web/static/assets/{Output-411ecaee.js → Output-7303bb09.js} +13 -243
  38. flowfile/web/static/assets/Output-ddc9079f.css +37 -0
  39. flowfile/web/static/assets/{Pivot-89db4b04.js → Pivot-3b1c54ef.js} +14 -138
  40. flowfile/web/static/assets/Pivot-cf333e3d.css +22 -0
  41. flowfile/web/static/assets/PivotValidation-3bb36c8f.js +61 -0
  42. flowfile/web/static/assets/PivotValidation-891ddfb0.css +13 -0
  43. flowfile/web/static/assets/PivotValidation-c46cd420.css +13 -0
  44. flowfile/web/static/assets/PivotValidation-eaa819c0.js +61 -0
  45. flowfile/web/static/assets/{PolarsCode-a9f974f8.js → PolarsCode-aa12e25d.js} +13 -80
  46. flowfile/web/static/assets/Read-6b17491f.css +62 -0
  47. flowfile/web/static/assets/Read-a2bfc618.js +243 -0
  48. flowfile/web/static/assets/RecordCount-aa0dc082.js +53 -0
  49. flowfile/web/static/assets/{RecordId-55ae7d36.js → RecordId-48ee1a3b.js} +8 -80
  50. flowfile/web/static/assets/SQLQueryComponent-36cef432.css +27 -0
  51. flowfile/web/static/assets/SQLQueryComponent-e149dbf2.js +38 -0
  52. flowfile/web/static/assets/{Sample-b4a18476.js → Sample-f06cb97a.js} +8 -77
  53. flowfile/web/static/assets/{SecretManager-b066d13a.js → SecretManager-37f34886.js} +2 -2
  54. flowfile/web/static/assets/{Select-727688dc.js → Select-b60e6c47.js} +11 -85
  55. flowfile/web/static/assets/SettingsSection-2e4d03c4.css +21 -0
  56. flowfile/web/static/assets/SettingsSection-5c696bee.css +20 -0
  57. flowfile/web/static/assets/SettingsSection-70e5a7b1.js +53 -0
  58. flowfile/web/static/assets/SettingsSection-71e6b7e3.css +21 -0
  59. flowfile/web/static/assets/{SettingsSection-695ac487.js → SettingsSection-75b6cf4f.js} +2 -40
  60. flowfile/web/static/assets/SettingsSection-e57a672e.js +45 -0
  61. flowfile/web/static/assets/{GroupBy-ab1ea74b.css → Sort-3643d625.css} +8 -8
  62. flowfile/web/static/assets/{Sort-be3339a8.js → Sort-51b1ee4d.js} +12 -97
  63. flowfile/web/static/assets/{TextToRows-7b8998da.js → TextToRows-26835f8f.js} +14 -83
  64. flowfile/web/static/assets/{TextToRows-c92d1ec2.css → TextToRows-5d2c1190.css} +9 -9
  65. flowfile/web/static/assets/{UnavailableFields-8b0cb48e.js → UnavailableFields-88a4cd0c.js} +2 -2
  66. flowfile/web/static/assets/Union-4d0088eb.js +77 -0
  67. flowfile/web/static/assets/{Union-8d9ac7f9.css → Union-af6c3d9b.css} +6 -6
  68. flowfile/web/static/assets/{Unique-af5a80b4.js → Unique-7d554a62.js} +22 -91
  69. flowfile/web/static/assets/{Sort-7ccfa0fe.css → Unique-f9fb0809.css} +8 -8
  70. flowfile/web/static/assets/Unpivot-1e422df3.css +30 -0
  71. flowfile/web/static/assets/{Unpivot-5195d411.js → Unpivot-4668595c.js} +12 -166
  72. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +13 -0
  73. flowfile/web/static/assets/UnpivotValidation-d4f0e0e8.js +51 -0
  74. flowfile/web/static/assets/{ExploreData-18a4fe52.js → VueGraphicWalker-5324d566.js} +4 -264
  75. flowfile/web/static/assets/VueGraphicWalker-ed5ab88b.css +6 -0
  76. flowfile/web/static/assets/{api-cb00cce6.js → api-271ed117.js} +1 -1
  77. flowfile/web/static/assets/{api-023d1733.js → api-31e4fea6.js} +1 -1
  78. flowfile/web/static/assets/{designer-2197d782.css → designer-091bdc3f.css} +819 -184
  79. flowfile/web/static/assets/{designer-6c322d8e.js → designer-bf3d9487.js} +2191 -703
  80. flowfile/web/static/assets/{documentation-4d1fafe1.js → documentation-4d0a1cea.js} +1 -1
  81. flowfile/web/static/assets/{dropDown-0b46dd77.js → dropDown-025888df.js} +1 -1
  82. flowfile/web/static/assets/{fullEditor-ec4e4f95.js → fullEditor-1df991ec.js} +2 -2
  83. flowfile/web/static/assets/{genericNodeSettings-def5879b.js → genericNodeSettings-d3b2b2ac.js} +3 -3
  84. flowfile/web/static/assets/{index-683fc198.js → index-d0518598.js} +210 -31
  85. flowfile/web/static/assets/{Output-48f81019.css → outputCsv-9cc59e0b.css} +0 -143
  86. flowfile/web/static/assets/outputCsv-d8457527.js +86 -0
  87. flowfile/web/static/assets/outputExcel-b41305c0.css +102 -0
  88. flowfile/web/static/assets/outputExcel-be89153e.js +56 -0
  89. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +4 -0
  90. flowfile/web/static/assets/outputParquet-fabb445a.js +31 -0
  91. flowfile/web/static/assets/readCsv-bca3ed53.css +52 -0
  92. flowfile/web/static/assets/readCsv-e8359522.js +178 -0
  93. flowfile/web/static/assets/readExcel-dabaf51b.js +203 -0
  94. flowfile/web/static/assets/readExcel-e1b381ea.css +64 -0
  95. flowfile/web/static/assets/readParquet-cee068e2.css +19 -0
  96. flowfile/web/static/assets/readParquet-e0771ef2.js +26 -0
  97. flowfile/web/static/assets/{secretApi-baceb6f9.js → secretApi-ce823eee.js} +1 -1
  98. flowfile/web/static/assets/{selectDynamic-de91449a.js → selectDynamic-5476546e.js} +7 -7
  99. flowfile/web/static/assets/{selectDynamic-b062bc9b.css → selectDynamic-aa913ff4.css} +16 -16
  100. flowfile/web/static/assets/{vue-codemirror.esm-dc5e3348.js → vue-codemirror.esm-9ed00d50.js} +29 -33
  101. flowfile/web/static/assets/{vue-content-loader.es-ba94b82f.js → vue-content-loader.es-7bca2d9b.js} +1 -1
  102. flowfile/web/static/index.html +1 -1
  103. {flowfile-0.3.9.dist-info → flowfile-0.3.10.dist-info}/METADATA +1 -1
  104. {flowfile-0.3.9.dist-info → flowfile-0.3.10.dist-info}/RECORD +129 -97
  105. flowfile_core/configs/flow_logger.py +5 -13
  106. flowfile_core/configs/node_store/nodes.py +303 -44
  107. flowfile_core/configs/settings.py +2 -1
  108. flowfile_core/database/connection.py +5 -21
  109. flowfile_core/fileExplorer/funcs.py +239 -121
  110. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +28 -8
  111. flowfile_core/flowfile/flow_graph.py +97 -33
  112. flowfile_core/flowfile/flow_node/flow_node.py +41 -9
  113. flowfile_core/flowfile/handler.py +22 -3
  114. flowfile_core/flowfile/schema_callbacks.py +8 -4
  115. flowfile_core/flowfile/setting_generator/settings.py +0 -1
  116. flowfile_core/main.py +4 -1
  117. flowfile_core/routes/routes.py +59 -10
  118. flowfile_core/schemas/input_schema.py +0 -1
  119. flowfile_core/schemas/output_model.py +5 -2
  120. flowfile_core/schemas/schemas.py +2 -0
  121. flowfile_core/schemas/transform_schema.py +1 -0
  122. flowfile_worker/__init__.py +6 -35
  123. flowfile_worker/main.py +5 -2
  124. flowfile_worker/routes.py +47 -5
  125. shared/__init__.py +15 -0
  126. shared/storage_config.py +243 -0
  127. flowfile/web/static/assets/GraphSolver-17fd26db.css +0 -68
  128. flowfile/web/static/assets/Pivot-f415e85f.css +0 -35
  129. flowfile/web/static/assets/Read-80dc1675.css +0 -197
  130. flowfile/web/static/assets/Read-c3b1929c.js +0 -701
  131. flowfile/web/static/assets/RecordCount-4e95f98e.js +0 -122
  132. flowfile/web/static/assets/Union-89fd73dc.js +0 -146
  133. flowfile/web/static/assets/Unpivot-246e9bbd.css +0 -77
  134. flowfile/web/static/assets/nodeTitle-a16db7c3.js +0 -227
  135. flowfile/web/static/assets/nodeTitle-f4b12bcb.css +0 -134
  136. {flowfile-0.3.9.dist-info → flowfile-0.3.10.dist-info}/LICENSE +0 -0
  137. {flowfile-0.3.9.dist-info → flowfile-0.3.10.dist-info}/WHEEL +0 -0
  138. {flowfile-0.3.9.dist-info → flowfile-0.3.10.dist-info}/entry_points.txt +0 -0
@@ -1,11 +1,15 @@
1
1
  import datetime
2
2
  import pickle
3
+
4
+ import os
5
+
3
6
  import polars as pl
7
+
4
8
  import fastexcel
5
9
  from fastapi.exceptions import HTTPException
6
10
  from time import time
7
11
  from functools import partial
8
- from typing import List, Dict, Union, Callable, Any, Optional, Tuple
12
+ from typing import List, Dict, Union, Callable, Any, Optional, Tuple, Literal
9
13
  from uuid import uuid1
10
14
  from copy import deepcopy
11
15
  from pyarrow.parquet import ParquetFile
@@ -175,11 +179,9 @@ class FlowGraph:
175
179
  schema: Optional[List[FlowfileColumn]] = None
176
180
  has_over_row_function: bool = False
177
181
  _flow_starts: List[Union[int, str]] = None
178
- node_results: List[NodeResult] = None
179
182
  latest_run_info: Optional[RunInformation] = None
180
183
  start_datetime: datetime = None
181
184
  end_datetime: datetime = None
182
- nodes_completed: int = 0
183
185
  _flow_settings: schemas.FlowSettings = None
184
186
  flow_logger: FlowLogger
185
187
 
@@ -206,11 +208,9 @@ class FlowGraph:
206
208
 
207
209
  self._flow_settings = flow_settings
208
210
  self.uuid = str(uuid1())
209
- self.nodes_completed = 0
210
211
  self.start_datetime = None
211
212
  self.end_datetime = None
212
213
  self.latest_run_info = None
213
- self.node_results = []
214
214
  self._flow_id = flow_settings.flow_id
215
215
  self.flow_logger = FlowLogger(flow_settings.flow_id)
216
216
  self._flow_starts: List[FlowNode] = []
@@ -814,11 +814,11 @@ class FlowGraph:
814
814
  def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
815
815
  node = self.get_node(node_id=fuzzy_settings.node_id)
816
816
  if self.execution_location == "local":
817
- return main.fuzzy_join(fuzzy_match_input=fuzzy_settings.join_input,
817
+ return main.fuzzy_join(fuzzy_match_input=deepcopy(fuzzy_settings.join_input),
818
818
  other=right,
819
819
  node_logger=self.flow_logger.get_node_logger(fuzzy_settings.node_id))
820
820
 
821
- f = main.start_fuzzy_join(fuzzy_match_input=fuzzy_settings.join_input, other=right, file_ref=node.hash,
821
+ f = main.start_fuzzy_join(fuzzy_match_input=deepcopy(fuzzy_settings.join_input), other=right, file_ref=node.hash,
822
822
  flow_id=self.flow_id, node_id=fuzzy_settings.node_id)
823
823
  logger.info("Started the fuzzy match action")
824
824
  node._fetch_cached_df = f # Add to the node so it can be cancelled and fetch later if needed
@@ -1599,6 +1599,66 @@ class FlowGraph:
1599
1599
  self.reset()
1600
1600
  self.flow_settings.execution_location = execution_location
1601
1601
 
1602
+ def validate_if_node_can_be_fetched(self, node_id: int) -> None:
1603
+ flow_node = self._node_db.get(node_id)
1604
+ if not flow_node:
1605
+ raise Exception("Node not found found")
1606
+ skip_nodes, execution_order = compute_execution_plan(
1607
+ nodes=self.nodes, flow_starts=self._flow_starts+self.get_implicit_starter_nodes()
1608
+ )
1609
+ if flow_node.node_id in [skip_node.node_id for skip_node in skip_nodes]:
1610
+ raise Exception("Node can not be executed because it does not have it's inputs")
1611
+
1612
+ def create_initial_run_information(self, number_of_nodes: int,
1613
+ run_type: Literal["fetch_one", "full_run"]):
1614
+ return RunInformation(
1615
+ flow_id=self.flow_id, start_time=datetime.datetime.now(), end_time=None,
1616
+ success=None, number_of_nodes=number_of_nodes, node_step_result=[],
1617
+ run_type=run_type
1618
+ )
1619
+
1620
+ def trigger_fetch_node(self, node_id: int) -> RunInformation | None:
1621
+ """Executes a specific node in the graph by its ID."""
1622
+ if self.flow_settings.is_running:
1623
+ raise Exception("Flow is already running")
1624
+ flow_node = self.get_node(node_id)
1625
+ self.flow_settings.is_running = True
1626
+ self.flow_settings.is_canceled = False
1627
+ self.flow_logger.clear_log_file()
1628
+ self.latest_run_info = self.create_initial_run_information(1, "fetch_one")
1629
+ node_logger = self.flow_logger.get_node_logger(flow_node.node_id)
1630
+ node_result = NodeResult(node_id=flow_node.node_id, node_name=flow_node.name)
1631
+ logger.info(f'Starting to run: node {flow_node.node_id}, start time: {node_result.start_timestamp}')
1632
+ try:
1633
+ self.latest_run_info.node_step_result.append(node_result)
1634
+ flow_node.execute_node(run_location=self.flow_settings.execution_location,
1635
+ performance_mode=False,
1636
+ node_logger=node_logger,
1637
+ optimize_for_downstream=False,
1638
+ reset_cache=True)
1639
+ node_result.error = str(flow_node.results.errors)
1640
+ if self.flow_settings.is_canceled:
1641
+ node_result.success = None
1642
+ node_result.success = None
1643
+ node_result.is_running = False
1644
+ node_result.success = flow_node.results.errors is None
1645
+ node_result.end_timestamp = time()
1646
+ node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
1647
+ node_result.is_running = False
1648
+ self.latest_run_info.nodes_completed += 1
1649
+ self.latest_run_info.end_time = datetime.datetime.now()
1650
+ self.flow_settings.is_running = False
1651
+ return self.get_run_info()
1652
+ except Exception as e:
1653
+ node_result.error = 'Node did not run'
1654
+ node_result.success = False
1655
+ node_result.end_timestamp = time()
1656
+ node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
1657
+ node_result.is_running = False
1658
+ node_logger.error(f'Error in node {flow_node.node_id}: {e}')
1659
+ finally:
1660
+ self.flow_settings.is_running = False
1661
+
1602
1662
  def run_graph(self) -> RunInformation | None:
1603
1663
  """Executes the entire data flow graph from start to finish.
1604
1664
 
@@ -1614,20 +1674,23 @@ class FlowGraph:
1614
1674
  if self.flow_settings.is_running:
1615
1675
  raise Exception('Flow is already running')
1616
1676
  try:
1677
+
1617
1678
  self.flow_settings.is_running = True
1618
1679
  self.flow_settings.is_canceled = False
1619
1680
  self.flow_logger.clear_log_file()
1620
- self.nodes_completed = 0
1621
- self.node_results = []
1622
- self.start_datetime = datetime.datetime.now()
1623
- self.end_datetime = None
1624
- self.latest_run_info = None
1625
1681
  self.flow_logger.info('Starting to run flowfile flow...')
1626
- skip_nodes, execution_order = compute_execution_plan(nodes=self.nodes, flow_starts=self._flow_starts+self.get_implicit_starter_nodes())
1682
+
1683
+ skip_nodes, execution_order = compute_execution_plan(
1684
+ nodes=self.nodes,
1685
+ flow_starts=self._flow_starts+self.get_implicit_starter_nodes()
1686
+ )
1687
+
1688
+ self.latest_run_info = self.create_initial_run_information(len(execution_order), "full_run")
1627
1689
 
1628
1690
  skip_node_message(self.flow_logger, skip_nodes)
1629
1691
  execution_order_message(self.flow_logger, execution_order)
1630
1692
  performance_mode = self.flow_settings.execution_mode == 'Performance'
1693
+
1631
1694
  for node in execution_order:
1632
1695
  node_logger = self.flow_logger.get_node_logger(node.node_id)
1633
1696
  if self.flow_settings.is_canceled:
@@ -1637,7 +1700,7 @@ class FlowGraph:
1637
1700
  node_logger.info(f'Skipping node {node.node_id}')
1638
1701
  continue
1639
1702
  node_result = NodeResult(node_id=node.node_id, node_name=node.name)
1640
- self.node_results.append(node_result)
1703
+ self.latest_run_info.node_step_result.append(node_result)
1641
1704
  logger.info(f'Starting to run: node {node.node_id}, start time: {node_result.start_timestamp}')
1642
1705
  node.execute_node(run_location=self.flow_settings.execution_location,
1643
1706
  performance_mode=performance_mode,
@@ -1663,7 +1726,7 @@ class FlowGraph:
1663
1726
  if not node_result.success:
1664
1727
  skip_nodes.extend(list(node.get_all_dependent_nodes()))
1665
1728
  node_logger.info(f'Completed node with success: {node_result.success}')
1666
- self.nodes_completed += 1
1729
+ self.latest_run_info.nodes_completed += 1
1667
1730
  self.flow_logger.info('Flow completed!')
1668
1731
  self.end_datetime = datetime.datetime.now()
1669
1732
  self.flow_settings.is_running = False
@@ -1675,28 +1738,23 @@ class FlowGraph:
1675
1738
  finally:
1676
1739
  self.flow_settings.is_running = False
1677
1740
 
1678
- def get_run_info(self) -> RunInformation:
1741
+ def get_run_info(self) -> RunInformation | None:
1679
1742
  """Gets a summary of the most recent graph execution.
1680
1743
 
1681
1744
  Returns:
1682
1745
  A RunInformation object with details about the last run.
1683
1746
  """
1747
+ is_running = self.flow_settings.is_running
1684
1748
  if self.latest_run_info is None:
1685
- node_results = self.node_results
1686
- success = all(nr.success for nr in node_results)
1687
- self.latest_run_info = RunInformation(start_time=self.start_datetime, end_time=self.end_datetime,
1688
- success=success,
1689
- node_step_result=node_results, flow_id=self.flow_id,
1690
- nodes_completed=self.nodes_completed,
1691
- number_of_nodes=len(self.nodes))
1692
- elif self.latest_run_info.nodes_completed != self.nodes_completed:
1693
- node_results = self.node_results
1694
- self.latest_run_info = RunInformation(start_time=self.start_datetime, end_time=self.end_datetime,
1695
- success=all(nr.success for nr in node_results),
1696
- node_step_result=node_results, flow_id=self.flow_id,
1697
- nodes_completed=self.nodes_completed,
1698
- number_of_nodes=len(self.nodes))
1699
- return self.latest_run_info
1749
+ return
1750
+
1751
+ elif not is_running and self.latest_run_info.success is not None:
1752
+ return self.latest_run_info
1753
+
1754
+ run_info = self.latest_run_info
1755
+ if not is_running:
1756
+ run_info.success = all(nr.success for nr in run_info.node_step_result)
1757
+ return run_info
1700
1758
 
1701
1759
  @property
1702
1760
  def node_connections(self) -> List[Tuple[int, int]]:
@@ -1767,8 +1825,14 @@ class FlowGraph:
1767
1825
  Args:
1768
1826
  flow_path: The path where the flow file will be saved.
1769
1827
  """
1770
- with open(flow_path, 'wb') as f:
1771
- pickle.dump(self.get_node_storage(), f)
1828
+ logger.info("Saving flow to %s", flow_path)
1829
+ os.makedirs(os.path.dirname(flow_path), exist_ok=True)
1830
+ try:
1831
+ with open(flow_path, 'wb') as f:
1832
+ pickle.dump(self.get_node_storage(), f)
1833
+ except Exception as e:
1834
+ logger.error(f"Error saving flow: {e}")
1835
+
1772
1836
  self.flow_settings.path = flow_path
1773
1837
 
1774
1838
  def get_frontend_data(self) -> dict:
@@ -12,7 +12,7 @@ from flowfile_core.configs.node_store import nodes as node_interface
12
12
  from flowfile_core.flowfile.setting_generator import setting_generator, setting_updator
13
13
  from time import sleep
14
14
  from flowfile_core.flowfile.flow_data_engine.subprocess_operations import (
15
- ExternalDfFetcher, ExternalSampler, results_exists, get_external_df_result,
15
+ ExternalDfFetcher, ExternalSampler, clear_task_from_worker, results_exists, get_external_df_result,
16
16
  ExternalDatabaseFetcher, ExternalDatabaseWriter, ExternalCloudWriter)
17
17
  from flowfile_core.flowfile.flow_node.models import (NodeStepSettings, NodeStepInputs, NodeSchemaInformation,
18
18
  NodeStepStats, NodeResults)
@@ -678,9 +678,10 @@ class FlowNode:
678
678
 
679
679
  if results_exists(self.hash):
680
680
  logger.warning('Not implemented')
681
+ clear_task_from_worker(self.hash)
681
682
 
682
683
  def needs_run(self, performance_mode: bool, node_logger: NodeLogger = None,
683
- execution_location: schemas.ExecutionLocationsLiteral = "worker") -> bool:
684
+ execution_location: schemas.ExecutionLocationsLiteral = "remote") -> bool:
684
685
  """Determines if the node needs to be executed.
685
686
 
686
687
  The decision is based on its run state, caching settings, and execution mode.
@@ -723,6 +724,8 @@ class FlowNode:
723
724
  Raises:
724
725
  Exception: Propagates exceptions from the execution.
725
726
  """
727
+ self.clear_table_example()
728
+
726
729
  def example_data_generator():
727
730
  example_data = None
728
731
 
@@ -735,6 +738,7 @@ class FlowNode:
735
738
  resulting_data = self.get_resulting_data()
736
739
 
737
740
  if not performance_mode:
741
+ self.node_stats.has_run_with_current_setup = True
738
742
  self.results.example_data_generator = example_data_generator()
739
743
  self.node_schema.result_schema = self.results.resulting_data.schema
740
744
  self.node_stats.has_completed_last_run = True
@@ -854,8 +858,12 @@ class FlowNode:
854
858
  logger.warning('No external process to cancel')
855
859
  self.node_stats.is_canceled = True
856
860
 
857
- def execute_node(self, run_location: schemas.ExecutionLocationsLiteral, reset_cache: bool = False,
858
- performance_mode: bool = False, retry: bool = True, node_logger: NodeLogger = None):
861
+ def execute_node(self, run_location: schemas.ExecutionLocationsLiteral,
862
+ reset_cache: bool = False,
863
+ performance_mode: bool = False,
864
+ retry: bool = True,
865
+ node_logger: NodeLogger = None,
866
+ optimize_for_downstream: bool = True):
859
867
  """Orchestrates the execution, handling location, caching, and retries.
860
868
 
861
869
  Args:
@@ -864,25 +872,33 @@ class FlowNode:
864
872
  performance_mode: If True, optimizes for speed over diagnostics.
865
873
  retry: If True, allows retrying execution on recoverable errors.
866
874
  node_logger: The logger for this node execution.
875
+ optimize_for_downstream: If true, operations that shuffle the order of rows are fully cached and provided as
876
+ input to downstream steps
867
877
 
868
878
  Raises:
869
879
  Exception: If the node_logger is not defined.
870
880
  """
871
881
  if node_logger is None:
872
882
  raise Exception('Flow logger is not defined')
873
- # node_logger = flow_logger.get_node_logger(self.node_id)
883
+ # TODO: Simplify which route is being picked there are many duplicate checks
884
+
874
885
  if reset_cache:
875
886
  self.remove_cache()
876
887
  self.node_stats.has_run_with_current_setup = False
877
888
  self.node_stats.has_completed_last_run = False
889
+
878
890
  if self.is_setup:
879
891
  node_logger.info(f'Starting to run {self.__name__}')
880
892
  if (self.needs_run(performance_mode, node_logger, run_location) or self.node_template.node_group == "output"
881
893
  and not (run_location == 'local')):
894
+ self.clear_table_example()
882
895
  self.prepare_before_run()
896
+ self.reset()
883
897
  try:
884
- if ((run_location == 'remote' or (self.node_default.transform_type == 'wide')
885
- and not run_location == 'local')) or self.node_settings.cache_results:
898
+ if (((run_location == 'remote' or
899
+ (self.node_default.transform_type == 'wide' and optimize_for_downstream) and
900
+ not run_location == 'local'))
901
+ or self.node_settings.cache_results):
886
902
  node_logger.info('Running the node remotely')
887
903
  if self.node_settings.cache_results:
888
904
  performance_mode = False
@@ -924,7 +940,7 @@ class FlowNode:
924
940
  node_logger.error(f'Error with running the node: {e}')
925
941
  self.node_stats.error = str(e)
926
942
  self.node_stats.has_completed_last_run = False
927
- self.node_stats.has_run_with_current_setup = True
943
+
928
944
  else:
929
945
  node_logger.info('Node has already run, not running the node')
930
946
  else:
@@ -1108,6 +1124,17 @@ class FlowNode:
1108
1124
  if self.singular_input:
1109
1125
  return self.all_inputs[0]
1110
1126
 
1127
+ def clear_table_example(self) -> None:
1128
+ """
1129
+ Clear the table example in the results so that it clears the existing results
1130
+ Returns:
1131
+ None
1132
+ """
1133
+
1134
+ self.results.example_data = None
1135
+ self.results.example_data_generator = None
1136
+ self.results.example_data_path = None
1137
+
1111
1138
  def get_table_example(self, include_data: bool = False) -> TableExample | None:
1112
1139
  """Generates a `TableExample` model summarizing the node's output.
1113
1140
 
@@ -1136,10 +1163,15 @@ class FlowNode:
1136
1163
  data = []
1137
1164
  schema = [FileColumn.model_validate(c.get_column_repr()) for c in self.schema]
1138
1165
  fl = self.get_resulting_data()
1166
+ has_example_data = self.results.example_data_generator is not None
1167
+
1139
1168
  return TableExample(node_id=self.node_id,
1140
1169
  name=str(self.node_id), number_of_records=999,
1141
1170
  number_of_columns=fl.number_of_fields,
1142
- table_schema=schema, columns=fl.columns, data=data)
1171
+ table_schema=schema, columns=fl.columns, data=data,
1172
+ has_example_data=has_example_data,
1173
+ has_run_with_current_setup=self.node_stats.has_run_with_current_setup
1174
+ )
1143
1175
  else:
1144
1176
  logger.warning('getting the table example but the node has not run')
1145
1177
  try:
@@ -3,11 +3,25 @@ from dataclasses import dataclass
3
3
  from typing import Dict, List
4
4
  import os
5
5
  from pathlib import Path
6
+ from datetime import datetime
6
7
 
7
8
  from flowfile_core.flowfile.manage.open_flowfile import open_flow
8
9
  from flowfile_core.flowfile.flow_graph import FlowGraph
9
10
  from flowfile_core.schemas.schemas import FlowSettings
10
11
  from flowfile_core.flowfile.utils import create_unique_id
12
+ from shared.storage_config import storage
13
+
14
+
15
+ def get_flow_save_location(flow_name: str) -> Path:
16
+ """Gets the initial save location for flow files"""
17
+ if ".flowfile" not in flow_name:
18
+ flow_name += ".flowfile"
19
+ return storage.temp_directory_for_flows / flow_name
20
+
21
+
22
+ def create_flow_name() -> str:
23
+ """Creates a unique flow name"""
24
+ return datetime.now().strftime("%Y%m%d_%H_%M_%S")+"_flow.flowfile"
11
25
 
12
26
 
13
27
  @dataclass
@@ -57,7 +71,7 @@ class FlowfileHandler:
57
71
  else:
58
72
  raise Exception('Flow not found')
59
73
 
60
- def add_flow(self, name: str, flow_path: str) -> int:
74
+ def add_flow(self, name: str = None, flow_path: str = None) -> int:
61
75
  """
62
76
  Creates a new flow with a reference to the flow path
63
77
  Args:
@@ -69,8 +83,13 @@ class FlowfileHandler:
69
83
 
70
84
  """
71
85
  next_id = create_unique_id()
72
- flow_info = FlowSettings(name=name, flow_id=next_id, save_location='', path=flow_path)
73
- _ = self.register_flow(flow_info)
86
+ if not name:
87
+ name = create_flow_name()
88
+ if not flow_path:
89
+ flow_path = get_flow_save_location(name)
90
+ flow_info = FlowSettings(name=name, flow_id=next_id, save_location=str(flow_path), path=str(flow_path))
91
+ flow = self.register_flow(flow_info)
92
+ flow.save_flow(flow.flow_settings.path)
74
93
  return next_id
75
94
 
76
95
  def get_flow_info(self, flow_id: int) -> FlowSettings:
@@ -61,21 +61,25 @@ def calculate_fuzzy_match_schema(fm_input: transform_schema.FuzzyMatchInput,
61
61
  join_inputs=fm_input.left_select)
62
62
  _order_join_inputs_based_on_col_order(col_order=[col.column_name for col in right_schema],
63
63
  join_inputs=fm_input.right_select)
64
+ for column in fm_input.left_select.renames:
65
+ if column.join_key:
66
+ column.keep = True
67
+ for column in fm_input.right_select.renames:
68
+ if column.join_key:
69
+ column.keep = True
64
70
  left_schema_dict, right_schema_dict = ({ls.name: ls for ls in left_schema}, {rs.name: rs for rs in right_schema})
65
71
  fm_input.auto_rename()
66
-
67
72
  right_renames = {column.old_name: column.new_name for column in fm_input.right_select.renames}
68
73
  new_join_mapping = rename_fuzzy_right_mapping(fm_input.join_mapping, right_renames)
69
-
70
74
  output_schema = []
71
75
  for column in fm_input.left_select.renames:
72
76
  column_schema = left_schema_dict.get(column.old_name)
73
- if column_schema and column.keep:
77
+ if column_schema and (column.keep or column.join_key):
74
78
  output_schema.append(FlowfileColumn.from_input(column.new_name, column_schema.data_type,
75
79
  example_values=column_schema.example_values))
76
80
  for column in fm_input.right_select.renames:
77
81
  column_schema = right_schema_dict.get(column.old_name)
78
- if column_schema and column.keep:
82
+ if column_schema and (column.keep or column.join_key):
79
83
  output_schema.append(FlowfileColumn.from_input(column.new_name, column_schema.data_type,
80
84
  example_values=column_schema.example_values))
81
85
  set_name_in_fuzzy_mappings(new_join_mapping)
@@ -56,7 +56,6 @@ def cross_join(node_data: "NodeData") -> NodeData:
56
56
  ji = transform_schema.CrossJoinInput(left_select=node_data.main_input.columns,
57
57
  right_select=node_data.right_input.columns)
58
58
  ji.auto_rename()
59
- print(ji)
60
59
  node_data.setting_input = input_schema.NodeCrossJoin(flow_id=node_data.flow_id,
61
60
  node_id=node_data.node_id,
62
61
  cross_join_input=ji)
flowfile_core/main.py CHANGED
@@ -7,6 +7,8 @@ import uvicorn
7
7
  from fastapi import FastAPI
8
8
  from fastapi.middleware.cors import CORSMiddleware
9
9
 
10
+ from shared.storage_config import storage
11
+
10
12
  from flowfile_core import ServerRun
11
13
  from flowfile_core.configs.settings import (SERVER_HOST, SERVER_PORT, WORKER_HOST, WORKER_PORT, WORKER_URL,)
12
14
 
@@ -18,13 +20,13 @@ from flowfile_core.routes.logs import router as logs_router
18
20
  from flowfile_core.routes.cloud_connections import router as cloud_connections_router
19
21
 
20
22
  from flowfile_core.configs.flow_logger import clear_all_flow_logs
23
+ storage.cleanup_directories()
21
24
 
22
25
  os.environ["FLOWFILE_MODE"] = "electron"
23
26
 
24
27
  should_exit = False
25
28
  server_instance = None
26
29
 
27
-
28
30
  @asynccontextmanager
29
31
  async def shutdown_handler(app: FastAPI):
30
32
  """Handles the graceful startup and shutdown of the FastAPI application.
@@ -79,6 +81,7 @@ app.include_router(secrets_router, prefix="/secrets", tags=["secrets"])
79
81
  app.include_router(cloud_connections_router, prefix="/cloud_connections", tags=["cloud_connections"])
80
82
 
81
83
 
84
+
82
85
  @app.post("/shutdown")
83
86
  async def shutdown():
84
87
  """An API endpoint to gracefully shut down the server.
@@ -26,7 +26,7 @@ from flowfile_core.configs.node_store import nodes
26
26
  from flowfile_core.configs.settings import IS_RUNNING_IN_DOCKER
27
27
  # File handling
28
28
  from flowfile_core.fileExplorer.funcs import (
29
- FileExplorer,
29
+ SecureFileExplorer,
30
30
  FileInfo,
31
31
  get_files_from_directory
32
32
  )
@@ -39,9 +39,11 @@ from flowfile_core.flowfile.extensions import get_instant_func_results
39
39
  from flowfile_core.flowfile.sources.external_sources.sql_source.sql_source import create_sql_source_from_db_settings
40
40
  from flowfile_core.run_lock import get_flow_run_lock
41
41
  # Schema and models
42
+
43
+ from shared.storage_config import storage
42
44
  from flowfile_core.schemas import input_schema, schemas, output_model
43
45
  from flowfile_core.utils import excel_file_manager
44
- from flowfile_core.utils.fileManager import create_dir, remove_paths
46
+ from flowfile_core.utils.fileManager import create_dir
45
47
  from flowfile_core.utils.utils import camel_case_to_snake_case
46
48
  from flowfile_core import flow_file_handler
47
49
  from flowfile_core.flowfile.database_connection_manager.db_connections import (store_database_connection,
@@ -54,7 +56,10 @@ from flowfile_core.database.connection import get_db
54
56
  router = APIRouter(dependencies=[Depends(get_current_active_user)])
55
57
 
56
58
  # Initialize services
57
- file_explorer = FileExplorer('/app/shared' if IS_RUNNING_IN_DOCKER else None)
59
+ file_explorer = SecureFileExplorer(
60
+ start_path=storage.user_data_directory,
61
+ sandbox_root=storage.user_data_directory
62
+ )
58
63
 
59
64
 
60
65
  def get_node_model(setting_name_ref: str):
@@ -148,7 +153,7 @@ async def get_directory_contents(directory: str, file_types: List[str] = None,
148
153
  Returns:
149
154
  A list of `FileInfo` objects representing the directory's contents.
150
155
  """
151
- directory_explorer = FileExplorer(directory)
156
+ directory_explorer = SecureFileExplorer(directory, storage.user_data_directory)
152
157
  try:
153
158
  return directory_explorer.list_contents(show_hidden=include_hidden, file_types=file_types)
154
159
  except Exception as e:
@@ -198,6 +203,24 @@ async def get_active_flow_file_sessions() -> List[schemas.FlowSettings]:
198
203
  return [flf.flow_settings for flf in flow_file_handler.flowfile_flows]
199
204
 
200
205
 
206
+ @router.post("/node/trigger_fetch_data", tags=['editor'])
207
+ async def trigger_fetch_node_data(flow_id: int, node_id: int, background_tasks: BackgroundTasks):
208
+ """Fetches and refreshes the data for a specific node."""
209
+ flow = flow_file_handler.get_flow(flow_id)
210
+ lock = get_flow_run_lock(flow_id)
211
+ async with lock:
212
+ if flow.flow_settings.is_running:
213
+ raise HTTPException(422, 'Flow is already running')
214
+ try:
215
+ flow.validate_if_node_can_be_fetched(node_id)
216
+ except Exception as e:
217
+ raise HTTPException(422, str(e))
218
+ background_tasks.add_task(flow.trigger_fetch_node, node_id)
219
+ return JSONResponse(content={"message": "Data started",
220
+ "flow_id": flow_id,
221
+ "node_id": node_id}, status_code=status.HTTP_200_OK)
222
+
223
+
201
224
  @router.post('/flow/run/', tags=['editor'])
202
225
  async def run_flow(flow_id: int, background_tasks: BackgroundTasks) -> JSONResponse:
203
226
  """Executes a flow in a background task.
@@ -228,6 +251,16 @@ def cancel_flow(flow_id: int):
228
251
  flow.cancel()
229
252
 
230
253
 
254
+ @router.post("/flow/apply_standard_layout/", tags=["editor"])
255
+ def apply_standard_layout(flow_id: int):
256
+ flow = flow_file_handler.get_flow(flow_id)
257
+ if not flow:
258
+ raise HTTPException(status_code=404, detail="Flow not found")
259
+ if flow.flow_settings.is_running:
260
+ raise HTTPException(422, "Flow is running")
261
+ flow.apply_layout()
262
+
263
+
231
264
  @router.get('/flow/run_status/', tags=['editor'],
232
265
  response_model=output_model.RunInformation)
233
266
  def get_run_status(flow_id: int, response: Response):
@@ -238,10 +271,12 @@ def get_run_status(flow_id: int, response: Response):
238
271
  flow = flow_file_handler.get_flow(flow_id)
239
272
  if not flow:
240
273
  raise HTTPException(status_code=404, detail="Flow not found")
274
+ if flow.latest_run_info is None:
275
+ raise HTTPException(status_code=404, detail="No run information available")
241
276
  if flow.flow_settings.is_running:
242
277
  response.status_code = status.HTTP_202_ACCEPTED
243
- return flow.get_run_info()
244
- response.status_code = status.HTTP_200_OK
278
+ else:
279
+ response.status_code = status.HTTP_200_OK
245
280
  return flow.get_run_info()
246
281
 
247
282
 
@@ -439,11 +474,24 @@ def get_generated_code(flow_id: int) -> str:
439
474
 
440
475
 
441
476
  @router.post('/editor/create_flow/', tags=['editor'])
442
- def create_flow(flow_path: str):
477
+ def create_flow(flow_path: str = None, name: str = None):
443
478
  """Creates a new, empty flow file at the specified path and registers a session for it."""
444
- flow_path = Path(flow_path)
445
- logger.info('Creating flow')
446
- return flow_file_handler.add_flow(name=flow_path.stem, flow_path=str(flow_path))
479
+ if flow_path is not None and name is None:
480
+ name = Path(flow_path).stem
481
+ elif flow_path is not None and name is not None:
482
+ if name not in flow_path and flow_path.endswith(".flowfile"):
483
+ raise HTTPException(422, 'The name must be part of the flow path when a full path is provided')
484
+ elif name in flow_path and not flow_path.endswith(".flowfile"):
485
+ flow_path = str(Path(flow_path) / (name + ".flowfile"))
486
+ elif name not in flow_path and name.endswith(".flowfile"):
487
+ flow_path = str(Path(flow_path) / name)
488
+ elif name not in flow_path and not name.endswith(".flowfile"):
489
+ flow_path = str(Path(flow_path) / (name + ".flowfile"))
490
+ if flow_path is not None:
491
+ flow_path_ref = Path(flow_path)
492
+ if not flow_path_ref.parent.exists():
493
+ raise HTTPException(422, 'The directory does not exist')
494
+ return flow_file_handler.add_flow(name=name, flow_path=flow_path)
447
495
 
448
496
 
449
497
  @router.post('/editor/close_flow/', tags=['editor'])
@@ -471,6 +519,7 @@ def add_generic_settings(input_data: Dict[str, Any], node_type: str, current_use
471
519
  add_func = getattr(flow, 'add_' + node_type)
472
520
  parsed_input = None
473
521
  setting_name_ref = 'node' + node_type.replace('_', '')
522
+
474
523
  if add_func is None:
475
524
  raise HTTPException(404, 'could not find the function')
476
525
  try:
@@ -196,7 +196,6 @@ class NodeBase(BaseModel):
196
196
  user_id: Optional[int] = None
197
197
  is_flow_output: Optional[bool] = False
198
198
 
199
-
200
199
  class NodeSingleInput(NodeBase):
201
200
  """A base model for any node that takes a single data input."""
202
201
  depending_on_id: Optional[int] = -1
@@ -1,4 +1,4 @@
1
- from typing import List, Dict, Optional, Any
1
+ from typing import List, Dict, Optional, Any, Literal
2
2
  from pydantic import BaseModel, Field
3
3
  from datetime import datetime
4
4
  import time
@@ -21,10 +21,11 @@ class RunInformation(BaseModel):
21
21
  flow_id: int
22
22
  start_time: Optional[datetime] = Field(default_factory=datetime.now)
23
23
  end_time: Optional[datetime] = None
24
- success: bool
24
+ success: Optional[bool] = None
25
25
  nodes_completed: int = 0
26
26
  number_of_nodes: int = 0
27
27
  node_step_result: List[NodeResult]
28
+ run_type: Literal["fetch_one", "full_run"]
28
29
 
29
30
 
30
31
  class BaseItem(BaseModel):
@@ -61,6 +62,8 @@ class TableExample(BaseModel):
61
62
  table_schema: List[FileColumn]
62
63
  columns: List[str]
63
64
  data: Optional[List[Dict]] = {}
65
+ has_example_data: bool = False
66
+ has_run_with_current_setup: bool = False
64
67
 
65
68
 
66
69
  class NodeData(BaseModel):
@@ -138,6 +138,8 @@ class NodeTemplate(BaseModel):
138
138
  node_group: str
139
139
  prod_ready: bool = True
140
140
  can_be_start: bool = False
141
+ drawer_title: str = "Node title"
142
+ drawer_intro: str = "Drawer into"
141
143
 
142
144
 
143
145
  class NodeInformation(BaseModel):