Flowfile 0.3.9__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile might be problematic. Click here for more details.

Files changed (169) hide show
  1. flowfile/__init__.py +6 -1
  2. flowfile/api.py +0 -1
  3. flowfile/web/static/assets/{CloudConnectionManager-c97c25f8.js → CloudConnectionManager-109ecc3c.js} +2 -2
  4. flowfile/web/static/assets/{CloudStorageReader-f1ff509e.js → CloudStorageReader-19cdd67a.js} +11 -78
  5. flowfile/web/static/assets/{CloudStorageWriter-034f8b78.js → CloudStorageWriter-48e0ae20.js} +12 -79
  6. flowfile/web/static/assets/{CloudStorageWriter-49c9a4b2.css → CloudStorageWriter-b0ee067f.css} +24 -24
  7. flowfile/web/static/assets/ColumnSelector-47996a16.css +10 -0
  8. flowfile/web/static/assets/ColumnSelector-ecaf7c44.js +83 -0
  9. flowfile/web/static/assets/ContextMenu-2b348c4c.js +41 -0
  10. flowfile/web/static/assets/{SettingsSection-9c836ecc.css → ContextMenu-4c74eef1.css} +0 -21
  11. flowfile/web/static/assets/ContextMenu-63cfa99b.css +26 -0
  12. flowfile/web/static/assets/ContextMenu-a779eed7.js +41 -0
  13. flowfile/web/static/assets/ContextMenu-c13f91d0.css +26 -0
  14. flowfile/web/static/assets/ContextMenu-eca26a03.js +41 -0
  15. flowfile/web/static/assets/{CrossJoin-41efa4cb.css → CrossJoin-1119d18e.css} +18 -18
  16. flowfile/web/static/assets/{CrossJoin-9e156ebe.js → CrossJoin-a88f8142.js} +14 -84
  17. flowfile/web/static/assets/CustomNode-74a37f74.css +32 -0
  18. flowfile/web/static/assets/CustomNode-cb863dff.js +211 -0
  19. flowfile/web/static/assets/{DatabaseConnectionSettings-d5c625b3.js → DatabaseConnectionSettings-819d3267.js} +3 -3
  20. flowfile/web/static/assets/{DatabaseManager-265adc5e.js → DatabaseManager-84ee2834.js} +2 -2
  21. flowfile/web/static/assets/{DatabaseReader-0b10551e.js → DatabaseReader-060dd412.js} +14 -114
  22. flowfile/web/static/assets/{DatabaseReader-f50c6558.css → DatabaseReader-ae61773c.css} +0 -27
  23. flowfile/web/static/assets/{DatabaseWriter-c17c6916.js → DatabaseWriter-7fc7750f.js} +13 -74
  24. flowfile/web/static/assets/{ExploreData-5bdae813.css → ExploreData-2d0cf4db.css} +8 -14
  25. flowfile/web/static/assets/ExploreData-82c95991.js +192 -0
  26. flowfile/web/static/assets/{ExternalSource-3a66556c.js → ExternalSource-e1a6ddc7.js} +8 -79
  27. flowfile/web/static/assets/{Filter-91ad87e7.js → Filter-8aca894a.js} +12 -85
  28. flowfile/web/static/assets/{Filter-a9d08ba1.css → Filter-f62091b3.css} +3 -3
  29. flowfile/web/static/assets/{Formula-29f19d21.css → Formula-bb96803d.css} +4 -4
  30. flowfile/web/static/assets/{Formula-3c395ab1.js → Formula-e33686d9.js} +18 -85
  31. flowfile/web/static/assets/{FuzzyMatch-6857de82.css → FuzzyMatch-1010f966.css} +42 -42
  32. flowfile/web/static/assets/{FuzzyMatch-2df0d230.js → FuzzyMatch-abda150d.js} +16 -87
  33. flowfile/web/static/assets/{GraphSolver-d285877f.js → GraphSolver-4ecad1d7.js} +13 -159
  34. flowfile/web/static/assets/GraphSolver-f0cb7bfb.css +22 -0
  35. flowfile/web/static/assets/{GroupBy-0bd1cc6b.js → GroupBy-656d07f3.js} +12 -75
  36. flowfile/web/static/assets/{Unique-b5615727.css → GroupBy-b9505323.css} +8 -8
  37. flowfile/web/static/assets/{Join-5a78a203.js → Join-b84ec849.js} +15 -85
  38. flowfile/web/static/assets/{Join-f45eff22.css → Join-fd79b451.css} +20 -20
  39. flowfile/web/static/assets/{ManualInput-a71b52c6.css → ManualInput-3246a08d.css} +20 -20
  40. flowfile/web/static/assets/{ManualInput-93aef9d6.js → ManualInput-346f4135.js} +11 -82
  41. flowfile/web/static/assets/MultiSelect-61b98268.js +5 -0
  42. flowfile/web/static/assets/MultiSelect.vue_vue_type_script_setup_true_lang-2a7c8312.js +63 -0
  43. flowfile/web/static/assets/NumericInput-e36602c2.js +5 -0
  44. flowfile/web/static/assets/NumericInput.vue_vue_type_script_setup_true_lang-211a1990.js +35 -0
  45. flowfile/web/static/assets/Output-ddc9079f.css +37 -0
  46. flowfile/web/static/assets/{Output-411ecaee.js → Output-eb041599.js} +13 -243
  47. flowfile/web/static/assets/Pivot-cf333e3d.css +22 -0
  48. flowfile/web/static/assets/{Pivot-89db4b04.js → Pivot-f5c774f4.js} +14 -138
  49. flowfile/web/static/assets/PivotValidation-26546cbc.js +61 -0
  50. flowfile/web/static/assets/PivotValidation-891ddfb0.css +13 -0
  51. flowfile/web/static/assets/PivotValidation-c46cd420.css +13 -0
  52. flowfile/web/static/assets/PivotValidation-e150a24b.js +61 -0
  53. flowfile/web/static/assets/{PolarsCode-a9f974f8.js → PolarsCode-da3a7abf.js} +13 -80
  54. flowfile/web/static/assets/Read-0c768769.js +243 -0
  55. flowfile/web/static/assets/Read-6b17491f.css +62 -0
  56. flowfile/web/static/assets/RecordCount-84736276.js +53 -0
  57. flowfile/web/static/assets/{RecordId-55ae7d36.js → RecordId-60055e6d.js} +8 -80
  58. flowfile/web/static/assets/SQLQueryComponent-36cef432.css +27 -0
  59. flowfile/web/static/assets/SQLQueryComponent-8a486004.js +38 -0
  60. flowfile/web/static/assets/{Sample-b4a18476.js → Sample-2d662611.js} +8 -77
  61. flowfile/web/static/assets/{SecretManager-b066d13a.js → SecretManager-ef586cab.js} +2 -2
  62. flowfile/web/static/assets/{Select-727688dc.js → Select-2e4a6965.js} +11 -85
  63. flowfile/web/static/assets/SettingsSection-2e4d03c4.css +21 -0
  64. flowfile/web/static/assets/{SettingsSection-695ac487.js → SettingsSection-310b61c0.js} +2 -40
  65. flowfile/web/static/assets/SettingsSection-5634f439.js +45 -0
  66. flowfile/web/static/assets/SettingsSection-5c696bee.css +20 -0
  67. flowfile/web/static/assets/SettingsSection-71e6b7e3.css +21 -0
  68. flowfile/web/static/assets/SettingsSection-7c68b19f.js +53 -0
  69. flowfile/web/static/assets/SingleSelect-7298811a.js +5 -0
  70. flowfile/web/static/assets/SingleSelect.vue_vue_type_script_setup_true_lang-43807bad.js +62 -0
  71. flowfile/web/static/assets/SliderInput-53105476.js +40 -0
  72. flowfile/web/static/assets/SliderInput-b8fb6a8c.css +4 -0
  73. flowfile/web/static/assets/{GroupBy-ab1ea74b.css → Sort-3643d625.css} +8 -8
  74. flowfile/web/static/assets/{Sort-be3339a8.js → Sort-4fdebe74.js} +12 -97
  75. flowfile/web/static/assets/TextInput-28366b7e.js +5 -0
  76. flowfile/web/static/assets/TextInput.vue_vue_type_script_setup_true_lang-9cad14ba.js +32 -0
  77. flowfile/web/static/assets/{TextToRows-c92d1ec2.css → TextToRows-5d2c1190.css} +9 -9
  78. flowfile/web/static/assets/{TextToRows-7b8998da.js → TextToRows-73ffa692.js} +14 -83
  79. flowfile/web/static/assets/ToggleSwitch-598add30.js +5 -0
  80. flowfile/web/static/assets/ToggleSwitch.vue_vue_type_script_setup_true_lang-f620cd32.js +31 -0
  81. flowfile/web/static/assets/{UnavailableFields-8b0cb48e.js → UnavailableFields-66239e83.js} +2 -2
  82. flowfile/web/static/assets/Union-26b10614.js +77 -0
  83. flowfile/web/static/assets/{Union-8d9ac7f9.css → Union-af6c3d9b.css} +6 -6
  84. flowfile/web/static/assets/{Unique-af5a80b4.js → Unique-33b9edbb.js} +22 -91
  85. flowfile/web/static/assets/{Sort-7ccfa0fe.css → Unique-f9fb0809.css} +8 -8
  86. flowfile/web/static/assets/Unpivot-1e422df3.css +30 -0
  87. flowfile/web/static/assets/{Unpivot-5195d411.js → Unpivot-ef69d0e2.js} +12 -166
  88. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +13 -0
  89. flowfile/web/static/assets/UnpivotValidation-8658388e.js +51 -0
  90. flowfile/web/static/assets/{ExploreData-18a4fe52.js → VueGraphicWalker-4d7861f4.js} +4 -264
  91. flowfile/web/static/assets/VueGraphicWalker-ed5ab88b.css +6 -0
  92. flowfile/web/static/assets/{api-023d1733.js → api-2d1394bd.js} +1 -1
  93. flowfile/web/static/assets/{api-cb00cce6.js → api-c908fffe.js} +1 -1
  94. flowfile/web/static/assets/{designer-6c322d8e.js → designer-1667687d.js} +2201 -705
  95. flowfile/web/static/assets/{designer-2197d782.css → designer-665e9408.css} +836 -201
  96. flowfile/web/static/assets/{documentation-4d1fafe1.js → documentation-5eed779e.js} +1 -1
  97. flowfile/web/static/assets/{dropDown-0b46dd77.js → dropDown-41ebe3c2.js} +1 -1
  98. flowfile/web/static/assets/{fullEditor-ec4e4f95.js → fullEditor-0670d32d.js} +2 -2
  99. flowfile/web/static/assets/{genericNodeSettings-def5879b.js → genericNodeSettings-38410ebf.js} +3 -3
  100. flowfile/web/static/assets/{index-681a3ed0.css → index-50508d4d.css} +8 -0
  101. flowfile/web/static/assets/{index-683fc198.js → index-5ec791df.js} +210 -31
  102. flowfile/web/static/assets/outputCsv-059583b6.js +86 -0
  103. flowfile/web/static/assets/{Output-48f81019.css → outputCsv-9cc59e0b.css} +0 -143
  104. flowfile/web/static/assets/outputExcel-76b1e02c.js +56 -0
  105. flowfile/web/static/assets/outputExcel-b41305c0.css +102 -0
  106. flowfile/web/static/assets/outputParquet-440fd4c7.js +31 -0
  107. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +4 -0
  108. flowfile/web/static/assets/readCsv-9813903a.js +178 -0
  109. flowfile/web/static/assets/readCsv-bca3ed53.css +52 -0
  110. flowfile/web/static/assets/readExcel-7f40d237.js +203 -0
  111. flowfile/web/static/assets/readExcel-e1b381ea.css +64 -0
  112. flowfile/web/static/assets/readParquet-22d56002.js +26 -0
  113. flowfile/web/static/assets/readParquet-cee068e2.css +19 -0
  114. flowfile/web/static/assets/{secretApi-baceb6f9.js → secretApi-b3cb072e.js} +1 -1
  115. flowfile/web/static/assets/{selectDynamic-de91449a.js → selectDynamic-7ad95bca.js} +7 -7
  116. flowfile/web/static/assets/{selectDynamic-b062bc9b.css → selectDynamic-aa913ff4.css} +16 -16
  117. flowfile/web/static/assets/user-defined-icon-0ae16c90.png +0 -0
  118. flowfile/web/static/assets/{vue-codemirror.esm-dc5e3348.js → vue-codemirror.esm-b1dfaa46.js} +59 -33
  119. flowfile/web/static/assets/{vue-content-loader.es-ba94b82f.js → vue-content-loader.es-22bac17c.js} +1 -1
  120. flowfile/web/static/index.html +2 -2
  121. {flowfile-0.3.9.dist-info → flowfile-0.4.0.dist-info}/METADATA +1 -1
  122. {flowfile-0.3.9.dist-info → flowfile-0.4.0.dist-info}/RECORD +160 -102
  123. flowfile_core/configs/flow_logger.py +5 -13
  124. flowfile_core/configs/node_store/__init__.py +30 -0
  125. flowfile_core/configs/node_store/nodes.py +383 -99
  126. flowfile_core/configs/node_store/user_defined_node_registry.py +193 -0
  127. flowfile_core/configs/settings.py +2 -1
  128. flowfile_core/database/connection.py +5 -21
  129. flowfile_core/fileExplorer/funcs.py +239 -121
  130. flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +4 -0
  131. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +19 -34
  132. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +36 -0
  133. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +28 -8
  134. flowfile_core/flowfile/flow_graph.py +117 -34
  135. flowfile_core/flowfile/flow_node/flow_node.py +45 -13
  136. flowfile_core/flowfile/handler.py +22 -3
  137. flowfile_core/flowfile/manage/open_flowfile.py +9 -1
  138. flowfile_core/flowfile/node_designer/__init__.py +47 -0
  139. flowfile_core/flowfile/node_designer/_type_registry.py +197 -0
  140. flowfile_core/flowfile/node_designer/custom_node.py +371 -0
  141. flowfile_core/flowfile/node_designer/data_types.py +146 -0
  142. flowfile_core/flowfile/node_designer/ui_components.py +277 -0
  143. flowfile_core/flowfile/schema_callbacks.py +8 -4
  144. flowfile_core/flowfile/setting_generator/settings.py +0 -1
  145. flowfile_core/main.py +5 -1
  146. flowfile_core/routes/routes.py +73 -28
  147. flowfile_core/routes/user_defined_components.py +55 -0
  148. flowfile_core/schemas/input_schema.py +7 -1
  149. flowfile_core/schemas/output_model.py +5 -2
  150. flowfile_core/schemas/schemas.py +8 -3
  151. flowfile_core/schemas/transform_schema.py +1 -0
  152. flowfile_core/utils/validate_setup.py +3 -1
  153. flowfile_worker/__init__.py +6 -35
  154. flowfile_worker/main.py +5 -2
  155. flowfile_worker/routes.py +47 -5
  156. shared/__init__.py +15 -0
  157. shared/storage_config.py +258 -0
  158. flowfile/web/static/assets/GraphSolver-17fd26db.css +0 -68
  159. flowfile/web/static/assets/Pivot-f415e85f.css +0 -35
  160. flowfile/web/static/assets/Read-80dc1675.css +0 -197
  161. flowfile/web/static/assets/Read-c3b1929c.js +0 -701
  162. flowfile/web/static/assets/RecordCount-4e95f98e.js +0 -122
  163. flowfile/web/static/assets/Union-89fd73dc.js +0 -146
  164. flowfile/web/static/assets/Unpivot-246e9bbd.css +0 -77
  165. flowfile/web/static/assets/nodeTitle-a16db7c3.js +0 -227
  166. flowfile/web/static/assets/nodeTitle-f4b12bcb.css +0 -134
  167. {flowfile-0.3.9.dist-info → flowfile-0.4.0.dist-info}/LICENSE +0 -0
  168. {flowfile-0.3.9.dist-info → flowfile-0.4.0.dist-info}/WHEEL +0 -0
  169. {flowfile-0.3.9.dist-info → flowfile-0.4.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,55 @@
1
+
2
+ from typing import Dict, Any
3
+
4
+ from fastapi import APIRouter, HTTPException, Depends
5
+
6
+ from flowfile_core import flow_file_handler
7
+ # Core modules
8
+ from flowfile_core.auth.jwt import get_current_active_user
9
+ from flowfile_core.configs import logger
10
+ from flowfile_core.configs.node_store import CUSTOM_NODE_STORE
11
+ # File handling
12
+ from flowfile_core.schemas import input_schema
13
+ from flowfile_core.utils.utils import camel_case_to_snake_case
14
+
15
+ # External dependencies
16
+
17
+
18
+ router = APIRouter()
19
+
20
+
21
@router.get("/custom-node-schema", summary="Get a simple UI schema")
def get_simple_custom_object(flow_id: int, node_id: int):
    """
    Return the frontend UI schema for a user-defined (custom) node.

    The node is fetched from the flow via ``flow_file_handler.get_node`` and
    its ``node_type`` is used to look up the matching entry in
    ``CUSTOM_NODE_STORE``. When the node is already set up, the schema is
    built from its stored settings; otherwise a default-constructed instance
    of the custom node supplies the schema.

    Args:
        flow_id: Identifier of the flow that contains the node.
        node_id: Identifier of the node inside that flow.

    Returns:
        Whatever ``get_frontend_schema()`` of the resolved custom node returns.

    Raises:
        HTTPException: 404 when fetching the node raises, or when the node's
            type has no entry in ``CUSTOM_NODE_STORE``.
    """
    try:
        node = flow_file_handler.get_node(flow_id=flow_id, node_id=node_id)
    except Exception as e:
        # Chain the original error so its traceback is not lost behind the 404.
        raise HTTPException(status_code=404, detail=str(e)) from e
    user_defined_node = CUSTOM_NODE_STORE.get(node.node_type)

    if not user_defined_node:
        raise HTTPException(status_code=404, detail=f"Node type '{node.node_type}' not found")
    if node.is_setup:
        # Rehydrate the custom node from the persisted settings so the schema
        # reflects the values the user already configured.
        settings = node.setting_input.settings
        return user_defined_node.from_settings(settings).get_frontend_schema()
    return user_defined_node().get_frontend_schema()
39
+
40
+
41
@router.post("/update_user_defined_node", tags=["transform"])
def update_user_defined_node(input_data: Dict[str, Any], node_type: str, current_user=Depends(get_current_active_user)):
    """
    Create or update a user-defined node in a flow from raw settings.

    The payload is stamped with the current user's id, validated as an
    ``input_schema.UserDefinedNode``, turned into an initialized custom node
    through ``from_settings`` and registered on the flow with
    ``add_user_defined_node``.

    Args:
        input_data: Raw node payload; must contain ``flow_id`` and ``node_id``.
        node_type: Node type name; converted with ``camel_case_to_snake_case``
            before the lookup in ``CUSTOM_NODE_STORE``.
        current_user: Authenticated user injected by FastAPI.

    Returns:
        None.

    Raises:
        HTTPException: 422 when ``flow_id``/``node_id`` are missing or
            ``flow_id`` is not an integer; 404 when the flow or the node type
            cannot be found.
    """
    # Validate the required keys up front: previously a missing key surfaced
    # as an unhandled TypeError/KeyError, i.e. an opaque HTTP 500.
    missing = [key for key in ('flow_id', 'node_id') if input_data.get(key) is None]
    if missing:
        raise HTTPException(status_code=422, detail=f"Missing required field(s): {', '.join(missing)}")
    try:
        flow_id = int(input_data['flow_id'])
    except (TypeError, ValueError) as e:
        raise HTTPException(status_code=422, detail="'flow_id' must be an integer") from e

    input_data['user_id'] = current_user.id
    node_type = camel_case_to_snake_case(node_type)
    logger.info(f'Updating the data for flow: {flow_id}, node {input_data["node_id"]}')
    flow = flow_file_handler.get_flow(flow_id)
    if flow is None:
        # Guard against dereferencing a missing flow further down.
        raise HTTPException(status_code=404, detail=f"Flow '{flow_id}' not found")
    user_defined_model = CUSTOM_NODE_STORE.get(node_type)
    if not user_defined_model:
        raise HTTPException(status_code=404, detail=f"Node type '{node_type}' not found")

    user_defined_node_settings = input_schema.UserDefinedNode.model_validate(input_data)
    initialized_model = user_defined_model.from_settings(user_defined_node_settings.settings)

    flow.add_user_defined_node(custom_node=initialized_model, user_defined_node_settings=user_defined_node_settings)
@@ -1,4 +1,4 @@
1
- from typing import List, Optional, Literal, Iterator
1
+ from typing import List, Optional, Literal, Iterator, Any
2
2
  from flowfile_core.schemas import transform_schema
3
3
  from pathlib import Path
4
4
  import os
@@ -195,6 +195,7 @@ class NodeBase(BaseModel):
195
195
  description: Optional[str] = ''
196
196
  user_id: Optional[int] = None
197
197
  is_flow_output: Optional[bool] = False
198
+ is_user_defined: Optional[bool] = False # Indicator if the node is a user defined node
198
199
 
199
200
 
200
201
  class NodeSingleInput(NodeBase):
@@ -517,3 +518,8 @@ class NodeRecordCount(NodeSingleInput):
517
518
  class NodePolarsCode(NodeMultiInput):
518
519
  """Settings for a node that executes arbitrary user-provided Polars code."""
519
520
  polars_code_input: transform_schema.PolarsCodeInput
521
+
522
+
523
+ class UserDefinedNode(NodeMultiInput):
524
+ """Settings for a node that contains the user defined node information"""
525
+ settings: Any
@@ -1,4 +1,4 @@
1
- from typing import List, Dict, Optional, Any
1
+ from typing import List, Dict, Optional, Any, Literal
2
2
  from pydantic import BaseModel, Field
3
3
  from datetime import datetime
4
4
  import time
@@ -21,10 +21,11 @@ class RunInformation(BaseModel):
21
21
  flow_id: int
22
22
  start_time: Optional[datetime] = Field(default_factory=datetime.now)
23
23
  end_time: Optional[datetime] = None
24
- success: bool
24
+ success: Optional[bool] = None
25
25
  nodes_completed: int = 0
26
26
  number_of_nodes: int = 0
27
27
  node_step_result: List[NodeResult]
28
+ run_type: Literal["fetch_one", "full_run"]
28
29
 
29
30
 
30
31
  class BaseItem(BaseModel):
@@ -61,6 +62,8 @@ class TableExample(BaseModel):
61
62
  table_schema: List[FileColumn]
62
63
  columns: List[str]
63
64
  data: Optional[List[Dict]] = {}
65
+ has_example_data: bool = False
66
+ has_run_with_current_setup: bool = False
64
67
 
65
68
 
66
69
  class NodeData(BaseModel):
@@ -5,6 +5,9 @@ from flowfile_core.configs.settings import OFFLOAD_TO_WORKER
5
5
  ExecutionModeLiteral = Literal['Development', 'Performance']
6
6
  ExecutionLocationsLiteral = Literal['local', 'remote']
7
7
 
8
+ # Type literals for classifying nodes.
9
+ NodeTypeLiteral = Literal['input', 'output', 'process']
10
+ TransformTypeLiteral = Literal['narrow', 'wide', 'other']
8
11
 
9
12
  def get_global_execution_location() -> ExecutionLocationsLiteral:
10
13
  """
@@ -135,9 +138,14 @@ class NodeTemplate(BaseModel):
135
138
  output: int
136
139
  image: str
137
140
  multi: bool = False
141
+ node_type: NodeTypeLiteral
142
+ transform_type: TransformTypeLiteral
138
143
  node_group: str
139
144
  prod_ready: bool = True
140
145
  can_be_start: bool = False
146
+ drawer_title: str = "Node title"
147
+ drawer_intro: str = "Drawer into"
148
+ custom_node: Optional[bool] = False
141
149
 
142
150
 
143
151
  class NodeInformation(BaseModel):
@@ -261,9 +269,6 @@ class VueFlowInput(BaseModel):
261
269
  node_inputs: List[NodeInput]
262
270
 
263
271
 
264
- # Type literals for classifying nodes.
265
- NodeTypeLiteral = Literal['input', 'output', 'process']
266
- TransformTypeLiteral = Literal['narrow', 'wide', 'other']
267
272
 
268
273
 
269
274
  class NodeDefault(BaseModel):
@@ -8,6 +8,7 @@ from typing import NamedTuple
8
8
 
9
9
  from pl_fuzzy_frame_match.models import FuzzyMapping
10
10
 
11
+ FuzzyMap = FuzzyMapping # For backwards compatibility
11
12
 
12
13
  def get_func_type_mapping(func: str):
13
14
  """Infers the output data type of common aggregation functions."""
@@ -3,7 +3,7 @@ as have a component in flowfile_frontend"""
3
3
 
4
4
  from flowfile_core.schemas import input_schema
5
5
  from flowfile_core.flowfile.flow_graph import FlowGraph
6
- from flowfile_core.configs.node_store.nodes import nodes_list, NodeTemplate
6
+ from flowfile_core.configs.node_store import nodes_list, NodeTemplate
7
7
  import inspect
8
8
 
9
9
 
@@ -31,6 +31,8 @@ def validate_setup():
31
31
  Raises ValueError if any node is missing either.
32
32
  """
33
33
  for node in nodes_list:
34
+ if node.custom_node:
35
+ continue
34
36
  check_if_node_has_add_function_in_flow_graph(node)
35
37
  check_if_node_has_input_schema_definition(node)
36
38
 
@@ -1,15 +1,15 @@
1
1
  from typing import Dict
2
- import tempfile
3
2
  import threading
4
3
  import multiprocessing
5
- import os
6
- import shutil
7
- multiprocessing.set_start_method('spawn', force=True)
4
+ from shared.storage_config import storage
8
5
 
6
+ multiprocessing.set_start_method('spawn', force=True)
9
7
 
10
8
  from multiprocessing import get_context
11
9
  from flowfile_worker.models import Status
10
+
12
11
  mp_context = get_context("spawn")
12
+
13
13
  status_dict: Dict[str, Status] = dict()
14
14
  process_dict = dict()
15
15
 
@@ -17,39 +17,10 @@ status_dict_lock = threading.Lock()
17
17
  process_dict_lock = threading.Lock()
18
18
 
19
19
 
20
- class SharedTempDirectory:
21
- """A class that mimics tempfile.TemporaryDirectory but uses a fixed directory"""
22
- def __init__(self, dir_path):
23
- self._path = dir_path
24
- os.makedirs(self._path, exist_ok=True)
25
-
26
- @property
27
- def name(self):
28
- return self._path
29
-
30
- def cleanup(self):
31
- """Remove all contents of the temp directory"""
32
- try:
33
- shutil.rmtree(self._path)
34
- os.makedirs(self._path, exist_ok=True)
35
- print(f"Cleaned up temporary directory: {self._path}")
36
- except Exception as e:
37
- print(f"Error during cleanup: {e}")
38
-
39
- def __enter__(self):
40
- return self.name
41
-
42
- def __exit__(self, exc, value, tb):
43
- self.cleanup()
44
-
45
-
46
20
  CACHE_EXPIRATION_TIME = 24 * 60 * 60
47
21
 
48
22
 
49
- TEMP_DIR = os.getenv('TEMP_DIR')
50
- if TEMP_DIR:
51
- CACHE_DIR = SharedTempDirectory(TEMP_DIR)
52
- else:
53
- CACHE_DIR = tempfile.TemporaryDirectory()
23
+ CACHE_DIR = storage.cache_directory
24
+
54
25
 
55
26
  PROCESS_MEMORY_USAGE: Dict[str, float] = dict()
flowfile_worker/main.py CHANGED
@@ -4,8 +4,11 @@ import signal
4
4
 
5
5
  from contextlib import asynccontextmanager
6
6
  from fastapi import FastAPI
7
+
8
+ from shared.storage_config import storage
9
+
7
10
  from flowfile_worker.routes import router
8
- from flowfile_worker import mp_context, CACHE_DIR
11
+ from flowfile_worker import mp_context
9
12
  from flowfile_worker.configs import logger, FLOWFILE_CORE_URI, SERVICE_HOST, SERVICE_PORT
10
13
 
11
14
 
@@ -30,7 +33,7 @@ async def shutdown_handler(app: FastAPI):
30
33
  logger.error(f"Error cleaning up process: {e}")
31
34
 
32
35
  try:
33
- CACHE_DIR.cleanup()
36
+ storage.cleanup_directories()
34
37
  except Exception as e:
35
38
  print(f"Error cleaning up cache directory: {e}")
36
39
 
flowfile_worker/routes.py CHANGED
@@ -17,13 +17,21 @@ from flowfile_worker.external_sources.sql_source.main import read_sql_source, wr
17
17
  router = APIRouter()
18
18
 
19
19
 
20
def create_and_get_default_cache_dir(flowfile_flow_id: int) -> str:
    """
    Ensure the per-flow cache directory exists and return its path.

    The directory is ``CACHE_DIR/<flowfile_flow_id>``; missing parents are
    created and an already existing directory is not an error.

    Args:
        flowfile_flow_id: Flow identifier used as the sub-directory name.

    Returns:
        The directory path as a string.
    """
    flow_cache_path = CACHE_DIR.joinpath(str(flowfile_flow_id))
    flow_cache_path.mkdir(parents=True, exist_ok=True)
    return str(flow_cache_path)
24
+
25
+
20
26
  @router.post("/submit_query/")
21
27
  def submit_query(polars_script: models.PolarsScript, background_tasks: BackgroundTasks) -> models.Status:
22
28
  logger.info(f"Processing query with operation: {polars_script.operation_type}")
23
29
 
24
30
  try:
25
31
  polars_script.task_id = str(uuid.uuid4()) if polars_script.task_id is None else polars_script.task_id
26
- polars_script.cache_dir = polars_script.cache_dir if polars_script.cache_dir is not None else CACHE_DIR.name
32
+ default_cache_dir = create_and_get_default_cache_dir(polars_script.flowfile_flow_id)
33
+
34
+ polars_script.cache_dir = polars_script.cache_dir if polars_script.cache_dir is not None else default_cache_dir
27
35
  polars_serializable_object = polars_script.polars_serializable_object()
28
36
  file_path = os.path.join(polars_script.cache_dir, f"{polars_script.task_id}.arrow")
29
37
  result_type = "polars" if polars_script.operation_type == "store" else "other"
@@ -49,8 +57,9 @@ def store_sample(polars_script: models.PolarsScriptSample, background_tasks: Bac
49
57
  logger.info(f"Processing sample storage with size: {polars_script.sample_size}")
50
58
 
51
59
  try:
60
+ default_cache_dir = create_and_get_default_cache_dir(polars_script.flowfile_flow_id)
52
61
  polars_script.task_id = str(uuid.uuid4()) if polars_script.task_id is None else polars_script.task_id
53
- polars_script.cache_dir = polars_script.cache_dir if polars_script.cache_dir is not None else CACHE_DIR.name
62
+ polars_script.cache_dir = polars_script.cache_dir if polars_script.cache_dir is not None else default_cache_dir
54
63
  polars_serializable_object = polars_script.polars_serializable_object()
55
64
 
56
65
  file_path = os.path.join(polars_script.cache_dir, f"{polars_script.task_id}.arrow")
@@ -210,7 +219,8 @@ def store_sql_db_result(database_read_settings: DatabaseReadSettings, background
210
219
 
211
220
  try:
212
221
  task_id = str(uuid.uuid4())
213
- file_path = os.path.join(CACHE_DIR.name, f"{task_id}.arrow")
222
+ file_path = os.path.join(create_and_get_default_cache_dir(database_read_settings.flowfile_flow_id),
223
+ f"{task_id}.arrow")
214
224
  status = models.Status(background_task_id=task_id, status="Starting", file_ref=file_path,
215
225
  result_type="polars")
216
226
  status_dict[task_id] = status
@@ -246,7 +256,7 @@ def create_table(file_type: FileType, received_table: Dict, background_tasks: Ba
246
256
 
247
257
  try:
248
258
  task_id = str(uuid.uuid4())
249
- file_ref = os.path.join(CACHE_DIR.name, f"{task_id}.arrow")
259
+ file_ref = os.path.join(create_and_get_default_cache_dir(flowfile_flow_id), f"{task_id}.arrow")
250
260
 
251
261
  status = models.Status(background_task_id=task_id, status="Starting", file_ref=file_ref,
252
262
  result_type="polars")
@@ -382,8 +392,9 @@ async def add_fuzzy_join(polars_script: models.FuzzyJoinInput, background_tasks:
382
392
  """
383
393
  logger.info("Starting fuzzy join operation")
384
394
  try:
395
+ default_cache_dir = create_and_get_default_cache_dir(polars_script.flowfile_flow_id)
385
396
  polars_script.task_id = str(uuid.uuid4()) if polars_script.task_id is None else polars_script.task_id
386
- polars_script.cache_dir = polars_script.cache_dir if polars_script.cache_dir is not None else CACHE_DIR.name
397
+ polars_script.cache_dir = polars_script.cache_dir if polars_script.cache_dir is not None else default_cache_dir
387
398
  left_serializable_object = polars_script.left_df_operation.polars_serializable_object()
388
399
  right_serializable_object = polars_script.right_df_operation.polars_serializable_object()
389
400
 
@@ -405,6 +416,37 @@ async def add_fuzzy_join(polars_script: models.FuzzyJoinInput, background_tasks:
405
416
  raise HTTPException(status_code=500, detail=str(e))
406
417
 
407
418
 
419
@router.delete("/clear_task/{task_id}")
def clear_task(task_id: str):
    """
    Clear task data and status by ID.

    Removes the file referenced by the task's status (best effort) and drops
    the task from ``status_dict`` and ``PROCESS_MEMORY_USAGE``. A failure to
    delete the file is logged and does not prevent the bookkeeping cleanup.

    Args:
        task_id: Unique identifier of the task to clear
    Returns:
        dict: Success message
    Raises:
        HTTPException: If task not found
    """

    logger.info(f"Clearing task: {task_id}")
    status = status_dict.get(task_id)
    if not status:
        logger.warning(f"Task not found for clearing: {task_id}")
        raise HTTPException(status_code=404, detail="Task not found")

    file_ref = status.file_ref
    if file_ref:  # nothing to delete when the task never produced a file reference
        try:
            # Attempt the removal directly instead of exists()+remove(): the
            # file can vanish between the two calls, which used to be logged
            # as an error with a full traceback.
            os.remove(file_ref)
            logger.debug(f"Removed file: {file_ref}")
        except FileNotFoundError:
            # Already gone - same outcome as the old "does not exist" path.
            pass
        except Exception as e:
            # Deliberately best effort: log and continue with the cleanup.
            logger.error(f"Error removing file {file_ref}: {str(e)}", exc_info=True)

    with status_dict_lock:
        status_dict.pop(task_id, None)
        PROCESS_MEMORY_USAGE.pop(task_id, None)
        logger.info(f"Successfully cleared task: {task_id}")
    return {"message": f"Task {task_id} has been cleared."}
448
+
449
+
408
450
  @router.post("/cancel_task/{task_id}")
409
451
  def cancel_task(task_id: str):
410
452
  """Cancel a running task by ID.
shared/__init__.py ADDED
@@ -0,0 +1,15 @@
1
+ """
2
+ Shared utilities for Flowfile services.
3
+ This package contains common functionality that can be used across
4
+ flowfile_core, flowfile_worker, and other components without creating
5
+ circular dependencies.
6
+ """
7
+
8
+ from .storage_config import storage, get_cache_directory, get_temp_directory, get_flows_directory
9
+
10
+ __all__ = [
11
+ 'storage',
12
+ 'get_cache_directory',
13
+ 'get_temp_directory',
14
+ 'get_flows_directory'
15
+ ]
@@ -0,0 +1,258 @@
1
+ # shared/storage_config.py - Updated for Option 3
2
+ """
3
+ Centralized storage configuration for Flowfile.
4
+ This module can be imported by both core and worker without creating dependencies.
5
+ """
6
+ import os
7
+ from pathlib import Path
8
+ from typing import Optional, Literal
9
+
10
DirectoryOptions = Literal["temp_directory", "logs_directory",
                           "system_logs_directory", "database_directory",
                           "cache_directory", "flows_directory", "user_defined_nodes_directory"]


class FlowfileStorage:
    """Centralized storage manager for Flowfile applications.

    Resolves every directory Flowfile uses from two roots:

    * ``base_directory`` - internal storage (cache, logs, temp, database).
    * ``user_data_directory`` - user-facing data when running in Docker.

    Whether the process runs in Docker is decided by the
    ``RUNNING_IN_DOCKER`` environment variable being exactly ``"true"``.
    All directories of the layout are created when the object is built.
    """

    def __init__(self):
        # Both roots are resolved lazily and cached on first access.
        self._base_dir: Optional[Path] = None
        self._user_data_dir: Optional[Path] = None
        self._ensure_directories()

    @staticmethod
    def _running_in_docker() -> bool:
        """Return True when ``RUNNING_IN_DOCKER`` is set to ``"true"``."""
        return os.environ.get("RUNNING_IN_DOCKER") == "true"

    def _user_accessible_root(self) -> Path:
        """Return the root under which user-accessible directories live.

        In Docker this is ``user_data_directory``; otherwise it is
        ``base_directory``.
        """
        if self._running_in_docker():
            return self.user_data_directory
        return self.base_directory

    @property
    def base_directory(self) -> Path:
        """Get the base Flowfile storage directory (for internal container communication).

        Resolution order: ``FLOWFILE_STORAGE_DIR`` if set; otherwise
        ``/app/internal_storage`` in Docker or ``~/.flowfile`` locally.
        The result is cached after the first lookup.
        """
        if self._base_dir is None:
            if self._running_in_docker():
                # In Docker, internal storage stays inside /app
                base_path = os.environ.get("FLOWFILE_STORAGE_DIR", "/app/internal_storage")
            else:
                # Local development: an empty/unset variable falls back to ~/.flowfile
                base_path = os.environ.get("FLOWFILE_STORAGE_DIR") or Path.home() / ".flowfile"
            self._base_dir = Path(base_path)
        return self._base_dir

    @property
    def user_data_directory(self) -> Path:
        """Get the user data directory (completely separate from application code).

        In Docker this is ``FLOWFILE_USER_DATA_DIR`` (default ``/data/user``);
        locally it is the user's home directory. Cached after first lookup.
        """
        if self._user_data_dir is None:
            if self._running_in_docker():
                # In Docker, user data is at /data/user (completely outside /app)
                user_data_path = os.environ.get("FLOWFILE_USER_DATA_DIR", "/data/user")
            else:
                # Local development - use user's home directory
                user_data_path = Path.home()
            self._user_data_dir = Path(user_data_path)
        return self._user_data_dir

    @property
    def cache_directory(self) -> Path:
        """Cache directory for worker-core communication (internal)."""
        return self.base_directory / "cache"

    def get_flow_cache_directory(self, flow_id: int) -> Path:
        """Get or create a cache directory for a specific flow (internal).

        Args:
            flow_id: Identifier of the flow; used as the sub-directory name.

        Returns:
            Path of the (now existing) ``cache/<flow_id>`` directory.
        """
        flow_cache_dir = self.cache_directory / str(flow_id)
        flow_cache_dir.mkdir(parents=True, exist_ok=True)
        return flow_cache_dir

    @property
    def system_logs_directory(self) -> Path:
        """Directory for system logs (internal)."""
        return self.base_directory / "system_logs"

    @property
    def flows_directory(self) -> Path:
        """Directory for flow storage (user-accessible)."""
        return self._user_accessible_root() / "flows"

    @property
    def uploads_directory(self) -> Path:
        """Directory for user uploads (user-accessible)."""
        return self._user_accessible_root() / "uploads"

    @property
    def user_defined_nodes_directory(self) -> Path:
        """Directory for user-defined custom nodes (user-accessible)."""
        return self._user_accessible_root() / "user_defined_nodes"

    @property
    def user_defined_nodes_icons(self) -> Path:
        """Directory for user-defined custom node icon (user-accessible)."""
        return self.user_defined_nodes_directory / "icons"

    @property
    def outputs_directory(self) -> Path:
        """Directory for user outputs (user-accessible)."""
        return self._user_accessible_root() / "outputs"

    @property
    def database_directory(self) -> Path:
        """Directory for local database files (internal)."""
        return self.base_directory / "database"

    @property
    def logs_directory(self) -> Path:
        """Directory for application logs (internal)."""
        return self.base_directory / "logs"

    @property
    def temp_directory(self) -> Path:
        """Directory for temporary files (internal)."""
        return self.base_directory / "temp"

    @property
    def temp_directory_for_flows(self) -> Path:
        """Directory for temporary files specific to flows (internal)."""
        return self.temp_directory / "flows"

    def _ensure_directories(self) -> None:
        """Create all necessary directories if they don't exist."""
        # Internal directories (always created in base_directory)
        internal_directories = [
            self.cache_directory,
            self.database_directory,
            self.logs_directory,
            self.temp_directory,
            self.system_logs_directory,
            self.temp_directory_for_flows,
        ]

        # User-accessible directories (location depends on environment)
        user_directories = [
            self.flows_directory,
            self.uploads_directory,
            self.outputs_directory,
            self.user_defined_nodes_directory,
            self.user_defined_nodes_icons,
        ]

        for directory in internal_directories + user_directories:
            directory.mkdir(parents=True, exist_ok=True)

    def get_cache_file_path(self, filename: str) -> Path:
        """Get full path for a cache file (internal)."""
        return self.cache_directory / filename

    def get_flow_file_path(self, filename: str) -> Path:
        """Get full path for a flow file (user-accessible)."""
        return self.flows_directory / filename

    def get_upload_file_path(self, filename: str) -> Path:
        """Get full path for an uploaded file (user-accessible)."""
        return self.uploads_directory / filename

    def get_output_file_path(self, filename: str) -> Path:
        """Get full path for an output file (user-accessible)."""
        return self.outputs_directory / filename

    def get_log_file_path(self, filename: str) -> Path:
        """Get full path for an application log file (internal)."""
        return self.logs_directory / filename

    def get_system_log_file_path(self, filename: str) -> Path:
        """Get full path for a system log file (internal)."""
        return self.system_logs_directory / filename

    def get_temp_file_path(self, filename: str) -> Path:
        """Get full path for a temporary file (internal)."""
        return self.temp_directory / filename

    def cleanup_directory(self, directory_option: DirectoryOptions, storage_duration_hours: int = 24) -> None:
        """Delete stale top-level entries from one of the managed directories.

        An entry is stale when its own modification time is older than
        ``storage_duration_hours``. Stale files are unlinked and stale
        directories are removed recursively. Entries that cannot be
        inspected or removed are skipped.

        After the sweep the standard directory layout is re-created, so a
        structural sub-directory that was removed as stale (for example
        ``temp/flows``) exists again when this method returns.

        Args:
            directory_option: Name of the directory property to clean.
            storage_duration_hours: Maximum age, in hours, of entries to keep.

        Raises:
            ValueError: If ``directory_option`` is not an attribute of this object.
            TypeError: If the named attribute is not a ``Path``.
            OSError: If a required directory cannot be re-created afterwards.
        """
        import time
        import shutil

        if not hasattr(self, directory_option):
            raise ValueError(f"Directory does not exist in {self.base_directory}")

        directory = getattr(self, directory_option)
        if not isinstance(directory, Path):
            raise TypeError(f"Directory attribute {directory_option} is not a Path object")

        if not directory.exists():
            return

        cutoff_time = time.time() - (storage_duration_hours * 60 * 60)

        for item in directory.iterdir():
            try:
                if item.stat().st_mtime >= cutoff_time:
                    continue
                if item.is_file():
                    item.unlink()
                elif item.is_dir():
                    shutil.rmtree(item)
            except OSError:
                # Permission errors or entries that disappeared meanwhile
                # (FileNotFoundError is an OSError): skip and carry on.
                continue

        # The sweep may have removed directories that are part of the layout.
        self._ensure_directories()

    def cleanup_directories(self) -> None:
        """Clean up the internal directories using their retention periods.

        Temp entries are kept for 24 hours, cache entries for 1 hour, and
        application and system logs for 168 hours.
        """
        self.cleanup_directory("temp_directory", storage_duration_hours=24)
        self.cleanup_directory("cache_directory", storage_duration_hours=1)
        self.cleanup_directory("logs_directory", storage_duration_hours=168)
        self.cleanup_directory("system_logs_directory", storage_duration_hours=168)
220
+
221
+
222
+ storage = FlowfileStorage()
223
+
224
+
225
+ # Convenience functions for backward compatibility
226
def get_cache_directory() -> str:
    """Return the cache directory of the module-level ``storage`` as a string."""
    cache_path = storage.cache_directory
    return str(cache_path)
229
+
230
+
231
def get_temp_directory() -> str:
    """Return the temp directory of the module-level ``storage`` as a string."""
    temp_path = storage.temp_directory
    return str(temp_path)
234
+
235
+
236
def get_flows_directory() -> str:
    """Return the flows directory of the module-level ``storage`` as a string."""
    flows_path = storage.flows_directory
    return str(flows_path)
239
+
240
+
241
def get_uploads_directory() -> str:
    """Return the uploads directory of the module-level ``storage`` as a string."""
    uploads_path = storage.uploads_directory
    return str(uploads_path)
244
+
245
+
246
def get_outputs_directory() -> str:
    """Return the outputs directory of the module-level ``storage`` as a string."""
    outputs_path = storage.outputs_directory
    return str(outputs_path)
249
+
250
+
251
def get_logs_directory() -> str:
    """Return the application logs directory of the module-level ``storage`` as a string."""
    logs_path = storage.logs_directory
    return str(logs_path)
254
+
255
+
256
def get_system_logs_directory() -> str:
    """Return the system logs directory of the module-level ``storage`` as a string."""
    system_logs_path = storage.system_logs_directory
    return str(system_logs_path)