Flowfile 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile might be problematic. Click here for more details.

Files changed (98)
  1. flowfile/__init__.py +27 -6
  2. flowfile/api.py +1 -0
  3. flowfile/web/__init__.py +2 -2
  4. flowfile/web/static/assets/{CloudConnectionManager-d004942f.js → CloudConnectionManager-c20a740f.js} +3 -4
  5. flowfile/web/static/assets/{CloudStorageReader-eccf9fc2.js → CloudStorageReader-960b400a.js} +7 -7
  6. flowfile/web/static/assets/{CloudStorageWriter-b1ba6bba.js → CloudStorageWriter-e3decbdd.js} +7 -7
  7. flowfile/web/static/assets/{CrossJoin-68981877.js → CrossJoin-d67e2405.js} +8 -8
  8. flowfile/web/static/assets/{DatabaseConnectionSettings-0b06649c.js → DatabaseConnectionSettings-a81e0f7e.js} +2 -2
  9. flowfile/web/static/assets/{DatabaseManager-8349a426.js → DatabaseManager-9ea35e84.js} +2 -2
  10. flowfile/web/static/assets/{DatabaseReader-905344f8.js → DatabaseReader-9578bfa5.js} +9 -9
  11. flowfile/web/static/assets/{DatabaseWriter-9f5b8638.js → DatabaseWriter-19531098.js} +9 -9
  12. flowfile/web/static/assets/{ExploreData-131a6d53.js → ExploreData-40476474.js} +47141 -43697
  13. flowfile/web/static/assets/{ExternalSource-e3549dcc.js → ExternalSource-2297ef96.js} +6 -6
  14. flowfile/web/static/assets/{Filter-6e0730ae.js → Filter-f211c03a.js} +8 -8
  15. flowfile/web/static/assets/{Formula-02f033e6.js → Formula-4207ea31.js} +8 -8
  16. flowfile/web/static/assets/{FuzzyMatch-54c14036.js → FuzzyMatch-bf120df0.js} +9 -9
  17. flowfile/web/static/assets/{GraphSolver-08a3f499.js → GraphSolver-5bb7497a.js} +5 -5
  18. flowfile/web/static/assets/{GroupBy-2ae38139.js → GroupBy-92c81b65.js} +6 -6
  19. flowfile/web/static/assets/{Join-493b9772.js → Join-4e49a274.js} +9 -9
  20. flowfile/web/static/assets/{ManualInput-4373d163.js → ManualInput-90998ae8.js} +5 -5
  21. flowfile/web/static/assets/{Output-b534f3c7.js → Output-81e3e917.js} +4 -4
  22. flowfile/web/static/assets/{Pivot-2968ff65.js → Pivot-a3419842.js} +6 -6
  23. flowfile/web/static/assets/{PolarsCode-65136536.js → PolarsCode-72710deb.js} +6 -6
  24. flowfile/web/static/assets/{Read-c56339ed.js → Read-c4059daf.js} +6 -6
  25. flowfile/web/static/assets/{RecordCount-1c641a5e.js → RecordCount-c2b5e095.js} +5 -5
  26. flowfile/web/static/assets/{RecordId-df308b8f.js → RecordId-10baf191.js} +6 -6
  27. flowfile/web/static/assets/{Sample-293e8a64.js → Sample-3ed9a0ae.js} +5 -5
  28. flowfile/web/static/assets/{SecretManager-03911655.js → SecretManager-0d49c0e8.js} +2 -2
  29. flowfile/web/static/assets/{Select-3058a13d.js → Select-8a02a0b3.js} +8 -8
  30. flowfile/web/static/assets/{SettingsSection-fbf4fb39.js → SettingsSection-4c0f45f5.js} +1 -1
  31. flowfile/web/static/assets/{Sort-a29bbaf7.js → Sort-f55c9f9d.js} +6 -6
  32. flowfile/web/static/assets/{TextToRows-c7d7760e.js → TextToRows-5dbc2145.js} +8 -8
  33. flowfile/web/static/assets/{UnavailableFields-118f1d20.js → UnavailableFields-a1768e52.js} +2 -2
  34. flowfile/web/static/assets/{Union-f0589571.js → Union-f2aefdc9.js} +5 -5
  35. flowfile/web/static/assets/{Unique-7329a207.js → Unique-46b250da.js} +8 -8
  36. flowfile/web/static/assets/{Unpivot-30b0be15.js → Unpivot-25ac84cc.js} +5 -5
  37. flowfile/web/static/assets/{api-fb67319c.js → api-6ef0dcef.js} +1 -1
  38. flowfile/web/static/assets/{api-602fb95c.js → api-a0abbdc7.js} +1 -1
  39. flowfile/web/static/assets/{designer-94a6bf4d.js → designer-13eabd83.js} +4 -4
  40. flowfile/web/static/assets/{documentation-a224831e.js → documentation-b87e7f6f.js} +1 -1
  41. flowfile/web/static/assets/{dropDown-c2d2aa97.js → dropDown-13564764.js} +1 -1
  42. flowfile/web/static/assets/{fullEditor-921ac5fd.js → fullEditor-fd2cd6f9.js} +2 -2
  43. flowfile/web/static/assets/{genericNodeSettings-7013cc94.js → genericNodeSettings-71e11604.js} +3 -3
  44. flowfile/web/static/assets/{index-3a75211d.js → index-f6c15e76.js} +46 -22
  45. flowfile/web/static/assets/{nodeTitle-a63d4680.js → nodeTitle-988d9efe.js} +3 -3
  46. flowfile/web/static/assets/{secretApi-763aec6e.js → secretApi-dd636aa2.js} +1 -1
  47. flowfile/web/static/assets/{selectDynamic-08464729.js → selectDynamic-af36165e.js} +3 -3
  48. flowfile/web/static/assets/{vue-codemirror.esm-f15a5f87.js → vue-codemirror.esm-2847001e.js} +1 -1
  49. flowfile/web/static/assets/{vue-content-loader.es-93bd09d7.js → vue-content-loader.es-0371da73.js} +1 -1
  50. flowfile/web/static/index.html +1 -1
  51. {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/METADATA +2 -2
  52. {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/RECORD +96 -94
  53. flowfile_core/__init__.py +1 -0
  54. flowfile_core/auth/jwt.py +39 -0
  55. flowfile_core/configs/node_store/nodes.py +1 -0
  56. flowfile_core/configs/settings.py +6 -5
  57. flowfile_core/flowfile/code_generator/code_generator.py +71 -0
  58. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +1 -1
  59. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +597 -309
  60. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +3 -1
  61. flowfile_core/flowfile/flow_graph.py +619 -191
  62. flowfile_core/flowfile/flow_graph_utils.py +2 -2
  63. flowfile_core/flowfile/flow_node/flow_node.py +500 -89
  64. flowfile_core/flowfile/flow_node/models.py +125 -20
  65. flowfile_core/flowfile/handler.py +2 -33
  66. flowfile_core/flowfile/manage/open_flowfile.py +1 -2
  67. flowfile_core/flowfile/util/calculate_layout.py +0 -2
  68. flowfile_core/flowfile/utils.py +36 -5
  69. flowfile_core/main.py +32 -13
  70. flowfile_core/routes/cloud_connections.py +7 -11
  71. flowfile_core/routes/logs.py +2 -6
  72. flowfile_core/routes/public.py +1 -0
  73. flowfile_core/routes/routes.py +127 -51
  74. flowfile_core/routes/secrets.py +72 -14
  75. flowfile_core/schemas/__init__.py +8 -0
  76. flowfile_core/schemas/input_schema.py +92 -64
  77. flowfile_core/schemas/output_model.py +19 -3
  78. flowfile_core/schemas/schemas.py +144 -11
  79. flowfile_core/schemas/transform_schema.py +82 -17
  80. flowfile_frame/__init__.py +9 -1
  81. flowfile_frame/cloud_storage/__init__.py +0 -0
  82. flowfile_frame/cloud_storage/frame_helpers.py +39 -0
  83. flowfile_frame/cloud_storage/secret_manager.py +73 -0
  84. flowfile_frame/expr.py +28 -1
  85. flowfile_frame/expr.pyi +76 -61
  86. flowfile_frame/flow_frame.py +232 -110
  87. flowfile_frame/flow_frame.pyi +140 -91
  88. flowfile_frame/flow_frame_methods.py +150 -12
  89. flowfile_frame/group_frame.py +3 -0
  90. flowfile_frame/utils.py +25 -3
  91. test_utils/s3/data_generator.py +1 -0
  92. test_utils/s3/demo_data_generator.py +186 -0
  93. test_utils/s3/fixtures.py +6 -1
  94. flowfile_core/schemas/defaults.py +0 -9
  95. flowfile_core/schemas/models.py +0 -193
  96. {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/LICENSE +0 -0
  97. {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/WHEEL +0 -0
  98. {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/entry_points.txt +0 -0
flowfile_core/flowfile/flow_node/models.py CHANGED
@@ -1,15 +1,32 @@
1
1
 
2
2
  import pyarrow as pa
3
3
  from typing import List, Union, Callable, Optional, Literal
4
+ from dataclasses import dataclass
5
+
6
+ # Forward declaration for type hints to avoid circular imports
7
+ if False:
8
+ from flowfile_core.flowfile.flow_node.flow_node import FlowNode
4
9
 
5
10
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
6
11
  from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine
7
12
  from flowfile_core.schemas import schemas
8
- from dataclasses import dataclass
9
13
 
10
14
 
11
15
  @dataclass
12
16
  class NodeStepPromise:
17
+ """
18
+ A lightweight, temporary representation of a node during the initial
19
+ graph construction phase, before full `FlowNode` objects are created.
20
+
21
+ Attributes:
22
+ node_id: The unique identifier for the node.
23
+ name: The display name of the node.
24
+ is_start: A boolean indicating if this is a starting node with no inputs.
25
+ leads_to_id: A list of node IDs that this node connects to.
26
+ left_input: The ID of the node connected to the left input port.
27
+ right_input: The ID of the node connected to the right input port.
28
+ depends_on: A list of node IDs that this node depends on for main inputs.
29
+ """
13
30
  node_id: Union[str, int]
14
31
  name: str
15
32
  is_start: bool
@@ -20,6 +37,9 @@ class NodeStepPromise:
20
37
 
21
38
 
22
39
  class NodeStepStats:
40
+ """
41
+ Tracks the execution status and statistics of a `FlowNode`.
42
+ """
23
43
  error: str = None
24
44
  _has_run_with_current_setup: bool = False
25
45
  has_completed_last_run: bool = False
@@ -31,23 +51,46 @@ class NodeStepStats:
31
51
  has_completed_last_run: bool = False,
32
52
  active: bool = True,
33
53
  is_canceled: bool = False):
54
+ """
55
+ Initializes the node's statistics.
56
+
57
+ :param error: Any error message from the last run.
58
+ :param has_run_with_current_setup: Flag indicating if the node has run successfully with its current configuration.
59
+ :param has_completed_last_run: Flag indicating if the last triggered run finished (successfully or not).
60
+ :param active: Flag indicating if the node is active in the flow.
61
+ :param is_canceled: Flag indicating if the last run was canceled.
62
+ """
34
63
  self.error = error
35
64
  self._has_run_with_current_setup = has_run_with_current_setup
36
65
  self.has_completed_last_run = has_completed_last_run
37
66
  self.active = active
38
67
  self.is_canceled = is_canceled
39
68
 
40
- def __repr__(self):
69
+ def __repr__(self) -> str:
70
+ """
71
+ Provides a string representation of the node's stats.
72
+ :return: A string detailing the current stats.
73
+ """
41
74
  return (f"NodeStepStats(error={self.error}, has_run_with_current_setup={self.has_run_with_current_setup}, "
42
75
  f"has_completed_last_run={self.has_completed_last_run}, "
43
76
  f"active={self.active}, is_canceled={self.is_canceled})")
44
77
 
45
78
  @property
46
79
  def has_run_with_current_setup(self) -> bool:
80
+ """
81
+ Checks if the node has run successfully with its current settings and inputs.
82
+ This is the primary flag for caching.
83
+ :return: True if the node is considered up-to-date, False otherwise.
84
+ """
47
85
  return self._has_run_with_current_setup
48
86
 
49
87
  @has_run_with_current_setup.setter
50
88
  def has_run_with_current_setup(self, value: bool):
89
+ """
90
+ Sets the run status of the node.
91
+ If set to True, it implies the last run was completed successfully.
92
+ :param value: The new boolean status.
93
+ """
51
94
  if value:
52
95
  self._has_run_with_current_setup = True
53
96
  self.has_completed_last_run = True
@@ -56,6 +99,17 @@ class NodeStepStats:
56
99
 
57
100
 
58
101
  class NodeStepSettings:
102
+ """
103
+ Holds the configuration settings that control a node's execution behavior.
104
+
105
+ Attributes:
106
+ cache_results: If True, the node will cache its results to avoid re-computation.
107
+ renew_schema: If True, the schema will be re-evaluated on changes.
108
+ streamable: If True, the node can process data in a streaming fashion.
109
+ setup_errors: If True, indicates a non-blocking error occurred during setup.
110
+ breaking_setup_errors: If True, indicates an error occurred that prevents execution.
111
+ execute_location: The preferred location for execution ('auto', 'local', 'remote').
112
+ """
59
113
  cache_results: bool = False
60
114
  renew_schema: bool = True
61
115
  streamable: bool = True
@@ -65,20 +119,40 @@ class NodeStepSettings:
65
119
 
66
120
 
67
121
  class NodeStepInputs:
122
+ """
123
+ Manages the input connections for a `FlowNode`.
124
+
125
+ Attributes:
126
+ left_input: The `FlowNode` connected to the left input port.
127
+ right_input: The `FlowNode` connected to the right input port.
128
+ main_inputs: A list of `FlowNode` objects connected to the main input port(s).
129
+ """
68
130
  left_input: "FlowNode" = None
69
131
  right_input: "FlowNode" = None
70
132
  main_inputs: List["FlowNode"] = None
71
133
 
72
134
  @property
73
- def input_ids(self) -> List[int]:
135
+ def input_ids(self) -> List[int] | None:
136
+ """
137
+ Gets the IDs of all connected input nodes.
138
+ :return: A list of integer node IDs.
139
+ """
74
140
  if self.main_inputs is not None:
75
141
  return [node_input.node_information.id for node_input in self.get_all_inputs()]
76
142
 
77
143
  def get_all_inputs(self) -> List["FlowNode"]:
144
+ """
145
+ Retrieves a single list containing all input nodes (main, left, and right).
146
+ :return: A list of all connected `FlowNode` objects.
147
+ """
78
148
  main_inputs = self.main_inputs or []
79
149
  return [v for v in main_inputs + [self.left_input, self.right_input] if v is not None]
80
150
 
81
151
  def __repr__(self) -> str:
152
+ """
153
+ Provides a string representation of the node's inputs.
154
+ :return: A string detailing the connected inputs.
155
+ """
82
156
  left_repr = f"Left Input: {self.left_input}" if self.left_input else "Left Input: None"
83
157
  right_repr = f"Right Input: {self.right_input}" if self.right_input else "Right Input: None"
84
158
  main_inputs_repr = f"Main Inputs: {self.main_inputs}" if self.main_inputs else "Main Inputs: None"
@@ -86,27 +160,46 @@ class NodeStepInputs:
86
160
 
87
161
  def validate_if_input_connection_exists(self, node_input_id: int,
88
162
  connection_name: Literal['main', 'left', 'right']) -> bool:
89
- if connection_name == 'main':
90
- return any((node_input.node_information.id == node_input_id for node_input in self.main_inputs))
91
- if connection_name == 'left':
163
+ """
164
+ Checks if a connection from a specific node ID exists on a given port.
165
+
166
+ :param node_input_id: The ID of the source node to check for.
167
+ :param connection_name: The name of the input port ('main', 'left', 'right').
168
+ :return: True if the connection exists, False otherwise.
169
+ """
170
+ if connection_name == 'main' and self.main_inputs:
171
+ return any(node_input.node_information.id == node_input_id for node_input in self.main_inputs)
172
+ if connection_name == 'left' and self.left_input:
92
173
  return self.left_input.node_information.id == node_input_id
93
174
  if connection_name == 'right':
94
175
  return self.right_input.node_information.id == node_input_id
95
176
 
96
177
 
97
178
  class NodeSchemaInformation:
98
- result_schema: Optional[List[FlowfileColumn]] = None # resulting schema of the function
99
- predicted_schema: Optional[List[FlowfileColumn]] = None # predicted resulting schema of the function
100
- input_columns: List[str] = [] # columns that are needed for the function
101
- drop_columns: List[str] = [] # columns that will not be available after the function
102
- output_columns: List[FlowfileColumn] = [] # columns that will be added with the function
179
+ """
180
+ Stores all schema-related information for a `FlowNode`.
181
+
182
+ Attributes:
183
+ result_schema: The actual output schema after a successful execution.
184
+ predicted_schema: The predicted output schema, calculated without full execution.
185
+ input_columns: A list of column names the node requires from its inputs.
186
+ drop_columns: A list of column names that will be dropped by the node.
187
+ output_columns: A list of `FlowfileColumn` objects that will be added by the node.
188
+ """
189
+ result_schema: Optional[List[FlowfileColumn]] = None
190
+ predicted_schema: Optional[List[FlowfileColumn]] = None
191
+ input_columns: List[str] = []
192
+ drop_columns: List[str] = []
193
+ output_columns: List[FlowfileColumn] = []
103
194
 
104
195
 
105
196
  class NodeResults:
106
- _resulting_data: Optional[FlowDataEngine] = None # after successful execution this will contain the Flowfile
107
- example_data: Optional[
108
- FlowDataEngine] = None # after success this will contain a sample of the data (to provide frontend data)
109
- example_data_path: Optional[str] = None # Path to the arrow table file
197
+ """
198
+ Stores the outputs of a `FlowNode`'s execution, including data, errors, and metadata.
199
+ """
200
+ _resulting_data: Optional[FlowDataEngine] = None
201
+ example_data: Optional[FlowDataEngine] = None
202
+ example_data_path: Optional[str] = None
110
203
  example_data_generator: Optional[Callable[[], pa.Table]] = None
111
204
  run_time: int = -1
112
205
  errors: Optional[str] = None
@@ -122,19 +215,31 @@ class NodeResults:
122
215
  self.example_data_generator = None
123
216
  self.analysis_data_generator = None
124
217
 
125
- def get_example_data(self) -> pa.Table | None:
218
+ def get_example_data(self) -> Optional[pa.Table]:
219
+ """
220
+ Executes the generator to fetch a sample of the resulting data.
221
+ :return: A PyArrow Table containing a sample of the data, or None.
222
+ """
126
223
  if self.example_data_generator:
127
224
  return self.example_data_generator()
128
225
 
129
226
  @property
130
- def resulting_data(self) -> FlowDataEngine:
227
+ def resulting_data(self) -> Optional[FlowDataEngine]:
228
+ """
229
+ Gets the full resulting data from the node's execution.
230
+ :return: A `FlowDataEngine` instance containing the result, or None.
231
+ """
131
232
  return self._resulting_data
132
233
 
133
234
  @resulting_data.setter
134
- def resulting_data(self, d: FlowDataEngine):
235
+ def resulting_data(self, d: Optional[FlowDataEngine]):
236
+ """
237
+ Sets the resulting data.
238
+ :param d: The `FlowDataEngine` instance to store.
239
+ """
135
240
  self._resulting_data = d
136
241
 
137
242
  def reset(self):
243
+ """Resets all result attributes to their default, empty state."""
138
244
  self._resulting_data = None
139
- self.run_time = -1
140
-
245
+ self.run_time = -1
flowfile_core/flowfile/handler.py CHANGED
@@ -1,8 +1,3 @@
1
- import time
2
- import random
3
- import uuid
4
- import socket
5
- import hashlib
6
1
 
7
2
  from dataclasses import dataclass
8
3
  from typing import Dict, List
@@ -12,33 +7,7 @@ from pathlib import Path
12
7
  from flowfile_core.flowfile.manage.open_flowfile import open_flow
13
8
  from flowfile_core.flowfile.flow_graph import FlowGraph
14
9
  from flowfile_core.schemas.schemas import FlowSettings
15
- from flowfile_core.configs import logger
16
-
17
-
18
- def create_unique_id() -> int:
19
- """
20
- Create a unique id for the flowfile with increased randomness while maintaining 32-bit compatibility
21
- Returns:
22
- int: unique id within 32 bits (4 bytes)
23
- """
24
- # Get various entropy sources
25
- time_ms = int(time.time() * 1000)
26
- pid = os.getpid()
27
- random_bytes = random.getrandbits(32)
28
- mac_addr = uuid.getnode() # MAC address as integer
29
- hostname = socket.gethostname()
30
-
31
- # Combine all sources into a string
32
- seed = f"{time_ms}-{pid}-{random_bytes}-{mac_addr}-{hostname}-{uuid.uuid4()}"
33
-
34
- # Create a hash of all entropy sources
35
- hash_obj = hashlib.md5(seed.encode())
36
- hash_int = int(hash_obj.hexdigest(), 16)
37
-
38
- # Ensure the result fits within 32 bits (4 bytes)
39
- unique_id = hash_int & 0xFFFFFFFF
40
-
41
- return unique_id
10
+ from flowfile_core.flowfile.utils import create_unique_id
42
11
 
43
12
 
44
13
  @dataclass
@@ -71,7 +40,7 @@ class FlowfileHandler:
71
40
  raise 'flow already registered'
72
41
  else:
73
42
  name = flow_settings.name if flow_settings.name else flow_settings.flow_id
74
- self._flows[flow_settings.flow_id] = FlowGraph(name=name, flow_id=flow_settings.flow_id, flow_settings=flow_settings)
43
+ self._flows[flow_settings.flow_id] = FlowGraph(name=name, flow_settings=flow_settings)
75
44
  return self.get_flow(flow_settings.flow_id)
76
45
 
77
46
  def get_flow(self, flow_id: int) -> FlowGraph | None:
flowfile_core/flowfile/manage/open_flowfile.py CHANGED
@@ -70,8 +70,7 @@ def open_flow(flow_path: Path) -> FlowGraph:
70
70
  flow_storage_obj.flow_name = str(flow_path.stem)
71
71
  ensure_compatibility(flow_storage_obj, str(flow_path))
72
72
  ingestion_order = determine_insertion_order(flow_storage_obj)
73
- new_flow = FlowGraph(flow_id=flow_storage_obj.flow_id, name=flow_storage_obj.flow_name,
74
- flow_settings=flow_storage_obj.flow_settings)
73
+ new_flow = FlowGraph(name=flow_storage_obj.flow_name, flow_settings=flow_storage_obj.flow_settings)
75
74
  for node_id in ingestion_order:
76
75
  node_info: schemas.NodeInformation = flow_storage_obj.data[node_id]
77
76
  node_promise = input_schema.NodePromise(flow_id=new_flow.flow_id, node_id=node_info.id,
flowfile_core/flowfile/util/calculate_layout.py CHANGED
@@ -4,7 +4,6 @@ from collections import defaultdict, deque
4
4
  from typing import List, Dict, Set, Tuple, TYPE_CHECKING
5
5
 
6
6
  if TYPE_CHECKING:
7
- # Make sure this import path is correct for your project structure
8
7
  from flowfile_core.flowfile.flow_graph import FlowGraph
9
8
 
10
9
 
@@ -64,7 +63,6 @@ def calculate_layered_layout(
64
63
  if child_node.node_id in node_ids:
65
64
  if child_node.node_id not in adj[node.node_id]:
66
65
  adj[node.node_id].append(child_node.node_id)
67
- # Assuming primary method works or in_degree is handled elsewhere
68
66
  in_degree[child_node.node_id] += 1
69
67
 
70
68
  stages: Dict[int, List[int]] = defaultdict(list)
flowfile_core/flowfile/utils.py CHANGED
@@ -1,11 +1,15 @@
1
1
  import os
2
- import hashlib
3
2
  import json
4
3
  import shutil
5
4
 
6
- from datetime import datetime, date, time
5
+ import datetime
7
6
  from typing import List
8
7
  from decimal import Decimal
8
+ import time
9
+ import random
10
+ import uuid
11
+ import socket
12
+ import hashlib
9
13
 
10
14
 
11
15
  def generate_sha256_hash(data: bytes):
@@ -26,11 +30,11 @@ def snake_case_to_camel_case(text: str) -> str:
26
30
 
27
31
 
28
32
  def json_default(val):
29
- if isinstance(val, datetime):
33
+ if isinstance(val, datetime.datetime):
30
34
  return val.isoformat(timespec='microseconds')
31
- elif isinstance(val, date):
35
+ elif isinstance(val, datetime.date):
32
36
  return val.isoformat()
33
- elif isinstance(val, time):
37
+ elif isinstance(val, datetime.time):
34
38
  return val.isoformat()
35
39
  elif hasattr(val, '__dict__'):
36
40
  return val.__dict__
@@ -113,3 +117,30 @@ def batch_generator(input_list: List, batch_size: int = 10000):
113
117
  yield input_list
114
118
  input_list = []
115
119
  run = False
120
+
121
+
122
+ def create_unique_id() -> int:
123
+ """
124
+ Create a unique id for the flowfile with increased randomness while maintaining 32-bit compatibility
125
+ Returns:
126
+ int: unique id within 32 bits (4 bytes)
127
+ """
128
+ # Get various entropy sources
129
+ time_ms = int(time.time() * 1000)
130
+ pid = os.getpid()
131
+ random_bytes = random.getrandbits(32)
132
+ mac_addr = uuid.getnode() # MAC address as integer
133
+ hostname = socket.gethostname()
134
+
135
+ # Combine all sources into a string
136
+ seed = f"{time_ms}-{pid}-{random_bytes}-{mac_addr}-{hostname}-{uuid.uuid4()}"
137
+
138
+ # Create a hash of all entropy sources
139
+
140
+ hash_obj = hashlib.sha256(seed.encode())
141
+ hash_int = int(hash_obj.hexdigest(), 16)
142
+
143
+ # Ensure the result fits within 32 bits (4 bytes)
144
+ unique_id = hash_int & 0xFFFFFFFF
145
+
146
+ return unique_id
flowfile_core/main.py CHANGED
@@ -27,7 +27,11 @@ server_instance = None
27
27
 
28
28
  @asynccontextmanager
29
29
  async def shutdown_handler(app: FastAPI):
30
- """Handle graceful shutdown of the application."""
30
+ """Handles the graceful startup and shutdown of the FastAPI application.
31
+
32
+ This context manager ensures that resources, such as log files, are cleaned
33
+ up properly when the application is terminated.
34
+ """
31
35
  print('Starting core application...')
32
36
  try:
33
37
  yield
@@ -77,31 +81,46 @@ app.include_router(cloud_connections_router, prefix="/cloud_connections", tags=[
77
81
 
78
82
  @app.post("/shutdown")
79
83
  async def shutdown():
80
- """Endpoint to handle graceful shutdown"""
81
- ServerRun.exit = True
82
- print(f"ServerRun.exit = {ServerRun.exit}")
83
- if server_instance:
84
- # Schedule the shutdown
85
- await asyncio.create_task(trigger_shutdown())
86
- return {"message": "Shutting down"}
84
+ """An API endpoint to gracefully shut down the server.
85
+
86
+ This endpoint sets a flag that the Uvicorn server checks, allowing it
87
+ to terminate cleanly. A background task is used to trigger the shutdown
88
+ after the HTTP response has been sent.
89
+ """
90
+ # Use a background task to trigger the shutdown after the response is sent
91
+ background_tasks = ServerRun()
92
+ background_tasks.add_task(trigger_shutdown)
93
+ return {"message": "Server is shutting down"}
87
94
 
88
95
 
89
96
  async def trigger_shutdown():
90
- """Trigger the actual shutdown after responding to the client"""
91
- await asyncio.sleep(1) # Give time for the response to be sent
97
+ """(Internal) Triggers the actual server shutdown.
98
+
99
+ Waits for a moment to allow the `/shutdown` response to be sent before
100
+ telling the Uvicorn server instance to exit.
101
+ """
102
+ await asyncio.sleep(1)
92
103
  if server_instance:
93
104
  server_instance.should_exit = True
94
105
 
95
106
 
96
107
  def signal_handler(signum, frame):
97
- """Handle shutdown signals"""
108
+ """Handles OS signals like SIGINT (Ctrl+C) and SIGTERM for graceful shutdown."""
98
109
  print(f"Received signal {signum}")
99
110
  if server_instance:
100
111
  server_instance.should_exit = True
101
112
 
102
113
 
103
114
  def run(host: str = None, port: int = None):
104
- """Run the FastAPI app with graceful shutdown"""
115
+ """Runs the FastAPI application using Uvicorn.
116
+
117
+ This function configures and starts the Uvicorn server, setting up
118
+ signal handlers to ensure a graceful shutdown.
119
+
120
+ Args:
121
+ host: The host to bind the server to. Defaults to `SERVER_HOST` from settings.
122
+ port: The port to bind the server to. Defaults to `SERVER_PORT` from settings.
123
+ """
105
124
  global server_instance
106
125
 
107
126
  # Use values from settings if not explicitly provided
@@ -134,7 +153,7 @@ def run(host: str = None, port: int = None):
134
153
  print("Received interrupt signal, shutting down...")
135
154
  finally:
136
155
  server_instance = None
137
- print("Core server shutdown complete")
156
+ print("Server has shut down.")
138
157
 
139
158
 
140
159
  if __name__ == "__main__":
flowfile_core/routes/cloud_connections.py CHANGED
@@ -27,13 +27,11 @@ def create_cloud_storage_connection(input_connection: FullCloudStorageConnection
27
27
  """
28
28
  Create a new cloud storage connection.
29
29
  Parameters
30
- ----------
31
- input_connection: FullCloudStorageConnection schema containing connection details
32
- current_user: User obtained from Depends(get_current_active_user)
33
- db: Session obtained from Depends(get_db)
30
+ input_connection: FullCloudStorageConnection schema containing connection details
31
+ current_user: User obtained from Depends(get_current_active_user)
32
+ db: Session obtained from Depends(get_db)
34
33
  Returns
35
- -------
36
- Dict with a success message
34
+ Dict with a success message
37
35
  """
38
36
  logger.info(f'Create cloud connection {input_connection.connection_name}')
39
37
  try:
@@ -70,12 +68,10 @@ def get_cloud_connections(
70
68
  """
71
69
  Get all cloud storage connections for the current user.
72
70
  Parameters
73
- ----------
74
- db: Session obtained from Depends(get_db)
75
- current_user: User obtained from Depends(get_current_active_user)
71
+ db: Session obtained from Depends(get_db)
72
+ current_user: User obtained from Depends(get_current_active_user)
76
73
 
77
74
  Returns
78
- -------
79
- List[FullCloudStorageConnectionInterface]
75
+ List[FullCloudStorageConnectionInterface]
80
76
  """
81
77
  return get_all_cloud_connections_interface(db, current_user.id)
flowfile_core/routes/logs.py CHANGED
@@ -33,9 +33,7 @@ async def format_sse_message(data: str) -> str:
33
33
 
34
34
  @router.post("/logs/{flow_id}", tags=['flow_logging'])
35
35
  async def add_log(flow_id: int, log_message: str):
36
- """
37
- Adds a log message to the log file for a given flow_id.
38
- """
36
+ """Adds a log message to the log file for a given flow_id."""
39
37
  flow = flow_file_handler.get_flow(flow_id)
40
38
  if not flow:
41
39
  raise HTTPException(status_code=404, detail="Flow not found")
@@ -45,9 +43,7 @@ async def add_log(flow_id: int, log_message: str):
45
43
 
46
44
  @router.post("/raw_logs", tags=['flow_logging'])
47
45
  async def add_raw_log(raw_log_input: schemas.RawLogInput):
48
- """
49
- Adds a log message to the log file for a given flow_id.
50
- """
46
+ """Adds a log message to the log file for a given flow_id."""
51
47
  logger.info('Adding raw logs')
52
48
  flow = flow_file_handler.get_flow(raw_log_input.flowfile_flow_id)
53
49
  if not flow:
flowfile_core/routes/public.py CHANGED
@@ -7,4 +7,5 @@ router = APIRouter()
7
7
 
8
8
  @router.get("/", tags=['admin'])
9
9
  async def docs_redirect():
10
+ """ Redirects to the documentation page."""
10
11
  return RedirectResponse(url='/docs')