Flowfile 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile might be problematic. Click here for more details.

Files changed (145) hide show
  1. flowfile/__init__.py +27 -6
  2. flowfile/api.py +1 -0
  3. flowfile/web/__init__.py +2 -2
  4. flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +86 -0
  5. flowfile/web/static/assets/CloudConnectionManager-c20a740f.js +783 -0
  6. flowfile/web/static/assets/CloudStorageReader-29d14fcc.css +143 -0
  7. flowfile/web/static/assets/CloudStorageReader-960b400a.js +437 -0
  8. flowfile/web/static/assets/CloudStorageWriter-49c9a4b2.css +138 -0
  9. flowfile/web/static/assets/CloudStorageWriter-e3decbdd.js +430 -0
  10. flowfile/web/static/assets/{CrossJoin-dfcf7351.js → CrossJoin-d67e2405.js} +8 -8
  11. flowfile/web/static/assets/{DatabaseConnectionSettings-b2afb1d7.js → DatabaseConnectionSettings-a81e0f7e.js} +2 -2
  12. flowfile/web/static/assets/{DatabaseManager-824a49b2.js → DatabaseManager-9ea35e84.js} +2 -2
  13. flowfile/web/static/assets/{DatabaseReader-a48124d8.js → DatabaseReader-9578bfa5.js} +9 -9
  14. flowfile/web/static/assets/{DatabaseWriter-b47cbae2.js → DatabaseWriter-19531098.js} +9 -9
  15. flowfile/web/static/assets/{ExploreData-fdfc45a4.js → ExploreData-40476474.js} +47141 -43697
  16. flowfile/web/static/assets/{ExternalSource-861b0e71.js → ExternalSource-2297ef96.js} +6 -6
  17. flowfile/web/static/assets/{Filter-f87bb897.js → Filter-f211c03a.js} +8 -8
  18. flowfile/web/static/assets/{Formula-b8cefc31.css → Formula-29f19d21.css} +10 -0
  19. flowfile/web/static/assets/{Formula-1e2ed720.js → Formula-4207ea31.js} +75 -9
  20. flowfile/web/static/assets/{FuzzyMatch-b6cc4fdd.js → FuzzyMatch-bf120df0.js} +9 -9
  21. flowfile/web/static/assets/{GraphSolver-6a371f4c.js → GraphSolver-5bb7497a.js} +5 -5
  22. flowfile/web/static/assets/{GroupBy-f7b7f472.js → GroupBy-92c81b65.js} +6 -6
  23. flowfile/web/static/assets/{Join-eec38203.js → Join-4e49a274.js} +23 -15
  24. flowfile/web/static/assets/{Join-41c0f331.css → Join-f45eff22.css} +20 -20
  25. flowfile/web/static/assets/{ManualInput-9aaa46fb.js → ManualInput-90998ae8.js} +106 -34
  26. flowfile/web/static/assets/{ManualInput-ac7b9972.css → ManualInput-a71b52c6.css} +29 -17
  27. flowfile/web/static/assets/{Output-3b2ca045.js → Output-81e3e917.js} +4 -4
  28. flowfile/web/static/assets/{Pivot-a4f5d88f.js → Pivot-a3419842.js} +6 -6
  29. flowfile/web/static/assets/{PolarsCode-49ce444f.js → PolarsCode-72710deb.js} +6 -6
  30. flowfile/web/static/assets/{Read-07acdc9a.js → Read-c4059daf.js} +6 -6
  31. flowfile/web/static/assets/{RecordCount-6a21da56.js → RecordCount-c2b5e095.js} +5 -5
  32. flowfile/web/static/assets/{RecordId-949bdc17.js → RecordId-10baf191.js} +6 -6
  33. flowfile/web/static/assets/{Sample-7afca6e1.js → Sample-3ed9a0ae.js} +5 -5
  34. flowfile/web/static/assets/{SecretManager-b41c029d.js → SecretManager-0d49c0e8.js} +2 -2
  35. flowfile/web/static/assets/{Select-32b28406.js → Select-8a02a0b3.js} +8 -8
  36. flowfile/web/static/assets/{SettingsSection-a0f15a05.js → SettingsSection-4c0f45f5.js} +1 -1
  37. flowfile/web/static/assets/{Sort-fc6ba0e2.js → Sort-f55c9f9d.js} +6 -6
  38. flowfile/web/static/assets/{TextToRows-23127596.js → TextToRows-5dbc2145.js} +8 -8
  39. flowfile/web/static/assets/{UnavailableFields-c42880a3.js → UnavailableFields-a1768e52.js} +2 -2
  40. flowfile/web/static/assets/{Union-39eecc6c.js → Union-f2aefdc9.js} +5 -5
  41. flowfile/web/static/assets/{Unique-a0e8fe61.js → Unique-46b250da.js} +8 -8
  42. flowfile/web/static/assets/{Unpivot-1e2d43f0.js → Unpivot-25ac84cc.js} +5 -5
  43. flowfile/web/static/assets/api-6ef0dcef.js +80 -0
  44. flowfile/web/static/assets/{api-44ca9e9c.js → api-a0abbdc7.js} +1 -1
  45. flowfile/web/static/assets/cloud_storage_reader-aa1415d6.png +0 -0
  46. flowfile/web/static/assets/{designer-267d44f1.js → designer-13eabd83.js} +36 -34
  47. flowfile/web/static/assets/{documentation-6c0810a2.js → documentation-b87e7f6f.js} +1 -1
  48. flowfile/web/static/assets/{dropDown-52790b15.js → dropDown-13564764.js} +1 -1
  49. flowfile/web/static/assets/{fullEditor-e272b506.js → fullEditor-fd2cd6f9.js} +2 -2
  50. flowfile/web/static/assets/{genericNodeSettings-4bdcf98e.js → genericNodeSettings-71e11604.js} +3 -3
  51. flowfile/web/static/assets/{index-e235a8bc.js → index-f6c15e76.js} +59 -22
  52. flowfile/web/static/assets/{nodeTitle-fc3fc4b7.js → nodeTitle-988d9efe.js} +3 -3
  53. flowfile/web/static/assets/{secretApi-cdc2a3fd.js → secretApi-dd636aa2.js} +1 -1
  54. flowfile/web/static/assets/{selectDynamic-96aa82cd.js → selectDynamic-af36165e.js} +3 -3
  55. flowfile/web/static/assets/{vue-codemirror.esm-25e75a08.js → vue-codemirror.esm-2847001e.js} +2 -1
  56. flowfile/web/static/assets/{vue-content-loader.es-6c4b1c24.js → vue-content-loader.es-0371da73.js} +1 -1
  57. flowfile/web/static/index.html +1 -1
  58. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/METADATA +9 -4
  59. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/RECORD +131 -124
  60. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/entry_points.txt +2 -0
  61. flowfile_core/__init__.py +3 -0
  62. flowfile_core/auth/jwt.py +39 -0
  63. flowfile_core/configs/node_store/nodes.py +9 -6
  64. flowfile_core/configs/settings.py +6 -5
  65. flowfile_core/database/connection.py +63 -15
  66. flowfile_core/database/init_db.py +0 -1
  67. flowfile_core/database/models.py +49 -2
  68. flowfile_core/flowfile/code_generator/code_generator.py +472 -17
  69. flowfile_core/flowfile/connection_manager/models.py +1 -1
  70. flowfile_core/flowfile/database_connection_manager/db_connections.py +216 -2
  71. flowfile_core/flowfile/extensions.py +1 -1
  72. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +259 -0
  73. flowfile_core/flowfile/flow_data_engine/create/funcs.py +19 -8
  74. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1062 -311
  75. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +12 -2
  76. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +1 -1
  77. flowfile_core/flowfile/flow_data_engine/join/__init__.py +2 -1
  78. flowfile_core/flowfile/flow_data_engine/join/utils.py +25 -0
  79. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +3 -1
  80. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +29 -22
  81. flowfile_core/flowfile/flow_data_engine/utils.py +1 -40
  82. flowfile_core/flowfile/flow_graph.py +718 -253
  83. flowfile_core/flowfile/flow_graph_utils.py +2 -2
  84. flowfile_core/flowfile/flow_node/flow_node.py +563 -117
  85. flowfile_core/flowfile/flow_node/models.py +154 -20
  86. flowfile_core/flowfile/flow_node/schema_callback.py +3 -2
  87. flowfile_core/flowfile/handler.py +2 -33
  88. flowfile_core/flowfile/manage/open_flowfile.py +1 -2
  89. flowfile_core/flowfile/sources/external_sources/__init__.py +0 -2
  90. flowfile_core/flowfile/sources/external_sources/factory.py +4 -7
  91. flowfile_core/flowfile/util/calculate_layout.py +0 -2
  92. flowfile_core/flowfile/utils.py +35 -26
  93. flowfile_core/main.py +35 -15
  94. flowfile_core/routes/cloud_connections.py +77 -0
  95. flowfile_core/routes/logs.py +2 -7
  96. flowfile_core/routes/public.py +1 -0
  97. flowfile_core/routes/routes.py +130 -90
  98. flowfile_core/routes/secrets.py +72 -14
  99. flowfile_core/schemas/__init__.py +8 -0
  100. flowfile_core/schemas/cloud_storage_schemas.py +215 -0
  101. flowfile_core/schemas/input_schema.py +121 -71
  102. flowfile_core/schemas/output_model.py +19 -3
  103. flowfile_core/schemas/schemas.py +150 -12
  104. flowfile_core/schemas/transform_schema.py +175 -35
  105. flowfile_core/utils/utils.py +40 -1
  106. flowfile_core/utils/validate_setup.py +41 -0
  107. flowfile_frame/__init__.py +9 -1
  108. flowfile_frame/cloud_storage/frame_helpers.py +39 -0
  109. flowfile_frame/cloud_storage/secret_manager.py +73 -0
  110. flowfile_frame/expr.py +28 -1
  111. flowfile_frame/expr.pyi +76 -61
  112. flowfile_frame/flow_frame.py +481 -208
  113. flowfile_frame/flow_frame.pyi +140 -91
  114. flowfile_frame/flow_frame_methods.py +160 -22
  115. flowfile_frame/group_frame.py +3 -0
  116. flowfile_frame/utils.py +25 -3
  117. flowfile_worker/external_sources/s3_source/main.py +216 -0
  118. flowfile_worker/external_sources/s3_source/models.py +142 -0
  119. flowfile_worker/funcs.py +51 -6
  120. flowfile_worker/models.py +22 -2
  121. flowfile_worker/routes.py +40 -38
  122. flowfile_worker/utils.py +1 -1
  123. test_utils/s3/commands.py +46 -0
  124. test_utils/s3/data_generator.py +292 -0
  125. test_utils/s3/demo_data_generator.py +186 -0
  126. test_utils/s3/fixtures.py +214 -0
  127. flowfile/web/static/assets/AirbyteReader-1ac35765.css +0 -314
  128. flowfile/web/static/assets/AirbyteReader-e08044e5.js +0 -922
  129. flowfile/web/static/assets/dropDownGeneric-60f56a8a.js +0 -72
  130. flowfile/web/static/assets/dropDownGeneric-895680d6.css +0 -10
  131. flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +0 -159
  132. flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +0 -172
  133. flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +0 -173
  134. flowfile_core/schemas/defaults.py +0 -9
  135. flowfile_core/schemas/external_sources/airbyte_schemas.py +0 -20
  136. flowfile_core/schemas/models.py +0 -193
  137. flowfile_worker/external_sources/airbyte_sources/cache_manager.py +0 -161
  138. flowfile_worker/external_sources/airbyte_sources/main.py +0 -89
  139. flowfile_worker/external_sources/airbyte_sources/models.py +0 -133
  140. flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
  141. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/LICENSE +0 -0
  142. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/WHEEL +0 -0
  143. {flowfile_core/flowfile/sources/external_sources/airbyte_sources → flowfile_frame/cloud_storage}/__init__.py +0 -0
  144. {flowfile_core/schemas/external_sources → flowfile_worker/external_sources/s3_source}/__init__.py +0 -0
  145. {flowfile_worker/external_sources/airbyte_sources → test_utils/s3}/__init__.py +0 -0
@@ -1,15 +1,32 @@
1
1
 
2
2
  import pyarrow as pa
3
3
  from typing import List, Union, Callable, Optional, Literal
4
+ from dataclasses import dataclass
5
+
6
+ # Forward declaration for type hints to avoid circular imports
7
+ if False:
8
+ from flowfile_core.flowfile.flow_node.flow_node import FlowNode
4
9
 
5
10
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
6
11
  from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine
7
12
  from flowfile_core.schemas import schemas
8
- from dataclasses import dataclass
9
13
 
10
14
 
11
15
  @dataclass
12
16
  class NodeStepPromise:
17
+ """
18
+ A lightweight, temporary representation of a node during the initial
19
+ graph construction phase, before full `FlowNode` objects are created.
20
+
21
+ Attributes:
22
+ node_id: The unique identifier for the node.
23
+ name: The display name of the node.
24
+ is_start: A boolean indicating if this is a starting node with no inputs.
25
+ leads_to_id: A list of node IDs that this node connects to.
26
+ left_input: The ID of the node connected to the left input port.
27
+ right_input: The ID of the node connected to the right input port.
28
+ depends_on: A list of node IDs that this node depends on for main inputs.
29
+ """
13
30
  node_id: Union[str, int]
14
31
  name: str
15
32
  is_start: bool
@@ -20,13 +37,79 @@ class NodeStepPromise:
20
37
 
21
38
 
22
39
  class NodeStepStats:
40
+ """
41
+ Tracks the execution status and statistics of a `FlowNode`.
42
+ """
23
43
  error: str = None
24
- has_run: bool = False
44
+ _has_run_with_current_setup: bool = False
45
+ has_completed_last_run: bool = False
25
46
  active: bool = True
26
47
  is_canceled: bool = False
27
48
 
49
+ def __init__(self, error: str = None,
50
+ has_run_with_current_setup: bool = False,
51
+ has_completed_last_run: bool = False,
52
+ active: bool = True,
53
+ is_canceled: bool = False):
54
+ """
55
+ Initializes the node's statistics.
56
+
57
+ :param error: Any error message from the last run.
58
+ :param has_run_with_current_setup: Flag indicating if the node has run successfully with its current configuration.
59
+ :param has_completed_last_run: Flag indicating if the last triggered run finished (successfully or not).
60
+ :param active: Flag indicating if the node is active in the flow.
61
+ :param is_canceled: Flag indicating if the last run was canceled.
62
+ """
63
+ self.error = error
64
+ self._has_run_with_current_setup = has_run_with_current_setup
65
+ self.has_completed_last_run = has_completed_last_run
66
+ self.active = active
67
+ self.is_canceled = is_canceled
68
+
69
+ def __repr__(self) -> str:
70
+ """
71
+ Provides a string representation of the node's stats.
72
+ :return: A string detailing the current stats.
73
+ """
74
+ return (f"NodeStepStats(error={self.error}, has_run_with_current_setup={self.has_run_with_current_setup}, "
75
+ f"has_completed_last_run={self.has_completed_last_run}, "
76
+ f"active={self.active}, is_canceled={self.is_canceled})")
77
+
78
+ @property
79
+ def has_run_with_current_setup(self) -> bool:
80
+ """
81
+ Checks if the node has run successfully with its current settings and inputs.
82
+ This is the primary flag for caching.
83
+ :return: True if the node is considered up-to-date, False otherwise.
84
+ """
85
+ return self._has_run_with_current_setup
86
+
87
+ @has_run_with_current_setup.setter
88
+ def has_run_with_current_setup(self, value: bool):
89
+ """
90
+ Sets the run status of the node.
91
+ If set to True, it implies the last run was completed successfully.
92
+ :param value: The new boolean status.
93
+ """
94
+ if value:
95
+ self._has_run_with_current_setup = True
96
+ self.has_completed_last_run = True
97
+ else:
98
+ self._has_run_with_current_setup = False
99
+
28
100
 
29
101
  class NodeStepSettings:
102
+ """
103
+ Holds the configuration settings that control a node's execution behavior.
104
+
105
+ Attributes:
106
+ cache_results: If True, the node will cache its results to avoid re-computation.
107
+ renew_schema: If True, the schema will be re-evaluated on changes.
108
+ streamable: If True, the node can process data in a streaming fashion.
109
+ setup_errors: If True, indicates a non-blocking error occurred during setup.
110
+ breaking_setup_errors: If True, indicates an error occurred that prevents execution.
111
+ execute_location: The preferred location for execution ('auto', 'local', 'remote').
112
+ """
30
113
  cache_results: bool = False
31
114
  renew_schema: bool = True
32
115
  streamable: bool = True
@@ -36,20 +119,40 @@ class NodeStepSettings:
36
119
 
37
120
 
38
121
  class NodeStepInputs:
122
+ """
123
+ Manages the input connections for a `FlowNode`.
124
+
125
+ Attributes:
126
+ left_input: The `FlowNode` connected to the left input port.
127
+ right_input: The `FlowNode` connected to the right input port.
128
+ main_inputs: A list of `FlowNode` objects connected to the main input port(s).
129
+ """
39
130
  left_input: "FlowNode" = None
40
131
  right_input: "FlowNode" = None
41
132
  main_inputs: List["FlowNode"] = None
42
133
 
43
134
  @property
44
- def input_ids(self) -> List[int]:
135
+ def input_ids(self) -> List[int] | None:
136
+ """
137
+ Gets the IDs of all connected input nodes.
138
+ :return: A list of integer node IDs.
139
+ """
45
140
  if self.main_inputs is not None:
46
141
  return [node_input.node_information.id for node_input in self.get_all_inputs()]
47
142
 
48
143
  def get_all_inputs(self) -> List["FlowNode"]:
144
+ """
145
+ Retrieves a single list containing all input nodes (main, left, and right).
146
+ :return: A list of all connected `FlowNode` objects.
147
+ """
49
148
  main_inputs = self.main_inputs or []
50
149
  return [v for v in main_inputs + [self.left_input, self.right_input] if v is not None]
51
150
 
52
151
  def __repr__(self) -> str:
152
+ """
153
+ Provides a string representation of the node's inputs.
154
+ :return: A string detailing the connected inputs.
155
+ """
53
156
  left_repr = f"Left Input: {self.left_input}" if self.left_input else "Left Input: None"
54
157
  right_repr = f"Right Input: {self.right_input}" if self.right_input else "Right Input: None"
55
158
  main_inputs_repr = f"Main Inputs: {self.main_inputs}" if self.main_inputs else "Main Inputs: None"
@@ -57,27 +160,46 @@ class NodeStepInputs:
57
160
 
58
161
  def validate_if_input_connection_exists(self, node_input_id: int,
59
162
  connection_name: Literal['main', 'left', 'right']) -> bool:
60
- if connection_name == 'main':
61
- return any((node_input.node_information.id == node_input_id for node_input in self.main_inputs))
62
- if connection_name == 'left':
163
+ """
164
+ Checks if a connection from a specific node ID exists on a given port.
165
+
166
+ :param node_input_id: The ID of the source node to check for.
167
+ :param connection_name: The name of the input port ('main', 'left', 'right').
168
+ :return: True if the connection exists, False otherwise.
169
+ """
170
+ if connection_name == 'main' and self.main_inputs:
171
+ return any(node_input.node_information.id == node_input_id for node_input in self.main_inputs)
172
+ if connection_name == 'left' and self.left_input:
63
173
  return self.left_input.node_information.id == node_input_id
64
174
  if connection_name == 'right':
65
175
  return self.right_input.node_information.id == node_input_id
66
176
 
67
177
 
68
178
  class NodeSchemaInformation:
69
- result_schema: Optional[List[FlowfileColumn]] = [] # resulting schema of the function
70
- predicted_schema: Optional[List[FlowfileColumn]] = [] # predicted resulting schema of the function
71
- input_columns: List[str] = [] # columns that are needed for the function
72
- drop_columns: List[str] = [] # columns that will not be available after the function
73
- output_columns: List[FlowfileColumn] = [] # columns that will be added with the function
179
+ """
180
+ Stores all schema-related information for a `FlowNode`.
181
+
182
+ Attributes:
183
+ result_schema: The actual output schema after a successful execution.
184
+ predicted_schema: The predicted output schema, calculated without full execution.
185
+ input_columns: A list of column names the node requires from its inputs.
186
+ drop_columns: A list of column names that will be dropped by the node.
187
+ output_columns: A list of `FlowfileColumn` objects that will be added by the node.
188
+ """
189
+ result_schema: Optional[List[FlowfileColumn]] = None
190
+ predicted_schema: Optional[List[FlowfileColumn]] = None
191
+ input_columns: List[str] = []
192
+ drop_columns: List[str] = []
193
+ output_columns: List[FlowfileColumn] = []
74
194
 
75
195
 
76
196
  class NodeResults:
77
- _resulting_data: Optional[FlowDataEngine] = None # after successful execution this will contain the Flowfile
78
- example_data: Optional[
79
- FlowDataEngine] = None # after success this will contain a sample of the data (to provide frontend data)
80
- example_data_path: Optional[str] = None # Path to the arrow table file
197
+ """
198
+ Stores the outputs of a `FlowNode`'s execution, including data, errors, and metadata.
199
+ """
200
+ _resulting_data: Optional[FlowDataEngine] = None
201
+ example_data: Optional[FlowDataEngine] = None
202
+ example_data_path: Optional[str] = None
81
203
  example_data_generator: Optional[Callable[[], pa.Table]] = None
82
204
  run_time: int = -1
83
205
  errors: Optional[str] = None
@@ -93,19 +215,31 @@ class NodeResults:
93
215
  self.example_data_generator = None
94
216
  self.analysis_data_generator = None
95
217
 
96
- def get_example_data(self) -> pa.Table | None:
218
+ def get_example_data(self) -> Optional[pa.Table]:
219
+ """
220
+ Executes the generator to fetch a sample of the resulting data.
221
+ :return: A PyArrow Table containing a sample of the data, or None.
222
+ """
97
223
  if self.example_data_generator:
98
224
  return self.example_data_generator()
99
225
 
100
226
  @property
101
- def resulting_data(self) -> FlowDataEngine:
227
+ def resulting_data(self) -> Optional[FlowDataEngine]:
228
+ """
229
+ Gets the full resulting data from the node's execution.
230
+ :return: A `FlowDataEngine` instance containing the result, or None.
231
+ """
102
232
  return self._resulting_data
103
233
 
104
234
  @resulting_data.setter
105
- def resulting_data(self, d: FlowDataEngine):
235
+ def resulting_data(self, d: Optional[FlowDataEngine]):
236
+ """
237
+ Sets the resulting data.
238
+ :param d: The `FlowDataEngine` instance to store.
239
+ """
106
240
  self._resulting_data = d
107
241
 
108
242
  def reset(self):
243
+ """Resets all result attributes to their default, empty state."""
109
244
  self._resulting_data = None
110
- self.run_time = -1
111
-
245
+ self.run_time = -1
@@ -14,6 +14,7 @@ class SingleExecutionFuture(Generic[T]):
14
14
  func: Callable[[], T]
15
15
  on_error: Optional[Callable[[Exception], Any]]
16
16
  result_value: Optional[T]
17
+ has_run_at_least_once: bool = False # Indicates if the function has been run at least once
17
18
 
18
19
  def __init__(
19
20
  self,
@@ -26,6 +27,7 @@ class SingleExecutionFuture(Generic[T]):
26
27
  self.func = func
27
28
  self.on_error = on_error
28
29
  self.result_value = None
30
+ self.has_run_at_least_once = False
29
31
 
30
32
  def start(self) -> None:
31
33
  """Start the function execution if not already started."""
@@ -35,8 +37,7 @@ class SingleExecutionFuture(Generic[T]):
35
37
 
36
38
  def cleanup(self) -> None:
37
39
  """Clean up resources by clearing the future and shutting down the executor."""
38
- # if self.future:
39
- # self.future = None
40
+ self.has_run_at_least_once = True
40
41
  self.executor.shutdown(wait=False)
41
42
 
42
43
  def __call__(self) -> Optional[T]:
@@ -1,8 +1,3 @@
1
- import time
2
- import random
3
- import uuid
4
- import socket
5
- import hashlib
6
1
 
7
2
  from dataclasses import dataclass
8
3
  from typing import Dict, List
@@ -12,33 +7,7 @@ from pathlib import Path
12
7
  from flowfile_core.flowfile.manage.open_flowfile import open_flow
13
8
  from flowfile_core.flowfile.flow_graph import FlowGraph
14
9
  from flowfile_core.schemas.schemas import FlowSettings
15
- from flowfile_core.configs import logger
16
-
17
-
18
- def create_unique_id() -> int:
19
- """
20
- Create a unique id for the flowfile with increased randomness while maintaining 32-bit compatibility
21
- Returns:
22
- int: unique id within 32 bits (4 bytes)
23
- """
24
- # Get various entropy sources
25
- time_ms = int(time.time() * 1000)
26
- pid = os.getpid()
27
- random_bytes = random.getrandbits(32)
28
- mac_addr = uuid.getnode() # MAC address as integer
29
- hostname = socket.gethostname()
30
-
31
- # Combine all sources into a string
32
- seed = f"{time_ms}-{pid}-{random_bytes}-{mac_addr}-{hostname}-{uuid.uuid4()}"
33
-
34
- # Create a hash of all entropy sources
35
- hash_obj = hashlib.md5(seed.encode())
36
- hash_int = int(hash_obj.hexdigest(), 16)
37
-
38
- # Ensure the result fits within 32 bits (4 bytes)
39
- unique_id = hash_int & 0xFFFFFFFF
40
-
41
- return unique_id
10
+ from flowfile_core.flowfile.utils import create_unique_id
42
11
 
43
12
 
44
13
  @dataclass
@@ -71,7 +40,7 @@ class FlowfileHandler:
71
40
  raise 'flow already registered'
72
41
  else:
73
42
  name = flow_settings.name if flow_settings.name else flow_settings.flow_id
74
- self._flows[flow_settings.flow_id] = FlowGraph(name=name, flow_id=flow_settings.flow_id, flow_settings=flow_settings)
43
+ self._flows[flow_settings.flow_id] = FlowGraph(name=name, flow_settings=flow_settings)
75
44
  return self.get_flow(flow_settings.flow_id)
76
45
 
77
46
  def get_flow(self, flow_id: int) -> FlowGraph | None:
@@ -70,8 +70,7 @@ def open_flow(flow_path: Path) -> FlowGraph:
70
70
  flow_storage_obj.flow_name = str(flow_path.stem)
71
71
  ensure_compatibility(flow_storage_obj, str(flow_path))
72
72
  ingestion_order = determine_insertion_order(flow_storage_obj)
73
- new_flow = FlowGraph(flow_id=flow_storage_obj.flow_id, name=flow_storage_obj.flow_name,
74
- flow_settings=flow_storage_obj.flow_settings)
73
+ new_flow = FlowGraph(name=flow_storage_obj.flow_name, flow_settings=flow_storage_obj.flow_settings)
75
74
  for node_id in ingestion_order:
76
75
  node_info: schemas.NodeInformation = flow_storage_obj.data[node_id]
77
76
  node_promise = input_schema.NodePromise(flow_id=new_flow.flow_id, node_id=node_info.id,
@@ -1,3 +1 @@
1
1
  from flowfile_core.flowfile.sources.external_sources import custom_external_sources
2
- # from flowfile.sources.external_sources.custom_external_sources.external_source import check_for_key_vault_existence
3
- # from flowfile.sources.external_sources.airbyte_sources.airbyte import AirbyteSource
@@ -1,22 +1,19 @@
1
1
  from flowfile_core.flowfile.sources.external_sources.custom_external_sources.external_source import CustomExternalSource
2
- from flowfile_core.flowfile.sources.external_sources.airbyte_sources.airbyte import AirbyteSource
3
2
 
4
3
 
5
- def data_source_factory(source_type: str, **kwargs) -> CustomExternalSource | AirbyteSource:
4
+ def data_source_factory(source_type: str, **kwargs) -> CustomExternalSource:
6
5
  """
7
- Factory function to generate either CustomExternalSource or AirbyteSource.
6
+ Factory function to generate either CustomExternalSource .
8
7
 
9
8
  Args:
10
- source_type (str): The type of source to create ("custom" or "airbyte").
9
+ source_type (str): The type of source to create ("custom").
11
10
  **kwargs: The keyword arguments required for the specific source type.
12
11
 
13
12
  Returns:
14
- Union[CustomExternalSource, AirbyteSource]: An instance of the selected data source type.
13
+ Union[CustomExternalSource]: An instance of the selected data source type.
15
14
  """
16
15
  if source_type == "custom":
17
16
  return CustomExternalSource(**kwargs)
18
- elif source_type == "airbyte":
19
- return AirbyteSource(**kwargs)
20
17
  else:
21
18
  raise ValueError(f"Unknown source type: {source_type}")
22
19
 
@@ -4,7 +4,6 @@ from collections import defaultdict, deque
4
4
  from typing import List, Dict, Set, Tuple, TYPE_CHECKING
5
5
 
6
6
  if TYPE_CHECKING:
7
- # Make sure this import path is correct for your project structure
8
7
  from flowfile_core.flowfile.flow_graph import FlowGraph
9
8
 
10
9
 
@@ -64,7 +63,6 @@ def calculate_layered_layout(
64
63
  if child_node.node_id in node_ids:
65
64
  if child_node.node_id not in adj[node.node_id]:
66
65
  adj[node.node_id].append(child_node.node_id)
67
- # Assuming primary method works or in_degree is handled elsewhere
68
66
  in_degree[child_node.node_id] += 1
69
67
 
70
68
  stages: Dict[int, List[int]] = defaultdict(list)
@@ -1,15 +1,15 @@
1
1
  import os
2
- import hashlib
3
2
  import json
4
- import polars as pl
5
3
  import shutil
6
4
 
7
- from datetime import datetime, date, time
5
+ import datetime
8
6
  from typing import List
9
7
  from decimal import Decimal
10
-
11
- from flowfile_core.flowfile.flow_data_engine.utils import standardize_col_dtype
12
- from flowfile_core.schemas import input_schema
8
+ import time
9
+ import random
10
+ import uuid
11
+ import socket
12
+ import hashlib
13
13
 
14
14
 
15
15
  def generate_sha256_hash(data: bytes):
@@ -30,11 +30,11 @@ def snake_case_to_camel_case(text: str) -> str:
30
30
 
31
31
 
32
32
  def json_default(val):
33
- if isinstance(val, datetime):
33
+ if isinstance(val, datetime.datetime):
34
34
  return val.isoformat(timespec='microseconds')
35
- elif isinstance(val, date):
35
+ elif isinstance(val, datetime.date):
36
36
  return val.isoformat()
37
- elif isinstance(val, time):
37
+ elif isinstance(val, datetime.time):
38
38
  return val.isoformat()
39
39
  elif hasattr(val, '__dict__'):
40
40
  return val.__dict__
@@ -61,7 +61,7 @@ def get_hash(val):
61
61
  if hasattr(val, 'overridden_hash') and val.overridden_hash():
62
62
  val = hash(val)
63
63
  elif hasattr(val, '__dict__'):
64
- val = {k: v for k, v in val.__dict__.items() if k not in {'pos_x', 'pos_y'}}
64
+ val = {k: v for k, v in val.__dict__.items() if k not in {'pos_x', 'pos_y', 'description'}}
65
65
  elif hasattr(val, 'json'):
66
66
  pass
67
67
  return generate_sha256_hash(json_dumps(val).encode('utf-8'))
@@ -119,19 +119,28 @@ def batch_generator(input_list: List, batch_size: int = 10000):
119
119
  run = False
120
120
 
121
121
 
122
- def _handle_raw_data(node_manual_input: input_schema.NodeManualInput):
123
- """Ensure compatibility with the new typed raw data and the old dict form data type"""
124
- if (not (hasattr(node_manual_input, "raw_data_format") and node_manual_input.raw_data_format)
125
- and (hasattr(node_manual_input, 'raw_data') and node_manual_input.raw_data)):
126
- values = [standardize_col_dtype([vv for vv in c]) for c in zip(*(r.values()
127
- for r in node_manual_input.raw_data))]
128
- data_types = (pl.DataType.from_python(type(next((v for v in column_values), None))) for column_values in values)
129
- _columns = [input_schema.MinimalFieldInfo(name=c, data_type=str(next(data_types))) for c in
130
- node_manual_input.raw_data[0].keys()]
131
-
132
- node_manual_input.raw_data_format = input_schema.RawData(columns=_columns, data=values)
133
- elif ((hasattr(node_manual_input, "raw_data_format") and node_manual_input.raw_data_format)
134
- and not (hasattr(node_manual_input, 'raw_data') and node_manual_input.raw_data)):
135
- node_manual_input.raw_data = [{c.name: node_manual_input.raw_data_format.data[ci][ri] for ci, c in
136
- enumerate(node_manual_input.raw_data_format.columns)}
137
- for ri in range(len(node_manual_input.raw_data_format.data[0]))]
122
+ def create_unique_id() -> int:
123
+ """
124
+ Create a unique id for the flowfile with increased randomness while maintaining 32-bit compatibility
125
+ Returns:
126
+ int: unique id within 32 bits (4 bytes)
127
+ """
128
+ # Get various entropy sources
129
+ time_ms = int(time.time() * 1000)
130
+ pid = os.getpid()
131
+ random_bytes = random.getrandbits(32)
132
+ mac_addr = uuid.getnode() # MAC address as integer
133
+ hostname = socket.gethostname()
134
+
135
+ # Combine all sources into a string
136
+ seed = f"{time_ms}-{pid}-{random_bytes}-{mac_addr}-{hostname}-{uuid.uuid4()}"
137
+
138
+ # Create a hash of all entropy sources
139
+
140
+ hash_obj = hashlib.sha256(seed.encode())
141
+ hash_int = int(hash_obj.hexdigest(), 16)
142
+
143
+ # Ensure the result fits within 32 bits (4 bytes)
144
+ unique_id = hash_int & 0xFFFFFFFF
145
+
146
+ return unique_id
flowfile_core/main.py CHANGED
@@ -8,14 +8,14 @@ from fastapi import FastAPI
8
8
  from fastapi.middleware.cors import CORSMiddleware
9
9
 
10
10
  from flowfile_core import ServerRun
11
- from flowfile_core.configs.settings import (SERVER_HOST, SERVER_PORT, WORKER_HOST, WORKER_PORT, WORKER_URL,
12
- OFFLOAD_TO_WORKER)
11
+ from flowfile_core.configs.settings import (SERVER_HOST, SERVER_PORT, WORKER_HOST, WORKER_PORT, WORKER_URL,)
13
12
 
14
13
  from flowfile_core.routes.auth import router as auth_router
15
14
  from flowfile_core.routes.secrets import router as secrets_router
16
15
  from flowfile_core.routes.routes import router
17
16
  from flowfile_core.routes.public import router as public_router
18
17
  from flowfile_core.routes.logs import router as logs_router
18
+ from flowfile_core.routes.cloud_connections import router as cloud_connections_router
19
19
 
20
20
  from flowfile_core.configs.flow_logger import clear_all_flow_logs
21
21
 
@@ -27,7 +27,11 @@ server_instance = None
27
27
 
28
28
  @asynccontextmanager
29
29
  async def shutdown_handler(app: FastAPI):
30
- """Handle graceful shutdown of the application."""
30
+ """Handles the graceful startup and shutdown of the FastAPI application.
31
+
32
+ This context manager ensures that resources, such as log files, are cleaned
33
+ up properly when the application is terminated.
34
+ """
31
35
  print('Starting core application...')
32
36
  try:
33
37
  yield
@@ -72,35 +76,51 @@ app.include_router(router)
72
76
  app.include_router(logs_router, tags=["logs"])
73
77
  app.include_router(auth_router, prefix="/auth", tags=["auth"])
74
78
  app.include_router(secrets_router, prefix="/secrets", tags=["secrets"])
79
+ app.include_router(cloud_connections_router, prefix="/cloud_connections", tags=["cloud_connections"])
75
80
 
76
81
 
77
82
# Strong references to in-flight shutdown tasks. The event loop only keeps weak
# references to tasks, so an unreferenced task could be garbage-collected
# before it finishes.
_pending_shutdown_tasks = set()


@app.post("/shutdown")
async def shutdown():
    """An API endpoint to gracefully shut down the server.

    Sets the `ServerRun.exit` flag and schedules `trigger_shutdown` on the
    running event loop, so the HTTP response is returned before the server
    instance is asked to exit.

    Returns:
        Dict with a message confirming that shutdown was initiated.
    """
    # `ServerRun` only carries the exit flag; it is not a task scheduler.
    # NOTE(review): restored from the previous release - confirm other
    # components still poll this flag.
    ServerRun.exit = True

    # Schedule without awaiting so the response is sent first; the coroutine
    # itself waits briefly before flagging the server instance.
    task = asyncio.create_task(trigger_shutdown())
    _pending_shutdown_tasks.add(task)
    task.add_done_callback(_pending_shutdown_tasks.discard)
    return {"message": "Server is shutting down"}
86
94
 
87
95
 
88
96
async def trigger_shutdown():
    """(Internal) Ask the running server instance to exit.

    Sleeps for one second first so that the `/shutdown` response can be
    delivered, then sets `should_exit` on the module-level server instance
    when one is present.
    """
    # Give the HTTP response time to go out before stopping the server
    await asyncio.sleep(1)
    if not server_instance:
        return
    server_instance.should_exit = True
93
105
 
94
106
 
95
107
def signal_handler(signum, frame):
    """Signal callback that requests a graceful shutdown of the server.

    Prints the received signal number and, when a server instance exists,
    sets its `should_exit` flag.

    Args:
        signum: Number of the signal that was received.
        frame: Current stack frame passed by the signal machinery (unused).
    """
    print(f"Received signal {signum}")
    if not server_instance:
        return
    server_instance.should_exit = True
100
112
 
101
113
 
102
114
  def run(host: str = None, port: int = None):
103
- """Run the FastAPI app with graceful shutdown"""
115
+ """Runs the FastAPI application using Uvicorn.
116
+
117
+ This function configures and starts the Uvicorn server, setting up
118
+ signal handlers to ensure a graceful shutdown.
119
+
120
+ Args:
121
+ host: The host to bind the server to. Defaults to `SERVER_HOST` from settings.
122
+ port: The port to bind the server to. Defaults to `SERVER_PORT` from settings.
123
+ """
104
124
  global server_instance
105
125
 
106
126
  # Use values from settings if not explicitly provided
@@ -133,7 +153,7 @@ def run(host: str = None, port: int = None):
133
153
  print("Received interrupt signal, shutting down...")
134
154
  finally:
135
155
  server_instance = None
136
- print("Core server shutdown complete")
156
+ print("Server has shut down.")
137
157
 
138
158
 
139
159
  if __name__ == "__main__":
@@ -0,0 +1,77 @@
1
+ from typing import List
2
+
3
+ from fastapi import HTTPException, Depends, APIRouter
4
+ from sqlalchemy.orm import Session
5
+
6
+ # Core modules
7
+ from flowfile_core.auth.jwt import get_current_active_user
8
+ from flowfile_core.configs import logger
9
+ from flowfile_core.database.connection import get_db
10
+ from flowfile_core.flowfile.database_connection_manager.db_connections import (store_cloud_connection,
11
+ get_cloud_connection_schema,
12
+ get_all_cloud_connections_interface,
13
+ delete_cloud_connection)
14
+ # Schema and models
15
+ from flowfile_core.schemas.cloud_storage_schemas import FullCloudStorageConnection, FullCloudStorageConnectionInterface
16
+
17
+ # External dependencies
18
+ # File handling
19
+ router = APIRouter()
20
+
21
+
22
@router.post("/cloud_connection", tags=['cloud_connections'])
def create_cloud_storage_connection(input_connection: FullCloudStorageConnection,
                                    current_user=Depends(get_current_active_user),
                                    db: Session = Depends(get_db)
                                    ):
    """
    Create a new cloud storage connection.
    Parameters
        input_connection: FullCloudStorageConnection schema containing connection details
        current_user: User obtained from Depends(get_current_active_user)
        db: Session obtained from Depends(get_db)
    Returns
        Dict with a success message
    Raises
        HTTPException: 422 when storing the connection raises; a ValueError is
            reported as a duplicate connection name, any other exception is
            reported with its own message
    """
    logger.info(f'Create cloud connection {input_connection.connection_name}')
    try:
        store_cloud_connection(db, input_connection, current_user.id)
    except ValueError as exc:
        # Log the conflict too, so the name that caused it can be traced
        logger.error(exc)
        # Chain the cause so the original traceback is preserved in logs
        raise HTTPException(422, 'Connection name already exists') from exc
    except Exception as exc:
        logger.error(exc)
        raise HTTPException(422, str(exc)) from exc
    return {"message": "Cloud connection created successfully"}
45
+
46
+
47
@router.delete('/cloud_connection', tags=['cloud_connections'])
def delete_cloud_connection_with_connection_name(connection_name: str,
                                                 current_user=Depends(get_current_active_user),
                                                 db: Session = Depends(get_db)
                                                 ):
    """
    Delete a cloud connection.
    Parameters
        connection_name: Name of the connection to delete
        current_user: User obtained from Depends(get_current_active_user)
        db: Session obtained from Depends(get_db)
    Returns
        Dict with a success message
    Raises
        HTTPException: 404 when no connection with that name is found for the current user
    """
    logger.info(f'Deleting cloud connection {connection_name}')
    user_id = current_user.id
    # Look the connection up first so a missing name yields a 404
    if get_cloud_connection_schema(db, connection_name, user_id) is None:
        raise HTTPException(404, 'Cloud connection not found')
    delete_cloud_connection(db, connection_name, user_id)
    return {"message": "Cloud connection deleted successfully"}
61
+
62
+
63
@router.get('/cloud_connections', tags=['cloud_connections'],
            response_model=List[FullCloudStorageConnectionInterface])
def get_cloud_connections(
        db: Session = Depends(get_db),
        current_user=Depends(get_current_active_user)) -> List[FullCloudStorageConnectionInterface]:
    """
    Get all cloud storage connections for the current user.
    Parameters
        db: Session obtained from Depends(get_db)
        current_user: User obtained from Depends(get_current_active_user)

    Returns
        List[FullCloudStorageConnectionInterface]
    """
    # Tag matches the sibling routes so all cloud connection endpoints are
    # grouped together in the generated API docs
    return get_all_cloud_connections_interface(db, current_user.id)