Flowfile 0.3.1.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile might be problematic; see the release notes for more details.

Files changed (100)
  1. flowfile/__init__.py +2 -1
  2. flowfile/api.py +5 -3
  3. flowfile/web/__init__.py +3 -0
  4. flowfile/web/static/assets/{AirbyteReader-cb0c1d4a.js → AirbyteReader-2b1cf2d8.js} +10 -9
  5. flowfile/web/static/assets/{CrossJoin-a514fa59.js → CrossJoin-cc3ab73c.js} +8 -8
  6. flowfile/web/static/assets/{DatabaseConnectionSettings-f2cecf33.js → DatabaseConnectionSettings-307c4652.js} +2 -2
  7. flowfile/web/static/assets/{DatabaseManager-83ee3c98.js → DatabaseManager-69faa6e1.js} +10 -6
  8. flowfile/web/static/assets/{DatabaseReader-dc0c6881.js → DatabaseReader-e4134cd0.js} +9 -9
  9. flowfile/web/static/assets/{DatabaseWriter-5afe9f8d.js → DatabaseWriter-d32d75b1.js} +9 -9
  10. flowfile/web/static/assets/{ExploreData-c7ee19cf.js → ExploreData-5eb48389.js} +18639 -18629
  11. flowfile/web/static/assets/{ExternalSource-17b23a01.js → ExternalSource-29489051.js} +8 -21
  12. flowfile/web/static/assets/{Filter-90856b4f.js → Filter-031332bb.js} +9 -9
  13. flowfile/web/static/assets/{Formula-38b71e9e.js → Formula-3b900540.js} +15 -15
  14. flowfile/web/static/assets/{Formula-d60a74f4.css → Formula-b8cefc31.css} +4 -4
  15. flowfile/web/static/assets/{FuzzyMatch-d0f1fe81.js → FuzzyMatch-dee31153.js} +9 -9
  16. flowfile/web/static/assets/{GraphSolver-0c86bbc6.js → GraphSolver-ca74eb47.js} +5 -5
  17. flowfile/web/static/assets/{GroupBy-f2772e9f.js → GroupBy-081b6591.js} +8 -7
  18. flowfile/web/static/assets/{Join-bc3e1cf7.js → Join-b467376f.js} +11 -10
  19. flowfile/web/static/assets/{ManualInput-03aa0245.js → ManualInput-ffffb80a.js} +11 -8
  20. flowfile/web/static/assets/{Output-5b35eee8.js → Output-9a87d4ba.js} +4 -4
  21. flowfile/web/static/assets/{Pivot-7164087c.js → Pivot-ee3e6093.js} +8 -7
  22. flowfile/web/static/assets/{PolarsCode-3abf6507.js → PolarsCode-03921254.js} +13 -11
  23. flowfile/web/static/assets/{PopOver-b37ff9be.js → PopOver-3bdf8951.js} +1 -1
  24. flowfile/web/static/assets/{Read-65966a3e.js → Read-67fee3a0.js} +6 -6
  25. flowfile/web/static/assets/{RecordCount-c66c6d6d.js → RecordCount-a2acd02d.js} +7 -6
  26. flowfile/web/static/assets/{RecordId-826dc095.js → RecordId-0c8bcd77.js} +10 -8
  27. flowfile/web/static/assets/{Sample-4ed555c8.js → Sample-60594a3a.js} +7 -6
  28. flowfile/web/static/assets/{SecretManager-eac1e97d.js → SecretManager-bbcec2ac.js} +2 -2
  29. flowfile/web/static/assets/{Select-085f05cc.js → Select-9540e6ca.js} +8 -8
  30. flowfile/web/static/assets/{SettingsSection-1f5e79c1.js → SettingsSection-48f28104.js} +1 -1
  31. flowfile/web/static/assets/{Sort-3e6cb414.js → Sort-6dbe3633.js} +6 -6
  32. flowfile/web/static/assets/{TextToRows-606349bc.js → TextToRows-27aab4a8.js} +18 -13
  33. flowfile/web/static/assets/{UnavailableFields-b41976ed.js → UnavailableFields-8143044b.js} +2 -2
  34. flowfile/web/static/assets/{Union-fca91665.js → Union-52460248.js} +7 -6
  35. flowfile/web/static/assets/{Unique-a59f830e.js → Unique-f6962644.js} +8 -8
  36. flowfile/web/static/assets/{Unpivot-c3815565.js → Unpivot-1ff1e938.js} +5 -5
  37. flowfile/web/static/assets/{api-22b338bd.js → api-3b345d92.js} +1 -1
  38. flowfile/web/static/assets/{designer-e5bbe26f.js → designer-4736134f.js} +72 -42
  39. flowfile/web/static/assets/{documentation-08045cf2.js → documentation-b9545eba.js} +1 -1
  40. flowfile/web/static/assets/{dropDown-5e7e9a5a.js → dropDown-d5a4014c.js} +1 -1
  41. flowfile/web/static/assets/{dropDownGeneric-50a91b99.js → dropDownGeneric-1f4e32ec.js} +2 -2
  42. flowfile/web/static/assets/{fullEditor-705c6ccb.js → fullEditor-f4791c23.js} +3 -3
  43. flowfile/web/static/assets/{genericNodeSettings-65587f20.js → genericNodeSettings-1d456350.js} +3 -3
  44. flowfile/web/static/assets/{index-552863fd.js → index-f25c9283.js} +2608 -1570
  45. flowfile/web/static/assets/{nodeTitle-cf9bae3c.js → nodeTitle-cad6fd9d.js} +3 -3
  46. flowfile/web/static/assets/{secretApi-3ad510e1.js → secretApi-01f07e2c.js} +1 -1
  47. flowfile/web/static/assets/{selectDynamic-bd644891.js → selectDynamic-f46a4e3f.js} +3 -3
  48. flowfile/web/static/assets/{vue-codemirror.esm-dd17b478.js → vue-codemirror.esm-eb98fc8b.js} +15 -14
  49. flowfile/web/static/assets/{vue-content-loader.es-6b36f05e.js → vue-content-loader.es-860c0380.js} +1 -1
  50. flowfile/web/static/index.html +1 -1
  51. {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/METADATA +1 -3
  52. {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/RECORD +97 -88
  53. flowfile_core/configs/__init__.py +15 -4
  54. flowfile_core/configs/node_store/nodes.py +2 -4
  55. flowfile_core/configs/settings.py +5 -3
  56. flowfile_core/configs/utils.py +18 -0
  57. flowfile_core/flowfile/FlowfileFlow.py +84 -29
  58. flowfile_core/flowfile/database_connection_manager/db_connections.py +1 -1
  59. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +55 -18
  60. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +42 -9
  61. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +42 -3
  62. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +34 -2
  63. flowfile_core/flowfile/flow_data_engine/sample_data.py +25 -7
  64. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +4 -3
  65. flowfile_core/flowfile/flow_data_engine/utils.py +1 -0
  66. flowfile_core/flowfile/flow_graph_utils.py +320 -0
  67. flowfile_core/flowfile/flow_node/flow_node.py +2 -1
  68. flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +2 -2
  69. flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +0 -1
  70. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +1 -1
  71. flowfile_core/flowfile/utils.py +34 -3
  72. flowfile_core/main.py +2 -3
  73. flowfile_core/routes/secrets.py +1 -1
  74. flowfile_core/schemas/input_schema.py +12 -14
  75. flowfile_core/schemas/transform_schema.py +25 -47
  76. flowfile_frame/__init__.py +11 -4
  77. flowfile_frame/adding_expr.py +280 -0
  78. flowfile_frame/config.py +9 -0
  79. flowfile_frame/expr.py +301 -83
  80. flowfile_frame/expr.pyi +2174 -0
  81. flowfile_frame/expr_name.py +258 -0
  82. flowfile_frame/flow_frame.py +616 -627
  83. flowfile_frame/flow_frame.pyi +336 -0
  84. flowfile_frame/flow_frame_methods.py +617 -0
  85. flowfile_frame/group_frame.py +89 -42
  86. flowfile_frame/join.py +1 -2
  87. flowfile_frame/lazy.py +704 -0
  88. flowfile_frame/lazy_methods.py +201 -0
  89. flowfile_frame/list_name_space.py +324 -0
  90. flowfile_frame/selectors.py +3 -0
  91. flowfile_frame/series.py +70 -0
  92. flowfile_frame/utils.py +80 -4
  93. flowfile/web/static/assets/GoogleSheet-854294a4.js +0 -2616
  94. flowfile/web/static/assets/GoogleSheet-92084da7.css +0 -233
  95. flowfile_core/flowfile/sources/external_sources/custom_external_sources/google_sheet.py +0 -74
  96. {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/LICENSE +0 -0
  97. {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/WHEEL +0 -0
  98. {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/entry_points.txt +0 -0
  99. /flowfile_core/{secrets → secret_manager}/__init__.py +0 -0
  100. /flowfile_core/{secrets/secrets.py → secret_manager/secret_manager.py} +0 -0
@@ -1,14 +1,27 @@
1
1
  from faker import Faker
2
2
  from functools import partial
3
+ from math import ceil
3
4
  from random import randint
4
5
  import polars as pl
5
6
  from typing import List, Dict, Any, Generator
6
7
 
7
8
 
8
- def create_fake_data(n_records: int = 1000) -> pl.DataFrame:
9
+ def create_fake_data(n_records: int = 1000, optimized: bool = True) -> pl.DataFrame:
10
+ """
11
+
12
+ Args:
13
+ n_records (): Number of records to return
14
+ optimized (): Indicator if creation should be optimized, will result in more identical rows when True
15
+
16
+ Returns:
17
+ pl.DataFrame
18
+ """
9
19
  fake = Faker()
10
- selector = partial(randint,0)
11
- min_range = partial(min, n_records)
20
+ selector = partial(randint, 0)
21
+
22
+ max_n_records = min(10_000, n_records) if optimized else n_records
23
+
24
+ min_range = partial(min, max_n_records)
12
25
  # Pre-generation of static data
13
26
  cities = [fake.city() for _ in range(min_range(7000))]
14
27
  companies = [fake.company() for _ in range(min_range(100_000))]
@@ -19,7 +32,7 @@ def create_fake_data(n_records: int = 1000) -> pl.DataFrame:
19
32
  first_names = [fake.first_name() for _ in range(min_range(100_000))]
20
33
  last_names = [fake.last_name() for _ in range(min_range(50_000))]
21
34
  domain_names = [fake.domain_name() for _ in range(10)]
22
- sales_data = [fake.random_int(0, 1000) for _ in range(n_records)]
35
+ sales_data = [fake.random_int(0, 1000) for _ in range(max_n_records)]
23
36
 
24
37
  def generate_name():
25
38
  return f"{first_names[selector(min_range(100_000))-1]} {last_names[selector(min_range(50_000))-1]}"
@@ -32,9 +45,8 @@ def create_fake_data(n_records: int = 1000) -> pl.DataFrame:
32
45
 
33
46
  def generate_phone_number():
34
47
  return fake.phone_number()
35
-
36
48
  data = []
37
- for i in range(n_records):
49
+ for i in range(max_n_records):
38
50
  name = generate_name()
39
51
  data.append(dict(
40
52
  ID=randint(1, 1000000),
@@ -47,8 +59,14 @@ def create_fake_data(n_records: int = 1000) -> pl.DataFrame:
47
59
  Work=companies[selector(min_range(100_000))-1],
48
60
  Zipcode=zipcodes[selector(min_range(200_000))-1],
49
61
  Country=countries[selector(min_range(50))-1],
50
- sales_data=sales_data[selector(n_records)-1]
62
+ sales_data=sales_data[selector(max_n_records)-1]
51
63
  ))
64
+ if max_n_records < n_records:
65
+ n_duplicates: int = ceil(n_records / max_n_records)
66
+ output = []
67
+ for _ in range(n_duplicates):
68
+ output.extend(data)
69
+ data = output[:n_records]
52
70
 
53
71
  return pl.DataFrame(data)
54
72
 
@@ -190,7 +190,7 @@ class BaseFetcher:
190
190
  logger.info('Already running the fetching')
191
191
  return
192
192
 
193
- sleep_time = 1
193
+ sleep_time = .5
194
194
  self.running = True
195
195
  while not self.stop_event.is_set():
196
196
  try:
@@ -205,7 +205,8 @@ class BaseFetcher:
205
205
  break
206
206
  elif status.status == 'Unknown Error':
207
207
  self._handle_error(-1,
208
- 'There was an unknown error with the process, and the process got killed by the server')
208
+ 'There was an unknown error with the process, '
209
+ 'and the process got killed by the server')
209
210
  break
210
211
  else:
211
212
  self._handle_error(2, r.text)
@@ -284,7 +285,7 @@ class ExternalDfFetcher(BaseFetcher):
284
285
 
285
286
  def __init__(self, flow_id: int, node_id: int | str, lf: pl.LazyFrame | pl.DataFrame, file_ref: str = None,
286
287
  wait_on_completion: bool = True,
287
- operation_type: OperationType = 'store'):
288
+ operation_type: OperationType = 'store', offload_to_worker: bool = True):
288
289
  super().__init__(file_ref=file_ref)
289
290
  lf = lf.lazy() if isinstance(lf, pl.DataFrame) else lf
290
291
  r = trigger_df_operation(lf=lf, file_ref=self.file_ref, operation_type=operation_type,
@@ -210,3 +210,4 @@ def match_order(l: List[str], ref: List[str]) -> List[str]:
210
210
  org_order = ref_order.get(v, float('inf'))
211
211
  order.append(org_order)
212
212
  return [v for _, v in sorted(zip(order, l))]
213
+
@@ -0,0 +1,320 @@
1
+ from typing import Dict, Tuple, Optional, List, Set, Callable
2
+ from copy import deepcopy
3
+ from flowfile_core.schemas import input_schema, schemas
4
+ from flowfile_core.flowfile.FlowfileFlow import FlowGraph, add_connection
5
+
6
+
7
+ def combine_flow_graphs_with_mapping(
8
+ *flow_graphs: FlowGraph,
9
+ target_flow_id: Optional[int] = None) -> Tuple[FlowGraph, Dict[Tuple[int, int], int]]:
10
+ # Validate input parameters
11
+ _validate_input(flow_graphs)
12
+
13
+ # Generate a unique flow ID if not provided
14
+ if target_flow_id is None:
15
+ target_flow_id = _generate_unique_flow_id(flow_graphs)
16
+
17
+ flow_settings = _create_flow_settings(flow_graphs[0], target_flow_id)
18
+ combined_graph = FlowGraph(flow_id=target_flow_id, flow_settings=flow_settings)
19
+ node_id_mapping = _create_node_id_mapping(flow_graphs)
20
+ _add_nodes_to_combined_graph(flow_graphs, combined_graph, node_id_mapping, target_flow_id)
21
+ _add_connections_to_combined_graph(flow_graphs, combined_graph, node_id_mapping)
22
+ return combined_graph, node_id_mapping
23
+
24
+
25
+ def combine_flow_graphs(*flow_graphs: FlowGraph, target_flow_id: Optional[int] = None) -> FlowGraph:
26
+ """
27
+ Combine multiple flow graphs into a single graph, ensuring node IDs don't overlap.
28
+
29
+ Args:
30
+ *flow_graphs: Multiple FlowGraph instances to combine
31
+ target_flow_id: Optional ID for the new combined graph. If None, a new ID will be generated.
32
+
33
+ Returns:
34
+ A new FlowGraph containing all nodes and edges from the input graphs with remapped IDs
35
+
36
+ Raises:
37
+ ValueError: If no flow graphs are provided
38
+ """
39
+ # Validate input parameters
40
+ _validate_input(flow_graphs)
41
+
42
+ # Generate a unique flow ID if not provided
43
+ if target_flow_id is None:
44
+ target_flow_id = _generate_unique_flow_id(flow_graphs)
45
+
46
+ flow_settings = _create_flow_settings(flow_graphs[0], target_flow_id)
47
+ combined_graph = FlowGraph(flow_id=target_flow_id, flow_settings=flow_settings)
48
+ node_id_mapping = _create_node_id_mapping(flow_graphs)
49
+ _add_nodes_to_combined_graph(flow_graphs, combined_graph, node_id_mapping, target_flow_id)
50
+ _add_connections_to_combined_graph(flow_graphs, combined_graph, node_id_mapping)
51
+
52
+ return combined_graph
53
+
54
+
55
+ def _validate_input(flow_graphs: Tuple[FlowGraph, ...]) -> None:
56
+ """
57
+ Validate input parameters.
58
+
59
+ Args:
60
+ flow_graphs: Flow graphs to validate
61
+
62
+ Raises:
63
+ ValueError: If validation fails
64
+ """
65
+ if not flow_graphs:
66
+ raise ValueError("At least one FlowGraph must be provided")
67
+
68
+ # Check for duplicate flow IDs
69
+ flow_ids = [fg.flow_id for fg in flow_graphs]
70
+ if len(flow_ids) != len(set(flow_ids)):
71
+ raise ValueError("Cannot combine flows with duplicate flow IDs")
72
+
73
+
74
+ def _generate_unique_flow_id(flow_graphs: Tuple[FlowGraph, ...]) -> int:
75
+ """
76
+ Generate a unique flow ID based on the input flow graphs.
77
+
78
+ Args:
79
+ flow_graphs: Flow graphs to generate ID from
80
+
81
+ Returns:
82
+ int: A new unique flow ID
83
+ """
84
+ return abs(hash(tuple(fg.flow_id for fg in flow_graphs))) % 1000000
85
+
86
+
87
+ def _create_flow_settings(base_flow_graph: FlowGraph, target_flow_id: int) -> schemas.FlowSettings:
88
+ """
89
+ Create flow settings for the combined graph based on an existing graph.
90
+
91
+ Args:
92
+ base_flow_graph: Flow graph to base settings on
93
+ target_flow_id: The new flow ID
94
+
95
+ Returns:
96
+ schemas.FlowSettings: Flow settings for the combined graph
97
+ """
98
+ flow_settings = deepcopy(base_flow_graph.flow_settings)
99
+ flow_settings.flow_id = target_flow_id
100
+ flow_settings.name = f"Combined Flow {target_flow_id}"
101
+ return flow_settings
102
+
103
+
104
+ def _create_node_id_mapping(flow_graphs: Tuple[FlowGraph, ...]) -> Dict[Tuple[int, int], int]:
105
+ """
106
+ Create a mapping from (flow_id, original_node_id) to new unique node IDs.
107
+
108
+ Args:
109
+ flow_graphs: Flow graphs to process
110
+
111
+ Returns:
112
+ Dict: Mapping from (flow_id, node_id) to new node ID
113
+ """
114
+ node_id_mapping = {}
115
+ next_node_id = _get_next_available_node_id(flow_graphs)
116
+
117
+ for fg in flow_graphs:
118
+ for node in fg.nodes:
119
+ node_id_mapping[(fg.flow_id, node.node_id)] = next_node_id
120
+ next_node_id += 1
121
+
122
+ return node_id_mapping
123
+
124
+
125
+ def _get_next_available_node_id(flow_graphs: Tuple[FlowGraph, ...]) -> int:
126
+ """
127
+ Find the next available node ID.
128
+
129
+ Args:
130
+ flow_graphs: Flow graphs to examine
131
+
132
+ Returns:
133
+ int: Next available node ID
134
+ """
135
+ max_id = 0
136
+ for fg in flow_graphs:
137
+ for node in fg.nodes:
138
+ max_id = max(max_id, node.node_id)
139
+ return max_id + 1
140
+
141
+
142
+ def _add_nodes_to_combined_graph(
143
+ flow_graphs: Tuple[FlowGraph, ...],
144
+ combined_graph: FlowGraph,
145
+ node_id_mapping: Dict[Tuple[int, int], int],
146
+ target_flow_id: int
147
+ ) -> None:
148
+ """
149
+ Add all nodes from source graphs to the combined graph.
150
+
151
+ Args:
152
+ flow_graphs: Source flow graphs
153
+ combined_graph: Target combined graph
154
+ node_id_mapping: Mapping of node IDs
155
+ target_flow_id: Target flow ID
156
+ """
157
+ processed_nodes = set()
158
+
159
+ for fg in flow_graphs:
160
+ for node in fg.nodes:
161
+ # Skip if already processed
162
+ if (fg.flow_id, node.node_id) in processed_nodes:
163
+ continue
164
+
165
+ # Generate new node ID
166
+ new_node_id = node_id_mapping[(fg.flow_id, node.node_id)]
167
+
168
+ # Create and update setting input
169
+ setting_input = _create_updated_setting_input(
170
+ node.setting_input,
171
+ new_node_id,
172
+ target_flow_id,
173
+ fg.flow_id,
174
+ node_id_mapping
175
+ )
176
+
177
+ # Add node to combined graph
178
+ _add_node_to_graph(combined_graph, new_node_id, target_flow_id, node.node_type, setting_input)
179
+
180
+ processed_nodes.add((fg.flow_id, node.node_id))
181
+
182
+
183
+ def _create_updated_setting_input(
184
+ original_setting_input: any,
185
+ new_node_id: int,
186
+ target_flow_id: int,
187
+ source_flow_id: int,
188
+ node_id_mapping: Dict[Tuple[int, int], int]
189
+ ) -> any:
190
+ """
191
+ Create an updated setting input with new node and flow IDs.
192
+
193
+ Args:
194
+ original_setting_input: Original setting input
195
+ new_node_id: New node ID
196
+ target_flow_id: Target flow ID
197
+ source_flow_id: Source flow ID
198
+ node_id_mapping: Mapping of node IDs
199
+
200
+ Returns:
201
+ Updated setting input
202
+ """
203
+ setting_input = deepcopy(original_setting_input)
204
+
205
+ # Update node ID
206
+ if hasattr(setting_input, 'node_id'):
207
+ setting_input.node_id = new_node_id
208
+
209
+ # Update flow ID
210
+ if hasattr(setting_input, 'flow_id'):
211
+ setting_input.flow_id = target_flow_id
212
+
213
+ # Update depending_on_id if present
214
+ if hasattr(setting_input, 'depending_on_id') and setting_input.depending_on_id != -1:
215
+ orig_depending_id = setting_input.depending_on_id
216
+ setting_input.depending_on_id = node_id_mapping.get((source_flow_id, orig_depending_id), -1)
217
+
218
+ # Update depending_on_ids list if present
219
+ if hasattr(setting_input, 'depending_on_ids'):
220
+ setting_input.depending_on_ids = [
221
+ node_id_mapping.get((source_flow_id, dep_id), -1)
222
+ for dep_id in setting_input.depending_on_ids
223
+ if dep_id != -1
224
+ ]
225
+
226
+ return setting_input
227
+
228
+
229
+ def _add_node_to_graph(
230
+ graph: FlowGraph,
231
+ node_id: int,
232
+ flow_id: int,
233
+ node_type: str,
234
+ setting_input: any
235
+ ) -> None:
236
+ """
237
+ Add a node to the graph.
238
+
239
+ Args:
240
+ graph: Target graph
241
+ node_id: Node ID
242
+ flow_id: Flow ID
243
+ node_type: Node type
244
+ setting_input: Setting input
245
+ """
246
+ # Add node promise to graph
247
+ node_promise = input_schema.NodePromise(
248
+ node_id=node_id,
249
+ flow_id=flow_id,
250
+ node_type=node_type,
251
+ is_setup=True,
252
+ pos_x=getattr(setting_input, 'pos_x', 0),
253
+ pos_y=getattr(setting_input, 'pos_y', 0),
254
+ description=getattr(setting_input, 'description', '')
255
+ )
256
+ graph.add_node_promise(node_promise)
257
+
258
+ # Get node type-specific add method
259
+ add_method_name = f"add_{node_type}"
260
+ if hasattr(graph, add_method_name):
261
+ add_method = getattr(graph, add_method_name)
262
+ add_method(setting_input)
263
+
264
+
265
+ def _add_connections_to_combined_graph(
266
+ flow_graphs: Tuple[FlowGraph, ...],
267
+ combined_graph: FlowGraph,
268
+ node_id_mapping: Dict[Tuple[int, int], int]
269
+ ) -> None:
270
+ """
271
+ Add all connections from source graphs to the combined graph.
272
+
273
+ Args:
274
+ flow_graphs: Source flow graphs
275
+ combined_graph: Target combined graph
276
+ node_id_mapping: Mapping of node IDs
277
+ """
278
+ for fg in flow_graphs:
279
+ for connection in fg.node_connections:
280
+ source_id, target_id = connection
281
+ new_source_id = node_id_mapping.get((fg.flow_id, source_id))
282
+ new_target_id = node_id_mapping.get((fg.flow_id, target_id))
283
+
284
+ if new_source_id is not None and new_target_id is not None:
285
+ input_type = _determine_connection_input_type(fg, source_id, target_id)
286
+
287
+ # Create connection in combined graph
288
+ node_connection = input_schema.NodeConnection.create_from_simple_input(
289
+ from_id=new_source_id,
290
+ to_id=new_target_id,
291
+ input_type=input_type
292
+ )
293
+ add_connection(combined_graph, node_connection)
294
+
295
+
296
+ def _determine_connection_input_type(
297
+ flow_graph: FlowGraph,
298
+ source_id: int,
299
+ target_id: int
300
+ ) -> str:
301
+ """
302
+ Determine the input type for a connection.
303
+
304
+ Args:
305
+ flow_graph: Source flow graph
306
+ source_id: Source node ID
307
+ target_id: Target node ID
308
+
309
+ Returns:
310
+ str: Input type (main, left, right)
311
+ """
312
+ from_node = flow_graph.get_node(source_id)
313
+ to_node = flow_graph.get_node(target_id)
314
+
315
+ if from_node and to_node:
316
+ input_types = to_node.get_input_type(from_node.node_id)
317
+ if input_types:
318
+ return input_types[0]
319
+
320
+ return "main"
@@ -146,6 +146,7 @@ class FlowNode:
146
146
  self.node_settings.renew_schema = True
147
147
  if hasattr(setting_input, 'cache_results'):
148
148
  self.node_settings.cache_results = setting_input.cache_results
149
+
149
150
  self.setting_input = setting_input
150
151
  self.results.errors = None
151
152
  self.add_lead_to_in_depend_source()
@@ -174,7 +175,7 @@ class FlowNode:
174
175
  self.set_node_information()
175
176
  if self.node_type == 'manual_input' and isinstance(self._setting_input, input_schema.NodeManualInput):
176
177
  if self.hash != self.calculate_hash(setting_input) or not self.node_stats.has_run:
177
- self.function = self.function.__class__(setting_input.raw_data)
178
+ self.function = self.function.__class__(setting_input.raw_data_format)
178
179
  self.reset()
179
180
  self.get_predicted_schema()
180
181
  elif self._setting_input is not None:
@@ -1,7 +1,7 @@
1
1
  from typing import Any, Dict, List, Optional, TYPE_CHECKING, Union
2
2
  from pydantic import BaseModel, field_validator, ConfigDict
3
3
  import polars as pl
4
- from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import type_to_polars_str
4
+ from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import cast_str_to_polars_type
5
5
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
6
6
  from flowfile_core.schemas.input_schema import MinimalFieldInfo
7
7
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.polars_type import PlType
@@ -56,7 +56,7 @@ class JsonSchema(BaseModel):
56
56
  dtype = 'string'
57
57
  else:
58
58
  dtype = type_mapping.get(self.type[0] if isinstance(self.type, list) else self.type, 'string')
59
- return type_to_polars_str(dtype)
59
+ return cast_str_to_polars_type(dtype)
60
60
 
61
61
 
62
62
  class AirbyteProperty(BaseModel):
@@ -1,2 +1 @@
1
- from flowfile_core.flowfile.sources.external_sources.custom_external_sources import google_sheet
2
1
  from flowfile_core.flowfile.sources.external_sources.custom_external_sources import sample_users
@@ -4,7 +4,7 @@ from flowfile_core.configs import logger
4
4
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
5
5
  from flowfile_core.schemas.input_schema import MinimalFieldInfo, DatabaseSettings
6
6
  from sqlalchemy import Engine, inspect, create_engine, text
7
- from flowfile_core.secrets.secrets import get_encrypted_secret, decrypt_secret
7
+ from flowfile_core.secret_manager.secret_manager import get_encrypted_secret, decrypt_secret
8
8
 
9
9
  from flowfile_core.flowfile.sources.external_sources.base_class import ExternalDataSource
10
10
  from flowfile_core.flowfile.sources.external_sources.sql_source.utils import get_polars_type, construct_sql_uri
@@ -1,9 +1,15 @@
1
- from typing import List
2
1
  import os
3
- import shutil
4
2
  import hashlib
5
- from datetime import datetime
6
3
  import json
4
+ import polars as pl
5
+ import shutil
6
+
7
+ from datetime import datetime, date, time
8
+ from typing import List
9
+ from decimal import Decimal
10
+
11
+ from flowfile_core.flowfile.flow_data_engine.utils import standardize_col_dtype
12
+ from flowfile_core.schemas import input_schema
7
13
 
8
14
 
9
15
  def generate_sha256_hash(data: bytes):
@@ -26,8 +32,16 @@ def snake_case_to_camel_case(text: str) -> str:
26
32
  def json_default(val):
27
33
  if isinstance(val, datetime):
28
34
  return val.isoformat(timespec='microseconds')
35
+ elif isinstance(val, date):
36
+ return val.isoformat()
37
+ elif isinstance(val, time):
38
+ return val.isoformat()
29
39
  elif hasattr(val, '__dict__'):
30
40
  return val.__dict__
41
+ elif isinstance(val, Decimal):
42
+ if val.as_integer_ratio()[1] == 1:
43
+ return int(val)
44
+ return float(val)
31
45
  else:
32
46
  raise Exception('Value is not serializable')
33
47
 
@@ -104,3 +118,20 @@ def batch_generator(input_list: List, batch_size: int = 10000):
104
118
  input_list = []
105
119
  run = False
106
120
 
121
+
122
+ def _handle_raw_data(node_manual_input: input_schema.NodeManualInput):
123
+ """Ensure compatibility with the new typed raw data and the old dict form data type"""
124
+ if (not (hasattr(node_manual_input, "raw_data_format") and node_manual_input.raw_data_format)
125
+ and (hasattr(node_manual_input, 'raw_data') and node_manual_input.raw_data)):
126
+ values = [standardize_col_dtype([vv for vv in c]) for c in zip(*(r.values()
127
+ for r in node_manual_input.raw_data))]
128
+ data_types = (pl.DataType.from_python(type(next((v for v in column_values), None))) for column_values in values)
129
+ _columns = [input_schema.MinimalFieldInfo(name=c, data_type=str(next(data_types))) for c in
130
+ node_manual_input.raw_data[0].keys()]
131
+
132
+ node_manual_input.raw_data_format = input_schema.RawData(columns=_columns, data=values)
133
+ elif ((hasattr(node_manual_input, "raw_data_format") and node_manual_input.raw_data_format)
134
+ and not (hasattr(node_manual_input, 'raw_data') and node_manual_input.raw_data)):
135
+ node_manual_input.raw_data = [{c.name: node_manual_input.raw_data_format.data[ci][ri] for ci, c in
136
+ enumerate(node_manual_input.raw_data_format.columns)}
137
+ for ri in range(len(node_manual_input.raw_data_format.data[0]))]
flowfile_core/main.py CHANGED
@@ -8,7 +8,8 @@ from fastapi import FastAPI
8
8
  from fastapi.middleware.cors import CORSMiddleware
9
9
 
10
10
  from flowfile_core import ServerRun
11
- from flowfile_core.configs.settings import SERVER_HOST, SERVER_PORT, WORKER_HOST, WORKER_PORT, WORKER_URL
11
+ from flowfile_core.configs.settings import (SERVER_HOST, SERVER_PORT, WORKER_HOST, WORKER_PORT, WORKER_URL,
12
+ OFFLOAD_TO_WORKER)
12
13
 
13
14
  from flowfile_core.routes.auth import router as auth_router
14
15
  from flowfile_core.routes.secrets import router as secrets_router
@@ -107,7 +108,6 @@ def run(host: str = None, port: int = None):
107
108
  host = SERVER_HOST
108
109
  if port is None:
109
110
  port = SERVER_PORT
110
-
111
111
  print(f"Starting server on {host}:{port}")
112
112
  print(f"Worker configured at {WORKER_URL} (host: {WORKER_HOST}, port: {WORKER_PORT})")
113
113
 
@@ -120,7 +120,6 @@ def run(host: str = None, port: int = None):
120
120
  host=host,
121
121
  port=port,
122
122
  loop="asyncio",
123
- log_level="warning",
124
123
  )
125
124
  server = uvicorn.Server(config)
126
125
  server_instance = server # Store server instance globally
@@ -10,7 +10,7 @@ from flowfile_core.auth.jwt import get_current_active_user
10
10
  from flowfile_core.auth.models import Secret, SecretInput
11
11
  from flowfile_core.database import models as db_models
12
12
  from flowfile_core.database.connection import get_db
13
- from flowfile_core.secrets.secrets import encrypt_secret, store_secret, delete_secret as delete_secret_action
13
+ from flowfile_core.secret_manager.secret_manager import encrypt_secret, store_secret, delete_secret as delete_secret_action
14
14
 
15
15
  router = APIRouter(dependencies=[Depends(get_current_active_user)])
16
16
 
@@ -1,4 +1,4 @@
1
- from typing import List, Optional, Literal
1
+ from typing import List, Optional, Literal, Iterator
2
2
  from flowfile_core.schemas import transform_schema
3
3
  from pathlib import Path
4
4
  import os
@@ -15,6 +15,7 @@ InputConnectionClass = Literal['input-0', 'input-1', 'input-2', 'input-3', 'inpu
15
15
 
16
16
  InputType = Literal["main", "left", "right"]
17
17
 
18
+
18
19
  class NewDirectory(BaseModel):
19
20
  source_path: str
20
21
  dir_name: str
@@ -60,7 +61,7 @@ class ReceivedTableBase(BaseModel):
60
61
  return self.path
61
62
 
62
63
  def set_absolute_filepath(self):
63
- base_path = Path(self.path)
64
+ base_path = Path(self.path).expanduser()
64
65
  # Check if the path is relative, resolve it with the current working directory
65
66
  if not base_path.is_absolute():
66
67
  base_path = Path.cwd() / base_path
@@ -96,7 +97,7 @@ class ReceivedJsonTable(ReceivedCsvTable):
96
97
  pass
97
98
 
98
99
 
99
- class ReceivedParquetTable(BaseModel):
100
+ class ReceivedParquetTable(ReceivedTableBase):
100
101
  file_type: str = 'parquet'
101
102
 
102
103
 
@@ -246,8 +247,14 @@ class NodeDatasource(NodeBase):
246
247
  file_ref: str = None
247
248
 
248
249
 
250
+ class RawData(BaseModel):
251
+ columns: List[MinimalFieldInfo] = None
252
+ data: List[List] # List of list where each inner list is a column of data. This ensures more efficient storage
253
+
254
+
249
255
  class NodeManualInput(NodeBase):
250
- raw_data: List = None
256
+ raw_data: Optional[List] = None
257
+ raw_data_format: Optional[RawData] = None
251
258
 
252
259
 
253
260
  class NodeRead(NodeBase):
@@ -341,15 +348,6 @@ class SampleUsers(ExternalSource):
341
348
  size: int = 100
342
349
 
343
350
 
344
- class GoogleSheet(ExternalSource):
345
- GOOGLE_SHEET: bool
346
- class_name: str = "google_sheet"
347
- access_token: SecretStr = None
348
- sheet_id: str
349
- worksheet_name: str
350
- sheet_name: str
351
-
352
-
353
351
  class AirbyteReader(AirbyteConfig):
354
352
  class_name: Optional[str] = "airbyte_reader"
355
353
  fields: Optional[List[MinimalFieldInfo]] = None
@@ -362,7 +360,7 @@ class AccessToken(BaseModel):
362
360
 
363
361
  class NodeExternalSource(NodeBase):
364
362
  identifier: str
365
- source_settings: SampleUsers | GoogleSheet
363
+ source_settings: SampleUsers
366
364
 
367
365
 
368
366
  class NodeAirbyteReader(NodeExternalSource):