Flowfile 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile might be problematic. Click here for more details.

Files changed (171) hide show
  1. build_backends/__init__.py +0 -0
  2. build_backends/main.py +313 -0
  3. build_backends/main_prd.py +202 -0
  4. flowfile/__init__.py +71 -0
  5. flowfile/__main__.py +24 -0
  6. flowfile-0.2.2.dist-info/LICENSE +21 -0
  7. flowfile-0.2.2.dist-info/METADATA +225 -0
  8. flowfile-0.2.2.dist-info/RECORD +171 -0
  9. flowfile-0.2.2.dist-info/WHEEL +4 -0
  10. flowfile-0.2.2.dist-info/entry_points.txt +9 -0
  11. flowfile_core/__init__.py +13 -0
  12. flowfile_core/auth/__init__.py +0 -0
  13. flowfile_core/auth/jwt.py +140 -0
  14. flowfile_core/auth/models.py +40 -0
  15. flowfile_core/auth/secrets.py +178 -0
  16. flowfile_core/configs/__init__.py +35 -0
  17. flowfile_core/configs/flow_logger.py +433 -0
  18. flowfile_core/configs/node_store/__init__.py +0 -0
  19. flowfile_core/configs/node_store/nodes.py +98 -0
  20. flowfile_core/configs/settings.py +120 -0
  21. flowfile_core/database/__init__.py +0 -0
  22. flowfile_core/database/connection.py +51 -0
  23. flowfile_core/database/init_db.py +45 -0
  24. flowfile_core/database/models.py +41 -0
  25. flowfile_core/fileExplorer/__init__.py +0 -0
  26. flowfile_core/fileExplorer/funcs.py +259 -0
  27. flowfile_core/fileExplorer/utils.py +53 -0
  28. flowfile_core/flowfile/FlowfileFlow.py +1403 -0
  29. flowfile_core/flowfile/__init__.py +0 -0
  30. flowfile_core/flowfile/_extensions/__init__.py +0 -0
  31. flowfile_core/flowfile/_extensions/real_time_interface.py +51 -0
  32. flowfile_core/flowfile/analytics/__init__.py +0 -0
  33. flowfile_core/flowfile/analytics/analytics_processor.py +123 -0
  34. flowfile_core/flowfile/analytics/graphic_walker.py +60 -0
  35. flowfile_core/flowfile/analytics/schemas/__init__.py +0 -0
  36. flowfile_core/flowfile/analytics/utils.py +9 -0
  37. flowfile_core/flowfile/connection_manager/__init__.py +3 -0
  38. flowfile_core/flowfile/connection_manager/_connection_manager.py +48 -0
  39. flowfile_core/flowfile/connection_manager/models.py +10 -0
  40. flowfile_core/flowfile/database_connection_manager/__init__.py +0 -0
  41. flowfile_core/flowfile/database_connection_manager/db_connections.py +139 -0
  42. flowfile_core/flowfile/database_connection_manager/models.py +15 -0
  43. flowfile_core/flowfile/extensions.py +36 -0
  44. flowfile_core/flowfile/flow_data_engine/__init__.py +0 -0
  45. flowfile_core/flowfile/flow_data_engine/create/__init__.py +0 -0
  46. flowfile_core/flowfile/flow_data_engine/create/funcs.py +146 -0
  47. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1521 -0
  48. flowfile_core/flowfile/flow_data_engine/flow_file_column/__init__.py +0 -0
  49. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +144 -0
  50. flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +24 -0
  51. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +36 -0
  52. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/__init__.py +0 -0
  53. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +38 -0
  54. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +90 -0
  55. flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -0
  56. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +54 -0
  57. flowfile_core/flowfile/flow_data_engine/pivot_table.py +20 -0
  58. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +249 -0
  59. flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +143 -0
  60. flowfile_core/flowfile/flow_data_engine/sample_data.py +120 -0
  61. flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -0
  62. flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +36 -0
  63. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +503 -0
  64. flowfile_core/flowfile/flow_data_engine/threaded_processes.py +27 -0
  65. flowfile_core/flowfile/flow_data_engine/types.py +0 -0
  66. flowfile_core/flowfile/flow_data_engine/utils.py +212 -0
  67. flowfile_core/flowfile/flow_node/__init__.py +0 -0
  68. flowfile_core/flowfile/flow_node/flow_node.py +771 -0
  69. flowfile_core/flowfile/flow_node/models.py +111 -0
  70. flowfile_core/flowfile/flow_node/schema_callback.py +70 -0
  71. flowfile_core/flowfile/handler.py +123 -0
  72. flowfile_core/flowfile/manage/__init__.py +0 -0
  73. flowfile_core/flowfile/manage/compatibility_enhancements.py +70 -0
  74. flowfile_core/flowfile/manage/manage_flowfile.py +0 -0
  75. flowfile_core/flowfile/manage/open_flowfile.py +136 -0
  76. flowfile_core/flowfile/setting_generator/__init__.py +2 -0
  77. flowfile_core/flowfile/setting_generator/setting_generator.py +41 -0
  78. flowfile_core/flowfile/setting_generator/settings.py +176 -0
  79. flowfile_core/flowfile/sources/__init__.py +0 -0
  80. flowfile_core/flowfile/sources/external_sources/__init__.py +3 -0
  81. flowfile_core/flowfile/sources/external_sources/airbyte_sources/__init__.py +0 -0
  82. flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +159 -0
  83. flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +172 -0
  84. flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +173 -0
  85. flowfile_core/flowfile/sources/external_sources/base_class.py +39 -0
  86. flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +2 -0
  87. flowfile_core/flowfile/sources/external_sources/custom_external_sources/exchange_rate.py +0 -0
  88. flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +100 -0
  89. flowfile_core/flowfile/sources/external_sources/custom_external_sources/google_sheet.py +74 -0
  90. flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +29 -0
  91. flowfile_core/flowfile/sources/external_sources/factory.py +22 -0
  92. flowfile_core/flowfile/sources/external_sources/sql_source/__init__.py +0 -0
  93. flowfile_core/flowfile/sources/external_sources/sql_source/models.py +90 -0
  94. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +328 -0
  95. flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +379 -0
  96. flowfile_core/flowfile/util/__init__.py +0 -0
  97. flowfile_core/flowfile/util/calculate_layout.py +137 -0
  98. flowfile_core/flowfile/util/execution_orderer.py +141 -0
  99. flowfile_core/flowfile/utils.py +106 -0
  100. flowfile_core/main.py +138 -0
  101. flowfile_core/routes/__init__.py +0 -0
  102. flowfile_core/routes/auth.py +34 -0
  103. flowfile_core/routes/logs.py +163 -0
  104. flowfile_core/routes/public.py +10 -0
  105. flowfile_core/routes/routes.py +601 -0
  106. flowfile_core/routes/secrets.py +85 -0
  107. flowfile_core/run_lock.py +11 -0
  108. flowfile_core/schemas/__init__.py +0 -0
  109. flowfile_core/schemas/analysis_schemas/__init__.py +0 -0
  110. flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +118 -0
  111. flowfile_core/schemas/defaults.py +9 -0
  112. flowfile_core/schemas/external_sources/__init__.py +0 -0
  113. flowfile_core/schemas/external_sources/airbyte_schemas.py +20 -0
  114. flowfile_core/schemas/input_schema.py +477 -0
  115. flowfile_core/schemas/models.py +193 -0
  116. flowfile_core/schemas/output_model.py +115 -0
  117. flowfile_core/schemas/schemas.py +106 -0
  118. flowfile_core/schemas/transform_schema.py +569 -0
  119. flowfile_core/secrets/__init__.py +0 -0
  120. flowfile_core/secrets/secrets.py +64 -0
  121. flowfile_core/utils/__init__.py +0 -0
  122. flowfile_core/utils/arrow_reader.py +247 -0
  123. flowfile_core/utils/excel_file_manager.py +18 -0
  124. flowfile_core/utils/fileManager.py +45 -0
  125. flowfile_core/utils/fl_executor.py +38 -0
  126. flowfile_core/utils/utils.py +8 -0
  127. flowfile_frame/__init__.py +56 -0
  128. flowfile_frame/__main__.py +12 -0
  129. flowfile_frame/adapters.py +17 -0
  130. flowfile_frame/expr.py +1163 -0
  131. flowfile_frame/flow_frame.py +2093 -0
  132. flowfile_frame/group_frame.py +199 -0
  133. flowfile_frame/join.py +75 -0
  134. flowfile_frame/selectors.py +242 -0
  135. flowfile_frame/utils.py +184 -0
  136. flowfile_worker/__init__.py +55 -0
  137. flowfile_worker/configs.py +95 -0
  138. flowfile_worker/create/__init__.py +37 -0
  139. flowfile_worker/create/funcs.py +146 -0
  140. flowfile_worker/create/models.py +86 -0
  141. flowfile_worker/create/pl_types.py +35 -0
  142. flowfile_worker/create/read_excel_tables.py +110 -0
  143. flowfile_worker/create/utils.py +84 -0
  144. flowfile_worker/external_sources/__init__.py +0 -0
  145. flowfile_worker/external_sources/airbyte_sources/__init__.py +0 -0
  146. flowfile_worker/external_sources/airbyte_sources/cache_manager.py +161 -0
  147. flowfile_worker/external_sources/airbyte_sources/main.py +89 -0
  148. flowfile_worker/external_sources/airbyte_sources/models.py +133 -0
  149. flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
  150. flowfile_worker/external_sources/sql_source/__init__.py +0 -0
  151. flowfile_worker/external_sources/sql_source/main.py +56 -0
  152. flowfile_worker/external_sources/sql_source/models.py +72 -0
  153. flowfile_worker/flow_logger.py +58 -0
  154. flowfile_worker/funcs.py +327 -0
  155. flowfile_worker/main.py +108 -0
  156. flowfile_worker/models.py +95 -0
  157. flowfile_worker/polars_fuzzy_match/__init__.py +0 -0
  158. flowfile_worker/polars_fuzzy_match/matcher.py +435 -0
  159. flowfile_worker/polars_fuzzy_match/models.py +36 -0
  160. flowfile_worker/polars_fuzzy_match/pre_process.py +213 -0
  161. flowfile_worker/polars_fuzzy_match/process.py +86 -0
  162. flowfile_worker/polars_fuzzy_match/utils.py +50 -0
  163. flowfile_worker/process_manager.py +36 -0
  164. flowfile_worker/routes.py +440 -0
  165. flowfile_worker/secrets.py +148 -0
  166. flowfile_worker/spawner.py +187 -0
  167. flowfile_worker/utils.py +25 -0
  168. test_utils/__init__.py +3 -0
  169. test_utils/postgres/__init__.py +1 -0
  170. test_utils/postgres/commands.py +109 -0
  171. test_utils/postgres/fixtures.py +417 -0
@@ -0,0 +1,111 @@
1
+
2
+ import pyarrow as pa
3
+ from typing import List, Union, Callable, Optional, Literal
4
+
5
+ from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
6
+ from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine
7
+ from flowfile_core.schemas import schemas
8
+ from dataclasses import dataclass
9
+
10
+
11
@dataclass
class NodeStepPromise:
    """Plain data record describing a node step and the ids it is wired to.

    NOTE(review): the per-field notes are inferred from the field names only;
    nothing in this module reads them - confirm against the callers.
    """
    node_id: Union[str, int]  # identifier of this node step
    name: str
    is_start: bool
    leads_to_id: Optional[List[Union[str, int]]] = None  # presumably ids of downstream nodes
    left_input: Optional[Union[str, int]] = None  # presumably id of the left input node
    right_input: Optional[Union[str, int]] = None  # presumably id of the right input node
    depends_on: Optional[List[Union[str, int]]] = None  # presumably ids of upstream nodes
20
+
21
+
22
class NodeStepStats:
    """Class-level defaults describing the run state of a node step.

    There is no ``__init__``: an instance reads these class attributes until
    a value is assigned on the instance itself.
    """
    error: Optional[str] = None  # no error recorded by default
    has_run: bool = False
    active: bool = True
    is_canceled: bool = False
27
+
28
+
29
class NodeStepSettings:
    """Class-level default settings for a node step.

    There is no ``__init__``: an instance reads these class attributes until
    a value is assigned on the instance itself.
    """
    cache_results: bool = False
    renew_schema: bool = True
    streamable: bool = True
    setup_errors: bool = False
    breaking_setup_errors: bool = False
    # Allowed values are defined by schemas.ExecutionLocationsLiteral.
    execute_location: schemas.ExecutionLocationsLiteral = 'auto'
36
+
37
+
38
class NodeStepInputs:
    """Holds the upstream nodes connected to a node step.

    A step can have any number of ``main`` inputs plus one optional ``left``
    and one optional ``right`` input. All three slots default to ``None``.
    """
    left_input: Optional["FlowNode"] = None
    right_input: Optional["FlowNode"] = None
    main_inputs: Optional[List["FlowNode"]] = None

    @property
    def input_ids(self) -> Optional[List[int]]:
        """Return the ids of all connected inputs.

        Returns:
            The ``node_information.id`` of every input (main first, then left,
            then right), or ``None`` while ``main_inputs`` has never been set.
        """
        if self.main_inputs is None:
            return None
        return [node_input.node_information.id for node_input in self.get_all_inputs()]

    def get_all_inputs(self) -> List["FlowNode"]:
        """Return every connected input node: main inputs, then left, then right."""
        candidates = list(self.main_inputs or []) + [self.left_input, self.right_input]
        return [node for node in candidates if node is not None]

    def __repr__(self) -> str:
        left_repr = f"Left Input: {self.left_input}" if self.left_input else "Left Input: None"
        right_repr = f"Right Input: {self.right_input}" if self.right_input else "Right Input: None"
        main_inputs_repr = f"Main Inputs: {self.main_inputs}" if self.main_inputs else "Main Inputs: None"
        return f"{self.__class__.__name__}({left_repr}, {right_repr}, {main_inputs_repr})"

    def validate_if_input_connection_exists(self, node_input_id: int,
                                            connection_name: Literal['main', 'left', 'right']) -> bool:
        """Tell whether the node with ``node_input_id`` is connected on the given slot.

        Args:
            node_input_id: Id of the candidate input node.
            connection_name: Slot to inspect: ``'main'``, ``'left'`` or ``'right'``.

        Returns:
            ``True`` when the slot holds a node with that id. ``False`` when the
            slot is empty, holds a different node, or the slot name is unknown.
        """
        if connection_name == 'main':
            # main_inputs is None until the first main connection is made.
            return any(node_input.node_information.id == node_input_id
                       for node_input in self.main_inputs or [])
        if connection_name == 'left':
            connected = self.left_input
        elif connection_name == 'right':
            connected = self.right_input
        else:
            return False
        return connected is not None and connected.node_information.id == node_input_id
66
+
67
+
68
class NodeSchemaInformation:
    """Schema bookkeeping for a node step.

    Every instance gets its own empty lists. Declaring the lists as class
    attributes would make all instances share (and mutate) the same objects.
    """
    result_schema: Optional[List["FlowfileColumn"]]  # resulting schema of the function
    predicted_schema: Optional[List["FlowfileColumn"]]  # predicted resulting schema of the function
    input_columns: List[str]  # columns that are needed for the function
    drop_columns: List[str]  # columns that will not be available after the function
    output_columns: List["FlowfileColumn"]  # columns that will be added with the function

    def __init__(self) -> None:
        """Start with empty, instance-private schema lists."""
        self.result_schema = []
        self.predicted_schema = []
        self.input_columns = []
        self.drop_columns = []
        self.output_columns = []
74
+
75
+
76
class NodeResults:
    """Container for everything a node produces when it is executed.

    The class attributes supply defaults; ``__init__`` re-assigns most of them
    on the instance (``example_data_path`` keeps relying on the class default).
    """
    # After successful execution this will contain the Flowfile.
    _resulting_data: Optional["FlowDataEngine"] = None
    # After success this will contain a sample of the data (to provide frontend data).
    example_data: Optional["FlowDataEngine"] = None
    # Path to the arrow table file.
    example_data_path: Optional[str] = None
    example_data_generator: Optional[Callable[[], "pa.Table"]] = None
    run_time: int = -1
    errors: Optional[str] = None
    warnings: Optional[str] = None
    analysis_data_generator: Optional[Callable[[], "pa.Table"]] = None

    def __init__(self):
        self._resulting_data = self.example_data = None
        self.errors = self.warnings = None
        self.example_data_generator = self.analysis_data_generator = None
        self.run_time = -1

    def get_example_data(self) -> Optional["pa.Table"]:
        """Call the example-data generator, or return ``None`` when none is set."""
        generator = self.example_data_generator
        if not generator:
            return None
        return generator()

    @property
    def resulting_data(self) -> "FlowDataEngine":
        """Data stored by the last assignment to ``resulting_data`` (``None`` after ``reset``)."""
        return self._resulting_data

    @resulting_data.setter
    def resulting_data(self, d: "FlowDataEngine"):
        self._resulting_data = d

    def reset(self):
        """Drop the resulting data and put ``run_time`` back to ``-1``."""
        self.run_time = -1
        self._resulting_data = None
111
+
@@ -0,0 +1,70 @@
1
+
2
+ from typing import Callable, Any, Optional, Generic, TypeVar
3
+ from concurrent.futures import ThreadPoolExecutor, Future
4
+ from flowfile_core.configs import logger
5
+
6
+
7
T = TypeVar('T')


class SingleExecutionFuture(Generic[T]):
    """Run a zero-argument function once in a worker thread and cache its result.

    Calling the instance starts the function (if it was not started yet), waits
    for it and returns the result. The single-worker executor is shut down
    after every call; a fresh one is created on demand, so the object can be
    ``reset()`` and called again.
    """
    executor: ThreadPoolExecutor
    future: Optional[Future[T]]
    func: Callable[[], T]
    on_error: Optional[Callable[[Exception], Any]]
    result_value: Optional[T]

    def __init__(
            self,
            func: Callable[[], T],
            on_error: Optional[Callable[[Exception], Any]] = None
    ) -> None:
        """Initialize with function and optional error handler.

        Args:
            func: Zero-argument callable executed in the worker thread.
            on_error: Optional handler; when given, its return value is returned
                from ``__call__`` instead of raising the exception of ``func``.
        """
        self.executor = ThreadPoolExecutor(max_workers=1)
        # Tracks whether ``executor`` was shut down and must be replaced before reuse.
        self._executor_closed = False
        self.future = None
        self.func = func
        self.on_error = on_error
        self.result_value = None

    def start(self) -> None:
        """Start the function execution if not already started."""
        if self.future is not None:
            return
        if self._executor_closed:
            # A shut-down executor rejects new work, so replace it first.
            self.executor = ThreadPoolExecutor(max_workers=1)
            self._executor_closed = False
        logger.info("single executor function started")
        self.future = self.executor.submit(self.func)

    def cleanup(self) -> None:
        """Shut down the executor without waiting; the future is kept."""
        # getattr: __del__ may run on an instance whose __init__ never completed.
        executor = getattr(self, 'executor', None)
        if executor is not None:
            executor.shutdown(wait=False)
        self._executor_closed = True

    def __call__(self) -> Optional[T]:
        """Execute function if not running and return its result.

        Returns:
            The result of ``func``, or the return value of ``on_error`` when
            ``func`` raised and a handler was supplied.

        Raises:
            Exception: Whatever ``func`` raised, when no ``on_error`` is set.
        """
        if self.result_value is not None:
            return self.result_value
        if self.future is None:
            self.start()
        else:
            logger.info("Function already running or did complete")
        try:
            self.result_value = self.future.result()
            logger.info("Done with the function")
            return self.result_value
        except Exception as e:
            if self.on_error:
                return self.on_error(e)
            raise
        finally:
            self.cleanup()

    def reset(self):
        """Reset the future and result value."""
        logger.info("Resetting the future and result value")
        self.result_value = None
        self.future = None

    def __del__(self) -> None:
        """Ensure executor is shut down on deletion."""
        self.cleanup()
@@ -0,0 +1,123 @@
1
+ import time
2
+ import random
3
+ import uuid
4
+ import socket
5
+ import hashlib
6
+
7
+ from dataclasses import dataclass
8
+ from typing import Dict, List
9
+ import os
10
+ from pathlib import Path
11
+
12
+ from flowfile_core.flowfile.manage.open_flowfile import open_flow
13
+ from flowfile_core.flowfile.FlowfileFlow import FlowGraph
14
+ from flowfile_core.schemas.schemas import FlowSettings
15
+ from flowfile_core.configs import logger
16
+
17
+
18
def create_unique_id() -> int:
    """Build a pseudo-unique flow id that fits in an unsigned 32-bit integer.

    Several entropy sources (clock, process id, random bits, MAC address,
    hostname and a random UUID) are joined into one seed string. The seed is
    hashed with MD5 and the low 32 bits of the digest are returned.

    Returns:
        int: value in the range ``0 .. 0xFFFFFFFF``.
    """
    entropy_sources = (
        int(time.time() * 1000),  # wall clock in milliseconds
        os.getpid(),
        random.getrandbits(32),
        uuid.getnode(),  # MAC address as integer
        socket.gethostname(),
        uuid.uuid4(),
    )
    seed = "-".join(str(source) for source in entropy_sources)
    digest_hex = hashlib.md5(seed.encode()).hexdigest()
    # Keep only the low 32 bits (4 bytes) of the 128-bit digest.
    return int(digest_hex, 16) & 0xFFFFFFFF
42
+
43
+
44
+ @dataclass
45
+ class FlowfileHandler:
46
+ _flows: Dict[int, FlowGraph]
47
+
48
+ def __init__(self):
49
+ self._flows = {}
50
+
51
+ @property
52
+ def flowfile_flows(self) -> List[FlowGraph]:
53
+ return list(self._flows.values())
54
+
55
+ def __add__(self, other: FlowGraph) -> int:
56
+ self._flows[other.flow_id] = other
57
+ return other.flow_id
58
+
59
+ def import_flow(self, flow_path: Path|str) -> int:
60
+ if isinstance(flow_path, str):
61
+ flow_path = Path(flow_path)
62
+ imported_flow = open_flow(flow_path)
63
+ self._flows[imported_flow.flow_id] = imported_flow
64
+ imported_flow.flow_settings = self.get_flow_info(imported_flow.flow_id)
65
+ imported_flow.flow_settings.is_running = False
66
+ return imported_flow.flow_id
67
+
68
+ def register_flow(self, flow_settings: FlowSettings):
69
+ if flow_settings.flow_id in self._flows:
70
+ self.delete_flow(flow_settings.flow_id)
71
+ raise 'flow already registered'
72
+ else:
73
+ name = flow_settings.name if flow_settings.name else flow_settings.flow_id
74
+ self._flows[flow_settings.flow_id] = FlowGraph(name=name, flow_id=flow_settings.flow_id, flow_settings=flow_settings)
75
+ return self.get_flow(flow_settings.flow_id)
76
+
77
+ def get_flow(self, flow_id: int) -> FlowGraph | None:
78
+ return self._flows.get(flow_id, None)
79
+
80
+ def delete_flow(self, flow_id: int):
81
+ flow = self._flows.pop(flow_id)
82
+ del flow
83
+
84
+ def save_flow(self, flow_id: int, flow_path: str):
85
+ flow = self.get_flow(flow_id)
86
+ if flow:
87
+ flow.save_flow(flow_path)
88
+ else:
89
+ raise Exception('Flow not found')
90
+
91
+ def add_flow(self, name: str, flow_path: str) -> int:
92
+ """
93
+ Creates a new flow with a reference to the flow path
94
+ Args:
95
+ name (str): The name of the flow
96
+ flow_path (str): The path to the flow file
97
+
98
+ Returns:
99
+ int: The flow id
100
+
101
+ """
102
+ next_id = create_unique_id()
103
+ flow_info = FlowSettings(name=name, flow_id=next_id, save_location='', path=flow_path)
104
+ _ = self.register_flow(flow_info)
105
+ return next_id
106
+
107
+ def get_flow_info(self, flow_id: int) -> FlowSettings:
108
+ flow = self.get_flow(flow_id)
109
+ if not flow:
110
+ raise Exception(f'Flow {flow_id} not found')
111
+ flow_exists = os.path.exists(flow.flow_settings.path)
112
+ last_modified_ts = os.path.getmtime(flow.flow_settings.path) if flow_exists else -1
113
+ flow.flow_settings.modified_on = last_modified_ts
114
+ return flow.flow_settings
115
+
116
+ def get_node(self, flow_id: int, node_id: int):
117
+ flow = self.get_flow(flow_id)
118
+ if not flow:
119
+ raise Exception(f'Flow {flow_id} not found')
120
+ node = flow.get_node(node_id)
121
+ if not node:
122
+ raise Exception(f'Node {node_id} not found in flow {flow_id}')
123
+ return node
File without changes
@@ -0,0 +1,70 @@
1
+ from flowfile_core.schemas import schemas, input_schema
2
+
3
+
4
def ensure_compatibility_node_read(node_read: "input_schema.NodeRead"):
    """Give ``node_read.received_file`` an empty ``fields`` list when it has none.

    Nothing happens when ``node_read`` has no ``received_file`` attribute or
    the received file already carries ``fields``.
    """
    if not hasattr(node_read, 'received_file'):
        return
    received_file = node_read.received_file
    if hasattr(received_file, 'fields'):
        return
    print('setting fields')
    received_file.fields = []
9
+
10
+
11
def ensure_compatibility_node_output(node_output: "input_schema.NodeOutput"):
    """Rebuild ``output_settings`` when it lacks the ``abs_file_path`` attribute.

    The existing settings are dumped and validated again through
    ``input_schema.OutputSettings``; the result replaces the old settings.
    Nodes without ``output_settings`` are left untouched.
    """
    if not hasattr(node_output, 'output_settings'):
        return
    current_settings = node_output.output_settings
    if hasattr(current_settings, 'abs_file_path'):
        return
    node_output.output_settings = input_schema.OutputSettings.model_validate(current_settings.model_dump())
16
+
17
+
18
def ensure_compatibility_node_select(node_select: "input_schema.NodeSelect"):
    """Back-fill ``position`` on the select inputs and ``sorted_by`` on the node.

    When at least one select input lacks ``position``, all of them are
    renumbered by their index in ``select_input``. A missing ``sorted_by``
    is set to ``'none'``.
    """
    if not hasattr(node_select, 'select_input'):
        return
    select_inputs = node_select.select_input
    if not all(hasattr(select_input, 'position') for select_input in select_inputs):
        for position, select_input in enumerate(select_inputs):
            select_input.position = position
    # NOTE(review): this check is assumed to sit inside the ``select_input`` guard;
    # the nesting was reconstructed from a flattened diff view - confirm.
    if not hasattr(node_select, 'sorted_by'):
        node_select.sorted_by = 'none'
25
+
26
+
27
def ensure_compatibility_node_joins(node_settings: "input_schema.NodeFuzzyMatch | input_schema.NodeJoin"):
    """Back-fill ``position`` on the join's rename entries.

    When any rename on either side of the join lacks ``position``, every
    rename is renumbered: right-side renames first, then left-side renames,
    counting up from 0 across both lists.

    Both sides are inspected, so a gap that exists only on the left side is
    repaired as well.
    """
    join_input = node_settings.join_input
    all_renames = join_input.right_select.renames + join_input.left_select.renames
    if any(not hasattr(rename, 'position') for rename in all_renames):
        for position, rename in enumerate(all_renames):
            rename.position = position
32
+
33
+
34
def ensure_description(node: "input_schema.NodeBase"):
    """Set ``node.description`` to an empty string when the attribute is missing."""
    if hasattr(node, 'description'):
        return
    node.description = ''
37
+
38
+
39
def ensure_compatibility_node_polars(node_polars: "input_schema.NodePolarsCode"):
    """Copy a single ``depending_on_id`` into a one-element ``depending_on_ids`` list.

    Nodes without a ``depending_on_id`` attribute are left untouched.
    """
    if not hasattr(node_polars, 'depending_on_id'):
        return
    node_polars.depending_on_ids = [node_polars.depending_on_id]
42
+
43
+
44
def ensure_compatibility(flow_storage_obj: "schemas.FlowInformation", flow_path: str):
    """Patch a stored flow, in place, with attributes that older versions lacked.

    Flow level:
        * no ``flow_settings`` at all: create them from the flow id, path and
          flow name, then re-validate the stored object;
        * otherwise back-fill ``execution_location`` (``'auto'``) and
          ``is_running`` / ``is_canceled`` (``False``). Each is checked on its
          own, so settings missing several of them are patched completely;
        * back-fill ``show_detailed_progress`` (``True``).

    Node level: every node that has a ``setting_input`` is passed to the
    matching per-node fixer (chosen by the setting's class name) and then to
    ``ensure_description``.

    Args:
        flow_storage_obj: The stored flow to patch.
        flow_path: Path of the flow file, used when settings have to be created.
    """
    if not hasattr(flow_storage_obj, 'flow_settings'):
        flow_settings = schemas.FlowSettings(flow_id=flow_storage_obj.flow_id, path=flow_path,
                                             name=flow_storage_obj.flow_name)
        setattr(flow_storage_obj, 'flow_settings', flow_settings)
        flow_storage_obj = schemas.FlowInformation.model_validate(flow_storage_obj)
    else:
        existing_settings = flow_storage_obj.flow_settings
        if not hasattr(existing_settings, 'execution_location'):
            existing_settings.execution_location = 'auto'
        if not hasattr(existing_settings, 'is_running'):
            existing_settings.is_running = False
            existing_settings.is_canceled = False
    if not hasattr(flow_storage_obj.flow_settings, 'show_detailed_progress'):
        flow_storage_obj.flow_settings.show_detailed_progress = True

    for node_information in flow_storage_obj.data.values():
        if not hasattr(node_information, 'setting_input'):
            continue
        setting_input = node_information.setting_input
        setting_class = setting_input.__class__.__name__
        if setting_class == 'NodeRead':
            ensure_compatibility_node_read(setting_input)
        elif setting_class == 'NodeSelect':
            ensure_compatibility_node_select(setting_input)
        elif setting_class == 'NodeOutput':
            ensure_compatibility_node_output(setting_input)
        elif setting_class in ('NodeJoin', 'NodeFuzzyMatch'):
            ensure_compatibility_node_joins(setting_input)
        elif setting_class == 'NodePolarsCode':
            ensure_compatibility_node_polars(setting_input)
        ensure_description(setting_input)
File without changes
@@ -0,0 +1,136 @@
1
+ from flowfile_core.schemas import schemas, input_schema
2
+ from typing import List, Tuple
3
+ from flowfile_core.flowfile.manage.compatibility_enhancements import ensure_compatibility
4
+ import pickle
5
+ from flowfile_core.flowfile.FlowfileFlow import FlowGraph
6
+ from pathlib import Path
7
+
8
+
9
def determine_insertion_order(node_storage: "schemas.FlowInformation"):
    """Compute the order in which stored nodes are added to a new flow.

    Starting from the first entry of ``node_storage.node_starts`` (or from
    every node when there are no start nodes), the graph is walked so that a
    node is listed once all of its inputs that exist in ``node_storage.data``
    are listed. Nodes that were never reached are appended at the end.

    Side effect: when a node names an input whose ``outputs`` list does not
    mention it, the id is appended to that input's ``outputs``.

    Each node id appears at most once in the result, even when several of its
    inputs are missing from the storage.

    Args:
        node_storage: Stored flow; ``data`` maps node id to node information.

    Returns:
        List of node ids in insertion order.
    """
    ingest_order: List[int] = []
    ingest_order_set: set[int] = set()
    all_nodes = set(node_storage.data.keys())

    def mark_ingested(node_id: int) -> None:
        # Guarded, so a node reached through several paths is listed once.
        if node_id not in ingest_order_set:
            ingest_order.append(node_id)
            ingest_order_set.add(node_id)

    def assure_output_id(input_node: "schemas.NodeInformation", output_node: "schemas.NodeInformation"):
        # assure the output id is in the list with outputs of the input node this is a quick fix
        if output_node.id not in input_node.outputs:
            input_node.outputs.append(output_node.id)

    def determine_order(node_id: int):
        current_node = node_storage.data.get(node_id)
        if current_node is None:
            return
        output_ids = current_node.outputs
        main_input_ids = list(current_node.input_ids) if current_node.input_ids else []
        candidate_ids = [current_node.left_input_id, current_node.right_input_id] + main_input_ids
        input_ids = [n for n in candidate_ids if n is not None and n not in ingest_order_set]
        if input_ids:
            for input_id in input_ids:
                new_node = node_storage.data.get(input_id)
                if new_node is None:
                    # The input is not stored: do not wait for it.
                    mark_ingested(current_node.id)
                    continue
                assure_output_id(new_node, current_node)
                if new_node.id not in ingest_order_set:
                    determine_order(input_id)
        else:
            mark_ingested(current_node.id)

        for output_id in output_ids:
            if output_id not in ingest_order_set:
                determine_order(output_id)

    if len(node_storage.node_starts) > 0:
        determine_order(node_storage.node_starts[0])
    else:
        for node_id in all_nodes:
            determine_order(node_id)
    # add the random not connected nodes
    ingest_order += list(all_nodes - ingest_order_set)
    return ingest_order
55
+
56
+
57
def open_flow(flow_path: Path) -> FlowGraph:
    """
    Open a flowfile from a given path
    Args:
        flow_path (Path): The absolute or relative path to the flowfile

    Returns:
        FlowGraph: The flowfile object

    The stored flow is unpickled, patched by ``ensure_compatibility`` and then
    rebuilt: first a promise is added for every node, then the node settings
    are applied and the stored connections are restored.
    """
    flow_path_str = str(flow_path)
    flow_name = str(flow_path.stem)
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only open flow files that come from a trusted source.
    with open(flow_path_str, 'rb') as f:
        flow_storage_obj: schemas.FlowInformation = pickle.load(f)
    flow_storage_obj.flow_name = flow_name
    # Run the compatibility pass before touching flow_settings: files written by
    # old versions have no flow_settings until ensure_compatibility creates them.
    ensure_compatibility(flow_storage_obj, flow_path_str)
    flow_storage_obj.flow_settings.path = flow_path_str
    flow_storage_obj.flow_settings.name = flow_name

    ingestion_order = determine_insertion_order(flow_storage_obj)
    new_flow = FlowGraph(flow_id=flow_storage_obj.flow_id, name=flow_storage_obj.flow_name,
                         flow_settings=flow_storage_obj.flow_settings)

    # Pass 1: reserve every node so connections can refer to it later.
    for node_id in ingestion_order:
        node_info: schemas.NodeInformation = flow_storage_obj.data[node_id]
        node_promise = input_schema.NodePromise(flow_id=new_flow.flow_id, node_id=node_info.id,
                                                pos_x=node_info.x_position, pos_y=node_info.y_position,
                                                node_type=node_info.type)
        if hasattr(node_info.setting_input, 'cache_results'):
            node_promise.cache_results = node_info.setting_input.cache_results
        new_flow.add_node_promise(node_promise)

    # Pass 2: apply the settings and wire each node to its outputs.
    for node_id in ingestion_order:
        node_info: schemas.NodeInformation = flow_storage_obj.data[node_id]
        getattr(new_flow, 'add_' + node_info.type)(node_info.setting_input)
        from_node = new_flow.get_node(node_id)
        for output_node_id in node_info.outputs:
            to_node = new_flow.get_node(output_node_id)
            if to_node is None:
                # The output node does not exist in the rebuilt flow: drop the stale link.
                from_node.delete_lead_to_node(output_node_id)
                stale_connection = (from_node.node_id, output_node_id)
                if stale_connection in flow_storage_obj.node_connections:
                    flow_storage_obj.node_connections.remove(stale_connection)
                continue
            output_node_obj = flow_storage_obj.data[output_node_id]
            # A side input counts only when that slot is not already filled by this node.
            is_left_input = (output_node_obj.left_input_id == node_id) and (
                to_node.left_input is None or to_node.left_input.node_id != node_id)
            is_right_input = (output_node_obj.right_input_id == node_id) and (
                to_node.right_input is None or to_node.right_input.node_id != node_id)
            # input_ids may be unset on stored nodes.
            is_main_input = node_id in (output_node_obj.input_ids or [])
            if is_left_input:
                insert_type = 'left'
            elif is_right_input:
                insert_type = 'right'
            elif is_main_input:
                insert_type = 'main'
            else:
                continue
            to_node.add_node_connection(from_node, insert_type)

    # Pass 3: restore stored connections that pass 2 did not create.
    for missing_connection in set(flow_storage_obj.node_connections) - set(new_flow.node_connections):
        to_node = new_flow.get_node(missing_connection[1])
        if to_node is None or to_node.has_input:
            continue
        if test_if_circular_connection(missing_connection, new_flow):
            # Restoring this connection would close a cycle in the graph.
            continue
        from_node = new_flow.get_node(missing_connection[0])
        if from_node:
            to_node.add_node_connection(from_node)
    return new_flow
122
+
123
+
124
def test_if_circular_connection(connection: Tuple[int, int], flow: "FlowGraph"):
    """Tell whether adding ``connection`` would close a cycle in ``flow``.

    Args:
        connection: ``(from_node_id, to_node_id)`` of the candidate connection.
        flow: Flow whose ``get_node`` resolves node ids; nodes expose
            ``node_id`` and ``leads_to_nodes``.

    Returns:
        ``True`` when the source node is reachable from the target node by
        following ``leads_to_nodes``; ``False`` otherwise, including when the
        target node does not exist.
    """
    source_id = connection[0]
    to_node = flow.get_node(connection[1])
    if to_node is None:
        return False
    pending = list(to_node.leads_to_nodes)
    visited_ids = set()
    while pending:
        candidate = pending.pop()
        if candidate.node_id == source_id:
            return True
        if candidate.node_id in visited_ids:
            # Already expanded; skipping keeps the walk finite on cyclic graphs.
            continue
        visited_ids.add(candidate.node_id)
        pending.extend(candidate.leads_to_nodes)
    return False


# The ``test_`` prefix is part of the public name; keep pytest from collecting
# this helper as a test when it is imported into a test module.
test_if_circular_connection.__test__ = False
136
+
@@ -0,0 +1,2 @@
1
+
2
+ from flowfile_core.flowfile.setting_generator.settings import setting_generator, setting_updator
@@ -0,0 +1,41 @@
1
+ from flowfile_core.configs import logger
2
+ from typing import Callable
3
+
4
+
5
class SettingGenerator:
    """Registry of setting-generator callables, looked up by function name.

    A registered function is stored as an instance attribute named after the
    function, and its name is recorded in ``setting_generator_set``.
    """
    setting_generator_set = set()

    def __init__(self):
        # Shadow the class-level set so every instance has its own registry.
        self.setting_generator_set = set()

    def add_setting_generator_func(self, f: Callable):
        """Register ``f`` under its ``__name__``."""
        func_name = f.__name__
        self.setting_generator_set.add(func_name)
        setattr(self, func_name, f)

    def get_setting_generator(self, node_type: str) -> Callable:
        """Return the generator registered for ``node_type``.

        When nothing is registered under that name, a function that returns
        its argument unchanged is returned instead.
        """
        logger.info('getting setting generator for ' + node_type)

        if node_type not in self.setting_generator_set:
            return lambda x: x
        logger.info('setting generator found')
        return getattr(self, node_type)
23
+
24
+
25
class SettingUpdator:
    """Registry of setting-updater callables, looked up by function name.

    A registered function is stored as an instance attribute named after the
    function, and its name is recorded in ``setting_updator_set``.
    """
    setting_updator_set = set()

    def __init__(self):
        # Shadow the class-level set so every instance has its own registry.
        self.setting_updator_set = set()

    def add_setting_updator_func(self, f: Callable):
        """Register ``f`` under its ``__name__``."""
        func_name = f.__name__
        self.setting_updator_set.add(func_name)
        setattr(self, func_name, f)

    def get_setting_updator(self, node_type: str) -> Callable:
        """Return the updater registered for ``node_type``.

        When nothing is registered under that name, a function that returns
        its argument unchanged is returned instead.
        """
        logger.info('getting setting updator for ' + node_type)
        if node_type not in self.setting_updator_set:
            return lambda x: x
        logger.info('setting updator found')
        return getattr(self, node_type)