Flowfile 0.2.2 (flowfile-0.2.2-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile might be problematic. Click here for more details.

Files changed (171)
  1. build_backends/__init__.py +0 -0
  2. build_backends/main.py +313 -0
  3. build_backends/main_prd.py +202 -0
  4. flowfile/__init__.py +71 -0
  5. flowfile/__main__.py +24 -0
  6. flowfile-0.2.2.dist-info/LICENSE +21 -0
  7. flowfile-0.2.2.dist-info/METADATA +225 -0
  8. flowfile-0.2.2.dist-info/RECORD +171 -0
  9. flowfile-0.2.2.dist-info/WHEEL +4 -0
  10. flowfile-0.2.2.dist-info/entry_points.txt +9 -0
  11. flowfile_core/__init__.py +13 -0
  12. flowfile_core/auth/__init__.py +0 -0
  13. flowfile_core/auth/jwt.py +140 -0
  14. flowfile_core/auth/models.py +40 -0
  15. flowfile_core/auth/secrets.py +178 -0
  16. flowfile_core/configs/__init__.py +35 -0
  17. flowfile_core/configs/flow_logger.py +433 -0
  18. flowfile_core/configs/node_store/__init__.py +0 -0
  19. flowfile_core/configs/node_store/nodes.py +98 -0
  20. flowfile_core/configs/settings.py +120 -0
  21. flowfile_core/database/__init__.py +0 -0
  22. flowfile_core/database/connection.py +51 -0
  23. flowfile_core/database/init_db.py +45 -0
  24. flowfile_core/database/models.py +41 -0
  25. flowfile_core/fileExplorer/__init__.py +0 -0
  26. flowfile_core/fileExplorer/funcs.py +259 -0
  27. flowfile_core/fileExplorer/utils.py +53 -0
  28. flowfile_core/flowfile/FlowfileFlow.py +1403 -0
  29. flowfile_core/flowfile/__init__.py +0 -0
  30. flowfile_core/flowfile/_extensions/__init__.py +0 -0
  31. flowfile_core/flowfile/_extensions/real_time_interface.py +51 -0
  32. flowfile_core/flowfile/analytics/__init__.py +0 -0
  33. flowfile_core/flowfile/analytics/analytics_processor.py +123 -0
  34. flowfile_core/flowfile/analytics/graphic_walker.py +60 -0
  35. flowfile_core/flowfile/analytics/schemas/__init__.py +0 -0
  36. flowfile_core/flowfile/analytics/utils.py +9 -0
  37. flowfile_core/flowfile/connection_manager/__init__.py +3 -0
  38. flowfile_core/flowfile/connection_manager/_connection_manager.py +48 -0
  39. flowfile_core/flowfile/connection_manager/models.py +10 -0
  40. flowfile_core/flowfile/database_connection_manager/__init__.py +0 -0
  41. flowfile_core/flowfile/database_connection_manager/db_connections.py +139 -0
  42. flowfile_core/flowfile/database_connection_manager/models.py +15 -0
  43. flowfile_core/flowfile/extensions.py +36 -0
  44. flowfile_core/flowfile/flow_data_engine/__init__.py +0 -0
  45. flowfile_core/flowfile/flow_data_engine/create/__init__.py +0 -0
  46. flowfile_core/flowfile/flow_data_engine/create/funcs.py +146 -0
  47. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1521 -0
  48. flowfile_core/flowfile/flow_data_engine/flow_file_column/__init__.py +0 -0
  49. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +144 -0
  50. flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +24 -0
  51. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +36 -0
  52. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/__init__.py +0 -0
  53. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +38 -0
  54. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +90 -0
  55. flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -0
  56. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +54 -0
  57. flowfile_core/flowfile/flow_data_engine/pivot_table.py +20 -0
  58. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +249 -0
  59. flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +143 -0
  60. flowfile_core/flowfile/flow_data_engine/sample_data.py +120 -0
  61. flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -0
  62. flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +36 -0
  63. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +503 -0
  64. flowfile_core/flowfile/flow_data_engine/threaded_processes.py +27 -0
  65. flowfile_core/flowfile/flow_data_engine/types.py +0 -0
  66. flowfile_core/flowfile/flow_data_engine/utils.py +212 -0
  67. flowfile_core/flowfile/flow_node/__init__.py +0 -0
  68. flowfile_core/flowfile/flow_node/flow_node.py +771 -0
  69. flowfile_core/flowfile/flow_node/models.py +111 -0
  70. flowfile_core/flowfile/flow_node/schema_callback.py +70 -0
  71. flowfile_core/flowfile/handler.py +123 -0
  72. flowfile_core/flowfile/manage/__init__.py +0 -0
  73. flowfile_core/flowfile/manage/compatibility_enhancements.py +70 -0
  74. flowfile_core/flowfile/manage/manage_flowfile.py +0 -0
  75. flowfile_core/flowfile/manage/open_flowfile.py +136 -0
  76. flowfile_core/flowfile/setting_generator/__init__.py +2 -0
  77. flowfile_core/flowfile/setting_generator/setting_generator.py +41 -0
  78. flowfile_core/flowfile/setting_generator/settings.py +176 -0
  79. flowfile_core/flowfile/sources/__init__.py +0 -0
  80. flowfile_core/flowfile/sources/external_sources/__init__.py +3 -0
  81. flowfile_core/flowfile/sources/external_sources/airbyte_sources/__init__.py +0 -0
  82. flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +159 -0
  83. flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +172 -0
  84. flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +173 -0
  85. flowfile_core/flowfile/sources/external_sources/base_class.py +39 -0
  86. flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +2 -0
  87. flowfile_core/flowfile/sources/external_sources/custom_external_sources/exchange_rate.py +0 -0
  88. flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +100 -0
  89. flowfile_core/flowfile/sources/external_sources/custom_external_sources/google_sheet.py +74 -0
  90. flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +29 -0
  91. flowfile_core/flowfile/sources/external_sources/factory.py +22 -0
  92. flowfile_core/flowfile/sources/external_sources/sql_source/__init__.py +0 -0
  93. flowfile_core/flowfile/sources/external_sources/sql_source/models.py +90 -0
  94. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +328 -0
  95. flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +379 -0
  96. flowfile_core/flowfile/util/__init__.py +0 -0
  97. flowfile_core/flowfile/util/calculate_layout.py +137 -0
  98. flowfile_core/flowfile/util/execution_orderer.py +141 -0
  99. flowfile_core/flowfile/utils.py +106 -0
  100. flowfile_core/main.py +138 -0
  101. flowfile_core/routes/__init__.py +0 -0
  102. flowfile_core/routes/auth.py +34 -0
  103. flowfile_core/routes/logs.py +163 -0
  104. flowfile_core/routes/public.py +10 -0
  105. flowfile_core/routes/routes.py +601 -0
  106. flowfile_core/routes/secrets.py +85 -0
  107. flowfile_core/run_lock.py +11 -0
  108. flowfile_core/schemas/__init__.py +0 -0
  109. flowfile_core/schemas/analysis_schemas/__init__.py +0 -0
  110. flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +118 -0
  111. flowfile_core/schemas/defaults.py +9 -0
  112. flowfile_core/schemas/external_sources/__init__.py +0 -0
  113. flowfile_core/schemas/external_sources/airbyte_schemas.py +20 -0
  114. flowfile_core/schemas/input_schema.py +477 -0
  115. flowfile_core/schemas/models.py +193 -0
  116. flowfile_core/schemas/output_model.py +115 -0
  117. flowfile_core/schemas/schemas.py +106 -0
  118. flowfile_core/schemas/transform_schema.py +569 -0
  119. flowfile_core/secrets/__init__.py +0 -0
  120. flowfile_core/secrets/secrets.py +64 -0
  121. flowfile_core/utils/__init__.py +0 -0
  122. flowfile_core/utils/arrow_reader.py +247 -0
  123. flowfile_core/utils/excel_file_manager.py +18 -0
  124. flowfile_core/utils/fileManager.py +45 -0
  125. flowfile_core/utils/fl_executor.py +38 -0
  126. flowfile_core/utils/utils.py +8 -0
  127. flowfile_frame/__init__.py +56 -0
  128. flowfile_frame/__main__.py +12 -0
  129. flowfile_frame/adapters.py +17 -0
  130. flowfile_frame/expr.py +1163 -0
  131. flowfile_frame/flow_frame.py +2093 -0
  132. flowfile_frame/group_frame.py +199 -0
  133. flowfile_frame/join.py +75 -0
  134. flowfile_frame/selectors.py +242 -0
  135. flowfile_frame/utils.py +184 -0
  136. flowfile_worker/__init__.py +55 -0
  137. flowfile_worker/configs.py +95 -0
  138. flowfile_worker/create/__init__.py +37 -0
  139. flowfile_worker/create/funcs.py +146 -0
  140. flowfile_worker/create/models.py +86 -0
  141. flowfile_worker/create/pl_types.py +35 -0
  142. flowfile_worker/create/read_excel_tables.py +110 -0
  143. flowfile_worker/create/utils.py +84 -0
  144. flowfile_worker/external_sources/__init__.py +0 -0
  145. flowfile_worker/external_sources/airbyte_sources/__init__.py +0 -0
  146. flowfile_worker/external_sources/airbyte_sources/cache_manager.py +161 -0
  147. flowfile_worker/external_sources/airbyte_sources/main.py +89 -0
  148. flowfile_worker/external_sources/airbyte_sources/models.py +133 -0
  149. flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
  150. flowfile_worker/external_sources/sql_source/__init__.py +0 -0
  151. flowfile_worker/external_sources/sql_source/main.py +56 -0
  152. flowfile_worker/external_sources/sql_source/models.py +72 -0
  153. flowfile_worker/flow_logger.py +58 -0
  154. flowfile_worker/funcs.py +327 -0
  155. flowfile_worker/main.py +108 -0
  156. flowfile_worker/models.py +95 -0
  157. flowfile_worker/polars_fuzzy_match/__init__.py +0 -0
  158. flowfile_worker/polars_fuzzy_match/matcher.py +435 -0
  159. flowfile_worker/polars_fuzzy_match/models.py +36 -0
  160. flowfile_worker/polars_fuzzy_match/pre_process.py +213 -0
  161. flowfile_worker/polars_fuzzy_match/process.py +86 -0
  162. flowfile_worker/polars_fuzzy_match/utils.py +50 -0
  163. flowfile_worker/process_manager.py +36 -0
  164. flowfile_worker/routes.py +440 -0
  165. flowfile_worker/secrets.py +148 -0
  166. flowfile_worker/spawner.py +187 -0
  167. flowfile_worker/utils.py +25 -0
  168. test_utils/__init__.py +3 -0
  169. test_utils/postgres/__init__.py +1 -0
  170. test_utils/postgres/commands.py +109 -0
  171. test_utils/postgres/fixtures.py +417 -0
@@ -0,0 +1,178 @@
1
+ """
2
+ Secure storage module for FlowFile credentials and secrets.
3
+ """
4
+ from cryptography.fernet import Fernet
5
+ import os
6
+ from pathlib import Path
7
+ import json
8
+ import logging
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class SecureStorage:
    """A local secret store that keeps one Fernet-encrypted JSON file per service.

    Secrets for a service live in ``<storage_path>/<service_name>.json.enc`` as
    an encrypted JSON object mapping usernames to passwords. All files are
    encrypted with the key stored in ``<storage_path>/.secret_key``.
    """

    def __init__(self):
        """Resolve the storage directory and make sure an encryption key exists.

        With ``FLOWFILE_MODE=electron`` the directory is ``flowfile`` under
        ``%APPDATA%`` (or ``~/.config`` when ``APPDATA`` is unset); otherwise
        it is ``SECURE_STORAGE_PATH`` (default ``/tmp/.flowfile``).
        """
        env = os.environ.get("FLOWFILE_MODE")
        logger.debug(f'Using secure storage in {env} mode')
        if env == "electron":
            app_data = os.environ.get("APPDATA") or os.path.expanduser("~/.config")
            self.storage_path = Path(app_data) / "flowfile"
        else:
            self.storage_path = Path(os.environ.get("SECURE_STORAGE_PATH", "/tmp/.flowfile"))
            logger.debug(f"Using SECURE_STORAGE_PATH: {self.storage_path}")

        # The key file is written right below, so the directory must exist in
        # every mode; restricting it to the owner is best-effort.
        self.storage_path.mkdir(parents=True, exist_ok=True)
        try:
            os.chmod(self.storage_path, 0o700)
        except Exception as e:
            logger.debug(f"Could not set permissions on storage directory: {e}")

        self.key_path = self.storage_path / ".secret_key"
        if not self.key_path.exists():
            self._create_key_file()

    def _create_key_file(self):
        """Create the key file with owner-only permissions, never overwriting one."""
        new_key = Fernet.generate_key()
        try:
            # O_EXCL: if another process created the key between the exists()
            # check and here, keep its key instead of truncating it.
            # Mode 0o600 applies at creation, so the key is never readable by
            # other users, not even briefly.
            fd = os.open(self.key_path, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600)
        except FileExistsError:
            return
        with os.fdopen(fd, "wb") as f:
            f.write(new_key)
        try:
            os.chmod(self.key_path, 0o600)
        except Exception as e:
            logger.debug(f"Could not set permissions on key file: {e}")

    def _get_store_path(self, service_name):
        """Return the path of the encrypted store file for ``service_name``."""
        return self.storage_path / f"{service_name}.json.enc"

    def _read_store(self, service_name):
        """Read and decrypt the store for ``service_name``.

        Returns:
            The decrypted mapping, or ``{}`` when the store file does not exist
            or cannot be read/decrypted (the error is logged at debug level).
        """
        path = self._get_store_path(service_name)
        if not path.exists():
            return {}

        try:
            with open(self.key_path, "rb") as f:
                key = f.read()
            with open(path, "rb") as f:
                data = f.read()

            return json.loads(Fernet(key).decrypt(data).decode())
        except Exception as e:
            # NOTE(review): an unreadable store is reported as empty, so a
            # following set_password() replaces it; confirm this is intended.
            logger.debug(f"Error reading from encrypted store: {e}")
            return {}

    def _write_store(self, service_name, data):
        """Encrypt ``data`` and write it as the store for ``service_name``.

        The ciphertext is written to a sibling ``.tmp`` file created with
        owner-only permissions and then moved over the store with
        ``os.replace``, so readers see either the old or the new store.
        Failures are logged, not raised.
        """
        try:
            with open(self.key_path, "rb") as f:
                key = f.read()

            encrypted = Fernet(key).encrypt(json.dumps(data).encode())
            path = self._get_store_path(service_name)
            tmp_path = path.with_name(path.name + ".tmp")

            fd = os.open(tmp_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
            with os.fdopen(fd, "wb") as f:
                f.write(encrypted)
            try:
                # Covers a leftover temp file that pre-existed with wider permissions.
                os.chmod(tmp_path, 0o600)
            except Exception as e:
                logger.debug(f"Could not set permissions on store file: {e}")
            os.replace(tmp_path, path)
        except Exception as e:
            logger.error(f"Failed to write to secure store: {e}")

    def get_password(self, service_name, username):
        """Return the secret stored for ``username``, or ``None`` if there is none."""
        store = self._read_store(service_name)
        return store.get(username)

    def set_password(self, service_name, username, password):
        """Store ``password`` under ``username`` in the service's store."""
        store = self._read_store(service_name)
        store[username] = password
        self._write_store(service_name, store)

    def delete_password(self, service_name, username):
        """Remove ``username`` from the service's store; no-op when it is absent."""
        store = self._read_store(service_name)
        if username in store:
            del store[username]
            self._write_store(service_name, store)
96
+
97
+
98
# Shared store used by the module-level helper functions. It is built at import
# time, so importing this module resolves the storage directory and creates the
# key file when it is missing.
_storage = SecureStorage()
99
+
100
+
101
def get_password(service_name, username):
    """
    Look up a secret in the module-level secure storage.

    Args:
        service_name: Name of the service whose store is consulted
        username: Username (or key) the secret was saved under

    Returns:
        The stored secret, or None when nothing is stored for that key
    """
    stored_secret = _storage.get_password(service_name, username)
    return stored_secret
113
+
114
+
115
def set_password(service_name, username, password):
    """
    Save a secret in the module-level secure storage.

    Args:
        service_name: Name of the service whose store is updated
        username: Username (or key) to save the secret under
        password: The password or secret value to save
    """
    _storage.set_password(service_name, username, password)
125
+
126
+
127
def delete_password(service_name, username):
    """
    Remove a secret from the module-level secure storage.

    Args:
        service_name: Name of the service whose store is updated
        username: Username (or key) whose secret is removed
    """
    _storage.delete_password(service_name, username)
136
+
137
+
138
def get_docker_secret_key():
    """
    Read the master key from the Docker secret file.

    The secret is expected at ``/run/secrets/flowfile_master_key``.

    Returns:
        str: The file's content with surrounding whitespace stripped.

    Raises:
        RuntimeError: If the secret file is not present, or if it exists but
            cannot be read (the underlying error is attached as the cause).
    """
    secret_path = "/run/secrets/flowfile_master_key"

    if not os.path.exists(secret_path):
        logger.critical("Running in Docker but flowfile_master_key secret is not mounted!")
        raise RuntimeError("Docker secret 'flowfile_master_key' is not mounted")

    try:
        with open(secret_path, "r") as f:
            return f.read().strip()
    except Exception as e:
        logger.error(f"Failed to read master key from Docker secret: {e}")
        # Chain explicitly so the traceback shows why the read failed.
        raise RuntimeError("Failed to read master key from Docker secret") from e
159
+
160
+
161
def get_master_key():
    """
    Return the master encryption key, creating one on first use.

    When ``RUNNING_IN_DOCKER`` is ``"true"`` the key comes from the Docker
    secret. Otherwise it is looked up in secure storage under service
    ``flowfile`` / key ``master_key``; if nothing is stored yet, a new Fernet
    key is generated, saved there and returned.

    Returns:
        str: The master encryption key
    """
    running_in_docker = os.environ.get("RUNNING_IN_DOCKER") == "true"
    if running_in_docker:
        return get_docker_secret_key()

    stored_key = get_password("flowfile", "master_key")
    if stored_key:
        return stored_key

    new_key = Fernet.generate_key().decode()
    set_password("flowfile", "master_key", new_key)
    return new_key
@@ -0,0 +1,35 @@
1
+ # flowfile_core/flowfile_core/configs/__init__.py
2
+ import logging
3
+ import sys
4
+ from pathlib import Path
5
+ import os
6
+
7
# Set at import time, so code that reads FLOWFILE_MODE after this package has
# been imported sees "electron".
os.environ["FLOWFILE_MODE"] = "electron"

# Application logger: INFO and above, not forwarded to ancestor loggers.
logger = logging.getLogger('PipelineHandler')
logger.propagate = False
logger.setLevel(logging.INFO)

# Console output goes to stdout in "time - name - level - message" form.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(formatter)

# Drop handlers left on the logger (e.g. by an earlier import) so the console
# handler is attached exactly once.
if logger.hasHandlers():
    logger.handlers.clear()
logger.addHandler(console_handler)

# Make sure <system temp dir>/flowfile_logs exists; failure is only a warning.
try:
    from tempfile import gettempdir
    log_dir = Path(gettempdir()) / "flowfile_logs"
    log_dir.mkdir(exist_ok=True)
except Exception as e:
    logger.warning(f"Failed to create logs directory: {e}")

# Announce that the logger is configured.
logger.info("Logging system initialized")
@@ -0,0 +1,433 @@
1
+ import logging
2
+ from pathlib import Path
3
+ from datetime import datetime
4
+ from flowfile_core.configs.settings import get_temp_dir
5
+ import os
6
+ import logging.handlers
7
+ import queue
8
+ import threading
9
+
10
# Unbounded queue (a maxsize <= 0 means no size limit) that FlowLogger hands to
# its QueueListener.
_process_safe_queue = queue.Queue(maxsize=-1)
# Application logger used to report problems with the flow loggers themselves.
main_logger = logging.getLogger('PipelineHandler')
12
+
13
+
14
+ class NodeLogger:
15
+ """Logger for individual flow nodes"""
16
+
17
+ def __init__(self, node_id: str | int, flow_id: int, flow_logger_parent: "FlowLogger"):
18
+ self.flow_id = flow_id
19
+ self.node_id = node_id
20
+ self.flow_logger_parent = flow_logger_parent
21
+
22
+ @property
23
+ def logger(self) -> logging.Logger:
24
+ return self.flow_logger_parent.logger
25
+
26
+ def info(self, msg: str):
27
+ self.logger.info(f"Node ID: {self.node_id} - {msg}")
28
+
29
+ def error(self, msg: str):
30
+ self.logger.error(f"Node ID: {self.node_id} - {msg}")
31
+
32
+ def warning(self, msg: str):
33
+ self.logger.warning(f"Node ID: {self.node_id} - {msg}")
34
+
35
+ def debug(self, msg: str):
36
+ self.logger.debug(f"Node ID: {self.node_id} - {msg}")
37
+
38
+
39
+ class FlowLogger:
40
+ """Thread-safe logger for flow execution"""
41
+ _instances = {}
42
+ _instances_lock = threading.RLock()
43
+ _queue_listener = None
44
+ _queue_listener_lock = threading.Lock()
45
+
46
+ @staticmethod
47
+ def handle_extra_log_info(flow_id: int, extra: dict = None) -> dict:
48
+ if extra is None:
49
+ extra = {}
50
+ extra['flow_id'] = flow_id
51
+ return extra
52
+
53
+ def __new__(cls, flow_id: int, clear_existing_logs: bool = False):
54
+ with cls._instances_lock:
55
+ if flow_id not in cls._instances:
56
+ instance = super().__new__(cls)
57
+ instance._initialize(flow_id, clear_existing_logs)
58
+ cls._instances[flow_id] = instance
59
+ else:
60
+ instance = cls._instances[flow_id]
61
+ if clear_existing_logs:
62
+ instance.clear_log_file()
63
+ return instance
64
+
65
+ def _initialize(self, flow_id: int, clear_existing_logs: bool):
66
+ self.flow_id = flow_id
67
+ self._logger = None
68
+ self.log_file_path = get_flow_log_file(self.flow_id)
69
+ self._file_lock = threading.RLock()
70
+ self._setup_new_logger()
71
+
72
+ with self._queue_listener_lock:
73
+ if not FlowLogger._queue_listener:
74
+ FlowLogger._start_queue_listener()
75
+
76
+ def _setup_new_logger(self):
77
+ """Creates a new logger instance with appropriate handlers"""
78
+ logger_name = f'FlowExecution.{self.flow_id}'
79
+ self._logger = logging.getLogger(logger_name)
80
+ self._logger.setLevel(logging.INFO)
81
+ self.setup_logging()
82
+
83
+ @property
84
+ def logger(self):
85
+ """Get the logger instance"""
86
+ if self._logger is None:
87
+ self._setup_new_logger()
88
+ return self._logger
89
+
90
+ def cleanup_self(self):
91
+ """Clean up just this logger instance (not global)"""
92
+ # Try with non-blocking lock first
93
+ if self._file_lock.acquire(blocking=False):
94
+ try:
95
+ self._cleanup_handlers()
96
+ finally:
97
+ self._file_lock.release()
98
+ else:
99
+ # If we can't get lock, proceed anyway
100
+ main_logger.warning(f"Could not acquire lock for flow {self.flow_id}, proceeding with cleanup anyway")
101
+ self._cleanup_handlers()
102
+
103
+ def _cleanup_handlers(self):
104
+ """Close and remove all handlers"""
105
+ if self._logger:
106
+ for handler in self._logger.handlers[:]:
107
+ try:
108
+ handler.close()
109
+ self._logger.removeHandler(handler)
110
+ except Exception as e:
111
+ main_logger.error(f"Error closing handler: {e}")
112
+
113
+ def recreate_self(self):
114
+ """Recreate this logger instance after cleanup"""
115
+ # Try with non-blocking lock first
116
+ if self._file_lock.acquire(blocking=False):
117
+ try:
118
+ self._recreate_impl()
119
+ finally:
120
+ self._file_lock.release()
121
+ else:
122
+ # If we can't get lock, proceed anyway
123
+ main_logger.warning(f"Could not acquire lock for flow {self.flow_id}, proceeding with recreation anyway")
124
+ self._recreate_impl()
125
+
126
+ def _recreate_impl(self):
127
+ """Implementation of recreate operation"""
128
+ # Make sure the log directory exists
129
+ log_dir = Path(self.log_file_path).parent
130
+ log_dir.mkdir(exist_ok=True, parents=True)
131
+
132
+ try:
133
+ # Create an empty file
134
+ with open(self.log_file_path, 'w') as f:
135
+ pass
136
+
137
+ # Re-setup the logger
138
+ self._setup_new_logger()
139
+ main_logger.info(f"Log file was recreated for flow {self.flow_id}")
140
+ except Exception as e:
141
+ main_logger.error(f"Error recreating log file for flow {self.flow_id}: {e}")
142
+
143
+ def refresh_logger_if_needed(self):
144
+ """Check if log file exists and refresh logger if needed"""
145
+ if not os.path.exists(self.log_file_path):
146
+ main_logger.info(f"Log file missing, recreating: {self.log_file_path}")
147
+ self.cleanup_self()
148
+ self.recreate_self()
149
+ return True
150
+ return False
151
+
152
+ @classmethod
153
+ def _start_queue_listener(cls):
154
+ """Start the queue listener for asynchronous logging"""
155
+ queue_handler = logging.handlers.QueueHandler(_process_safe_queue)
156
+ cls._queue_listener = logging.handlers.QueueListener(
157
+ _process_safe_queue,
158
+ queue_handler,
159
+ respect_handler_level=True
160
+ )
161
+ cls._queue_listener.start()
162
+
163
+ def setup_logging(self):
164
+ """Set up file handlers for logging"""
165
+ if self._file_lock.acquire(blocking=False):
166
+ try:
167
+ self._setup_logging_impl()
168
+ finally:
169
+ self._file_lock.release()
170
+ else:
171
+ # Try with timeout
172
+ if self._file_lock.acquire(timeout=1):
173
+ try:
174
+ self._setup_logging_impl()
175
+ finally:
176
+ self._file_lock.release()
177
+ else:
178
+ # If still can't get lock, proceed anyway
179
+ main_logger.warning(f"Could not acquire lock for flow {self.flow_id}, proceeding with setup anyway")
180
+ self._setup_logging_impl()
181
+
182
+ def _setup_logging_impl(self):
183
+ """Implementation of setup_logging without lock handling"""
184
+ # Remove existing handlers
185
+ for handler in self._logger.handlers[:]:
186
+ if isinstance(handler, logging.FileHandler):
187
+ handler.close()
188
+ self._logger.removeHandler(handler)
189
+
190
+ # Make sure the log directory exists
191
+ log_dir = Path(self.log_file_path).parent
192
+ log_dir.mkdir(exist_ok=True, parents=True)
193
+
194
+ # Add file handler
195
+ file_handler = logging.FileHandler(self.log_file_path)
196
+ formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
197
+ file_handler.setFormatter(formatter)
198
+ self._logger.addHandler(file_handler)
199
+
200
+ def clear_log_file(self):
201
+ """Clear the log file for this flow"""
202
+ if self._file_lock.acquire(blocking=False):
203
+ try:
204
+ self._clear_log_impl()
205
+ finally:
206
+ self._file_lock.release()
207
+ else:
208
+ # If can't get lock, try with timeout
209
+ if self._file_lock.acquire(timeout=1):
210
+ try:
211
+ self._clear_log_impl()
212
+ finally:
213
+ self._file_lock.release()
214
+ else:
215
+ # If still can't get lock, proceed anyway
216
+ main_logger.warning(
217
+ f"Could not acquire lock for flow {self.flow_id}, proceeding with file clearing anyway")
218
+ self._clear_log_impl()
219
+
220
+ def _clear_log_impl(self):
221
+ """Implementation of clear_log_file without lock handling"""
222
+ try:
223
+ # Ensure parent directory exists
224
+ self.refresh_logger_if_needed()
225
+ # Truncate file
226
+ with open(self.log_file_path, 'w') as f:
227
+ pass
228
+ main_logger.info(f"Log file cleared for flow {self.flow_id}")
229
+ except Exception as e:
230
+ main_logger.error(f"Error clearing log file {self.log_file_path}: {e}")
231
+
232
+ @classmethod
233
+ def cleanup_instance(cls, flow_id: int):
234
+ """Clean up a specific flow logger instance"""
235
+ with cls._instances_lock:
236
+ if flow_id in cls._instances:
237
+ instance = cls._instances[flow_id]
238
+ instance.cleanup_logging()
239
+ del cls._instances[flow_id]
240
+
241
+ def cleanup_logging(self):
242
+ """Clean up logging for this flow"""
243
+ if self._file_lock.acquire(blocking=False):
244
+ try:
245
+ self._cleanup_handlers()
246
+ finally:
247
+ self._file_lock.release()
248
+ else:
249
+ # If can't get lock, proceed anyway
250
+ main_logger.warning(f"Could not acquire lock for flow {self.flow_id}, proceeding with cleanup anyway")
251
+ self._cleanup_handlers()
252
+
253
+ @classmethod
254
+ def get_instance(cls, flow_id: int):
255
+ """Get an existing flow logger instance without creating a new one"""
256
+ with cls._instances_lock:
257
+ return cls._instances.get(flow_id)
258
+
259
+ def get_node_logger(self, node_id: str | int) -> NodeLogger:
260
+ """Get a logger for a specific node in this flow"""
261
+ return NodeLogger(node_id, flow_id=self.flow_id, flow_logger_parent=self)
262
+
263
+ # Logging methods with automatic refresh
264
+ def info(self, msg: str, extra: dict = None, node_id: str | int = -1):
265
+ self.refresh_logger_if_needed()
266
+ if node_id != -1:
267
+ msg = f"Node ID: {node_id} - {msg}"
268
+ extra = self.handle_extra_log_info(self.flow_id, extra)
269
+ self.logger.info(msg, extra=extra)
270
+
271
+ def error(self, msg: str, extra: dict = None, node_id: str | int = -1):
272
+ self.refresh_logger_if_needed()
273
+ if node_id != -1:
274
+ msg = f"Node ID: {node_id} - {msg}"
275
+ extra = self.handle_extra_log_info(self.flow_id, extra)
276
+ self.logger.error(msg, extra=extra)
277
+
278
+ def warning(self, msg: str, extra: dict = None, node_id: str | int = -1):
279
+ self.refresh_logger_if_needed()
280
+ if node_id != -1:
281
+ msg = f"Node ID: {node_id} - {msg}"
282
+ extra = self.handle_extra_log_info(self.flow_id, extra)
283
+ self.logger.warning(msg, extra=extra)
284
+
285
+ def debug(self, msg: str, extra: dict = None, node_id: str | int = -1):
286
+ self.refresh_logger_if_needed()
287
+ if node_id != -1:
288
+ msg = f"Node ID: {node_id} - {msg}"
289
+ extra = self.handle_extra_log_info(self.flow_id, extra)
290
+ self.logger.debug(msg, extra=extra)
291
+
292
+ def get_log_filepath(self):
293
+ """Get the path to the log file for this flow"""
294
+ return str(self.log_file_path)
295
+
296
+ def read_from_line(self, start_line: int = 0):
297
+ """Read log content starting from a specific line"""
298
+ # Refresh logger if needed before reading
299
+ self.refresh_logger_if_needed()
300
+
301
+ if self._file_lock.acquire(blocking=False):
302
+ try:
303
+ return read_log_from_line(self.log_file_path, start_line)
304
+ finally:
305
+ self._file_lock.release()
306
+ else:
307
+ # Reading is safe without lock
308
+ return read_log_from_line(self.log_file_path, start_line)
309
+
310
+ @classmethod
311
+ def refresh_all_loggers(cls):
312
+ """Refresh all loggers that need it"""
313
+ with cls._instances_lock:
314
+ for flow_id, instance in cls._instances.items():
315
+ try:
316
+ instance.refresh_logger_if_needed()
317
+ except Exception as e:
318
+ main_logger.error(f"Error refreshing logger for flow {flow_id}: {e}")
319
+
320
+ @classmethod
321
+ def global_cleanup(cls):
322
+ """Cleanup all loggers, handlers and queue listener."""
323
+ with cls._instances_lock:
324
+ # Get a copy of keys to avoid modification during iteration
325
+ flow_ids = list(cls._instances.keys())
326
+
327
+ # Cleanup all instances
328
+ for flow_id in flow_ids:
329
+ try:
330
+ cls.cleanup_instance(flow_id)
331
+ except Exception as e:
332
+ main_logger.error(f"Error cleaning up instance for flow {flow_id}: {e}")
333
+
334
+ # Stop queue listener
335
+ with cls._queue_listener_lock:
336
+ if cls._queue_listener:
337
+ try:
338
+ cls._queue_listener.stop()
339
+ cls._queue_listener = None
340
+ except Exception as e:
341
+ main_logger.error(f"Error stopping queue listener: {e}")
342
+
343
+ # Clear instances
344
+ cls._instances.clear()
345
+
346
+ def __del__(self):
347
+ """Cleanup instance on deletion."""
348
+ try:
349
+ self.cleanup_instance(self.flow_id)
350
+ except:
351
+ pass # Ignore errors during deletion
352
+
353
+
354
def get_logs_dir() -> Path:
    """Return the ``flowfile_logs`` directory inside the temp dir, creating it if needed."""
    logs_dir = Path(get_temp_dir()).joinpath("flowfile_logs")
    logs_dir.mkdir(parents=True, exist_ok=True)
    return logs_dir
360
+
361
+
362
def get_flow_log_file(flow_id: int) -> Path:
    """Return ``<logs dir>/flow_<flow_id>.log`` for the given flow."""
    logs_dir = get_logs_dir()
    return logs_dir.joinpath(f"flow_{flow_id}.log")
365
+
366
+
367
def cleanup_old_logs(max_age_days: int = 7):
    """Remove ``flow_*.log`` files last modified more than ``max_age_days`` days ago.

    A file that cannot be inspected or deleted is reported on the main logger
    and skipped. The number of deleted files is logged when it is non-zero.
    """
    logs_dir = get_logs_dir()
    now = datetime.now().timestamp()
    removed = 0

    for candidate in logs_dir.glob("flow_*.log"):
        try:
            age_seconds = now - candidate.stat().st_mtime
            if age_seconds > (max_age_days * 24 * 60 * 60):
                candidate.unlink()
                removed += 1
        except Exception as e:
            main_logger.error(f"Failed to delete old log file {candidate}: {e}")

    if removed > 0:
        main_logger.info(f"Deleted {removed} old log files")
383
+
384
+
385
def clear_all_flow_logs():
    """Close the file handlers of all flow loggers, then delete the log files.

    Every ``*.log`` file in the logs directory is removed, not only the
    ``flow_*.log`` ones. Failures are reported on the main logger and do not
    stop the remaining work.
    """
    logs_dir = get_logs_dir()
    removed = 0

    try:
        # Release the files first: an open handler can block deletion on some
        # platforms and would keep writing to an unlinked file on others.
        with FlowLogger._instances_lock:
            for flow_id, flow_logger in FlowLogger._instances.items():
                try:
                    underlying = flow_logger._logger
                    if not underlying:
                        continue
                    file_handlers = [
                        h for h in underlying.handlers[:]
                        if isinstance(h, logging.FileHandler)
                    ]
                    for file_handler in file_handlers:
                        file_handler.close()
                        underlying.removeHandler(file_handler)
                except Exception as e:
                    main_logger.error(f"Error closing handlers for flow {flow_id}: {e}")

        for log_file in logs_dir.glob("*.log"):
            try:
                os.remove(log_file)
            except Exception as e:
                main_logger.error(f"Error removing log file {log_file}: {e}")
            else:
                removed += 1

        main_logger.info(f"Successfully deleted {removed} flow log files")
    except Exception as e:
        main_logger.error(f"Failed to delete flow log files: {e}")
414
+
415
+
416
def read_log_from_line(log_file_path: Path, start_line: int = 0):
    """Return the lines of a log file from ``start_line`` (0-based) to the end.

    Args:
        log_file_path: Path of the log file to read.
        start_line: Number of leading lines to skip; values <= 0 skip nothing.

    Returns:
        A list of lines including their line endings. The list is empty when
        ``start_line`` is at or past the end of the file, or when the file is
        missing or cannot be read (the error is logged on the main logger).
    """
    from itertools import islice

    try:
        with open(log_file_path, "r") as file:
            if start_line > 0:
                # islice stops at end of file, so a start_line far beyond the
                # file's length costs nothing extra.
                for _ in islice(file, start_line):
                    pass

            return file.readlines()
    except FileNotFoundError:
        main_logger.error(f"Log file not found: {log_file_path}")
    except Exception as e:
        main_logger.error(f"Error reading log file {log_file_path}: {e}")

    return []
File without changes