Flowfile 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of Flowfile might be problematic. Click here for more details.
- build_backends/__init__.py +0 -0
- build_backends/main.py +313 -0
- build_backends/main_prd.py +202 -0
- flowfile/__init__.py +71 -0
- flowfile/__main__.py +24 -0
- flowfile-0.2.2.dist-info/LICENSE +21 -0
- flowfile-0.2.2.dist-info/METADATA +225 -0
- flowfile-0.2.2.dist-info/RECORD +171 -0
- flowfile-0.2.2.dist-info/WHEEL +4 -0
- flowfile-0.2.2.dist-info/entry_points.txt +9 -0
- flowfile_core/__init__.py +13 -0
- flowfile_core/auth/__init__.py +0 -0
- flowfile_core/auth/jwt.py +140 -0
- flowfile_core/auth/models.py +40 -0
- flowfile_core/auth/secrets.py +178 -0
- flowfile_core/configs/__init__.py +35 -0
- flowfile_core/configs/flow_logger.py +433 -0
- flowfile_core/configs/node_store/__init__.py +0 -0
- flowfile_core/configs/node_store/nodes.py +98 -0
- flowfile_core/configs/settings.py +120 -0
- flowfile_core/database/__init__.py +0 -0
- flowfile_core/database/connection.py +51 -0
- flowfile_core/database/init_db.py +45 -0
- flowfile_core/database/models.py +41 -0
- flowfile_core/fileExplorer/__init__.py +0 -0
- flowfile_core/fileExplorer/funcs.py +259 -0
- flowfile_core/fileExplorer/utils.py +53 -0
- flowfile_core/flowfile/FlowfileFlow.py +1403 -0
- flowfile_core/flowfile/__init__.py +0 -0
- flowfile_core/flowfile/_extensions/__init__.py +0 -0
- flowfile_core/flowfile/_extensions/real_time_interface.py +51 -0
- flowfile_core/flowfile/analytics/__init__.py +0 -0
- flowfile_core/flowfile/analytics/analytics_processor.py +123 -0
- flowfile_core/flowfile/analytics/graphic_walker.py +60 -0
- flowfile_core/flowfile/analytics/schemas/__init__.py +0 -0
- flowfile_core/flowfile/analytics/utils.py +9 -0
- flowfile_core/flowfile/connection_manager/__init__.py +3 -0
- flowfile_core/flowfile/connection_manager/_connection_manager.py +48 -0
- flowfile_core/flowfile/connection_manager/models.py +10 -0
- flowfile_core/flowfile/database_connection_manager/__init__.py +0 -0
- flowfile_core/flowfile/database_connection_manager/db_connections.py +139 -0
- flowfile_core/flowfile/database_connection_manager/models.py +15 -0
- flowfile_core/flowfile/extensions.py +36 -0
- flowfile_core/flowfile/flow_data_engine/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/create/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +146 -0
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1521 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +144 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +24 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +36 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +38 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +90 -0
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -0
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +54 -0
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +20 -0
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +249 -0
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +143 -0
- flowfile_core/flowfile/flow_data_engine/sample_data.py +120 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +36 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +503 -0
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +27 -0
- flowfile_core/flowfile/flow_data_engine/types.py +0 -0
- flowfile_core/flowfile/flow_data_engine/utils.py +212 -0
- flowfile_core/flowfile/flow_node/__init__.py +0 -0
- flowfile_core/flowfile/flow_node/flow_node.py +771 -0
- flowfile_core/flowfile/flow_node/models.py +111 -0
- flowfile_core/flowfile/flow_node/schema_callback.py +70 -0
- flowfile_core/flowfile/handler.py +123 -0
- flowfile_core/flowfile/manage/__init__.py +0 -0
- flowfile_core/flowfile/manage/compatibility_enhancements.py +70 -0
- flowfile_core/flowfile/manage/manage_flowfile.py +0 -0
- flowfile_core/flowfile/manage/open_flowfile.py +136 -0
- flowfile_core/flowfile/setting_generator/__init__.py +2 -0
- flowfile_core/flowfile/setting_generator/setting_generator.py +41 -0
- flowfile_core/flowfile/setting_generator/settings.py +176 -0
- flowfile_core/flowfile/sources/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/__init__.py +3 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +159 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +172 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +173 -0
- flowfile_core/flowfile/sources/external_sources/base_class.py +39 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +2 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/exchange_rate.py +0 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +100 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/google_sheet.py +74 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +29 -0
- flowfile_core/flowfile/sources/external_sources/factory.py +22 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +90 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +328 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +379 -0
- flowfile_core/flowfile/util/__init__.py +0 -0
- flowfile_core/flowfile/util/calculate_layout.py +137 -0
- flowfile_core/flowfile/util/execution_orderer.py +141 -0
- flowfile_core/flowfile/utils.py +106 -0
- flowfile_core/main.py +138 -0
- flowfile_core/routes/__init__.py +0 -0
- flowfile_core/routes/auth.py +34 -0
- flowfile_core/routes/logs.py +163 -0
- flowfile_core/routes/public.py +10 -0
- flowfile_core/routes/routes.py +601 -0
- flowfile_core/routes/secrets.py +85 -0
- flowfile_core/run_lock.py +11 -0
- flowfile_core/schemas/__init__.py +0 -0
- flowfile_core/schemas/analysis_schemas/__init__.py +0 -0
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +118 -0
- flowfile_core/schemas/defaults.py +9 -0
- flowfile_core/schemas/external_sources/__init__.py +0 -0
- flowfile_core/schemas/external_sources/airbyte_schemas.py +20 -0
- flowfile_core/schemas/input_schema.py +477 -0
- flowfile_core/schemas/models.py +193 -0
- flowfile_core/schemas/output_model.py +115 -0
- flowfile_core/schemas/schemas.py +106 -0
- flowfile_core/schemas/transform_schema.py +569 -0
- flowfile_core/secrets/__init__.py +0 -0
- flowfile_core/secrets/secrets.py +64 -0
- flowfile_core/utils/__init__.py +0 -0
- flowfile_core/utils/arrow_reader.py +247 -0
- flowfile_core/utils/excel_file_manager.py +18 -0
- flowfile_core/utils/fileManager.py +45 -0
- flowfile_core/utils/fl_executor.py +38 -0
- flowfile_core/utils/utils.py +8 -0
- flowfile_frame/__init__.py +56 -0
- flowfile_frame/__main__.py +12 -0
- flowfile_frame/adapters.py +17 -0
- flowfile_frame/expr.py +1163 -0
- flowfile_frame/flow_frame.py +2093 -0
- flowfile_frame/group_frame.py +199 -0
- flowfile_frame/join.py +75 -0
- flowfile_frame/selectors.py +242 -0
- flowfile_frame/utils.py +184 -0
- flowfile_worker/__init__.py +55 -0
- flowfile_worker/configs.py +95 -0
- flowfile_worker/create/__init__.py +37 -0
- flowfile_worker/create/funcs.py +146 -0
- flowfile_worker/create/models.py +86 -0
- flowfile_worker/create/pl_types.py +35 -0
- flowfile_worker/create/read_excel_tables.py +110 -0
- flowfile_worker/create/utils.py +84 -0
- flowfile_worker/external_sources/__init__.py +0 -0
- flowfile_worker/external_sources/airbyte_sources/__init__.py +0 -0
- flowfile_worker/external_sources/airbyte_sources/cache_manager.py +161 -0
- flowfile_worker/external_sources/airbyte_sources/main.py +89 -0
- flowfile_worker/external_sources/airbyte_sources/models.py +133 -0
- flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
- flowfile_worker/external_sources/sql_source/__init__.py +0 -0
- flowfile_worker/external_sources/sql_source/main.py +56 -0
- flowfile_worker/external_sources/sql_source/models.py +72 -0
- flowfile_worker/flow_logger.py +58 -0
- flowfile_worker/funcs.py +327 -0
- flowfile_worker/main.py +108 -0
- flowfile_worker/models.py +95 -0
- flowfile_worker/polars_fuzzy_match/__init__.py +0 -0
- flowfile_worker/polars_fuzzy_match/matcher.py +435 -0
- flowfile_worker/polars_fuzzy_match/models.py +36 -0
- flowfile_worker/polars_fuzzy_match/pre_process.py +213 -0
- flowfile_worker/polars_fuzzy_match/process.py +86 -0
- flowfile_worker/polars_fuzzy_match/utils.py +50 -0
- flowfile_worker/process_manager.py +36 -0
- flowfile_worker/routes.py +440 -0
- flowfile_worker/secrets.py +148 -0
- flowfile_worker/spawner.py +187 -0
- flowfile_worker/utils.py +25 -0
- test_utils/__init__.py +3 -0
- test_utils/postgres/__init__.py +1 -0
- test_utils/postgres/commands.py +109 -0
- test_utils/postgres/fixtures.py +417 -0
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Secure storage module for FlowFile credentials and secrets.
|
|
3
|
+
"""
|
|
4
|
+
from cryptography.fernet import Fernet
|
|
5
|
+
import os
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class SecureStorage:
    """A secure local storage mechanism for secrets using Fernet encryption.

    Secrets are grouped per service into one encrypted JSON file each
    (``<service>.json.enc``); every file is encrypted with a single Fernet
    key kept next to them in ``.secret_key``.
    """

    def __init__(self):
        env = os.environ.get("FLOWFILE_MODE")
        logger.debug(f'Using secure storage in {env} mode')
        # Reuse the value read above instead of doing a second env lookup.
        if env == "electron":
            # Desktop build: keep secrets in the per-user app-data directory.
            app_data = os.environ.get("APPDATA") or os.path.expanduser("~/.config")
            self.storage_path = Path(app_data) / "flowfile"
        else:
            # NOTE(review): the default "/tmp/.flowfile" is a predictable,
            # world-visible location; deployments should set
            # SECURE_STORAGE_PATH explicitly.
            self.storage_path = Path(os.environ.get("SECURE_STORAGE_PATH", "/tmp/.flowfile"))
        # Ensure the directory exists in both modes.
        self.storage_path.mkdir(parents=True, exist_ok=True)
        logger.debug(f"Using SECURE_STORAGE_PATH: {self.storage_path}")
        try:
            # Owner-only directory; best effort (chmod is a no-op on Windows).
            os.chmod(self.storage_path, 0o700)
        except Exception as e:
            logger.debug(f"Could not set permissions on storage directory: {e}")

        self.key_path = self.storage_path / ".secret_key"
        if not self.key_path.exists():
            # Create the key file with owner-only permissions from the start,
            # so the key is never readable by other users even briefly (the
            # previous open()+chmod sequence left a window governed by the
            # process umask).
            fd = os.open(self.key_path, os.O_WRONLY | os.O_CREAT, 0o600)
            with os.fdopen(fd, "wb") as f:
                f.write(Fernet.generate_key())
            try:
                # Re-assert in case the file pre-existed with looser bits.
                os.chmod(self.key_path, 0o600)
            except Exception as e:
                logger.debug(f"Could not set permissions on key file: {e}")

    def _get_store_path(self, service_name):
        """Get the path to the encrypted store file for a service."""
        return self.storage_path / f"{service_name}.json.enc"

    def _read_store(self, service_name):
        """Read and decrypt the store file for a service.

        Returns an empty dict when the file is missing or cannot be
        decrypted/parsed (deliberate best-effort behaviour).
        """
        path = self._get_store_path(service_name)
        if not path.exists():
            return {}

        try:
            with open(self.key_path, "rb") as f:
                key = f.read()
            with open(path, "rb") as f:
                data = f.read()

            return json.loads(Fernet(key).decrypt(data).decode())
        except Exception as e:
            logger.debug(f"Error reading from encrypted store: {e}")
            return {}

    def _write_store(self, service_name, data):
        """Encrypt and write ``data`` (a dict) to the store file for a service."""
        try:
            with open(self.key_path, "rb") as f:
                key = f.read()

            encrypted = Fernet(key).encrypt(json.dumps(data).encode())
            path = self._get_store_path(service_name)

            with open(path, "wb") as f:
                f.write(encrypted)
            try:
                # Store files hold secrets too: owner-only, best effort.
                os.chmod(path, 0o600)
            except Exception as e:
                logger.debug(f"Could not set permissions on store file: {e}")
        except Exception as e:
            logger.error(f"Failed to write to secure store: {e}")

    def get_password(self, service_name, username):
        """Retrieve a password from secure storage (None if not present)."""
        store = self._read_store(service_name)
        return store.get(username)

    def set_password(self, service_name, username, password):
        """Store (or overwrite) a password in secure storage."""
        store = self._read_store(service_name)
        store[username] = password
        self._write_store(service_name, store)

    def delete_password(self, service_name, username):
        """Delete a password from secure storage; missing entries are a no-op."""
        store = self._read_store(service_name)
        if username in store:
            del store[username]
            self._write_store(service_name, store)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# Module-level singleton; the convenience functions below delegate to it.
_storage = SecureStorage()
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def get_password(service_name, username):
    """Fetch a secret from the module-level secure store.

    Args:
        service_name: Service the secret belongs to.
        username: Username or key identifying the secret.

    Returns:
        The stored password, or ``None`` when no entry exists.
    """
    return _storage.get_password(service_name, username)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def set_password(service_name, username, password):
    """Persist a secret into the module-level secure store.

    Args:
        service_name: Service the secret belongs to.
        username: Username or key identifying the secret.
        password: The password or secret value to store.
    """
    _storage.set_password(service_name, username, password)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def delete_password(service_name, username):
    """Remove a secret from the module-level secure store (no-op if absent).

    Args:
        service_name: Service the secret belongs to.
        username: Username or key identifying the secret to remove.
    """
    _storage.delete_password(service_name, username)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def get_docker_secret_key():
    """Read the master key from the mounted Docker secret.

    Returns:
        str: The master key read from ``/run/secrets/flowfile_master_key``.

    Raises:
        RuntimeError: When the secret is not mounted or cannot be read.
    """
    secret_path = "/run/secrets/flowfile_master_key"
    # Guard clause: fail loudly when the secret is not mounted at all.
    if not os.path.exists(secret_path):
        logger.critical("Running in Docker but flowfile_master_key secret is not mounted!")
        raise RuntimeError("Docker secret 'flowfile_master_key' is not mounted")
    try:
        with open(secret_path, "r") as secret_file:
            return secret_file.read().strip()
    except Exception as e:
        logger.error(f"Failed to read master key from Docker secret: {e}")
        raise RuntimeError("Failed to read master key from Docker secret")
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def get_master_key():
    """Return the master encryption key, generating one on first use.

    In Docker the key comes from the mounted secret; otherwise it is
    looked up in — or generated into — the local secure storage.

    Returns:
        str: The master encryption key
    """
    if os.environ.get("RUNNING_IN_DOCKER") == "true":
        return get_docker_secret_key()

    stored = get_password("flowfile", "master_key")
    if stored:
        return stored
    # First run outside Docker: mint a key and persist it for next time.
    fresh = Fernet.generate_key().decode()
    set_password("flowfile", "master_key", fresh)
    return fresh
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# flowfile_core/flowfile_core/configs/__init__.py
"""Package init: forces electron mode and configures the shared logger."""
import logging
import sys
from pathlib import Path
import os

# NOTE: this unconditionally overrides any externally supplied FLOWFILE_MODE.
os.environ["FLOWFILE_MODE"] = "electron"

# Shared application logger; other modules fetch it by the same name.
logger = logging.getLogger('PipelineHandler')
logger.setLevel(logging.INFO)
logger.propagate = False

# Console handler, INFO level, writing to stdout.
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
console_handler.setFormatter(formatter)

# Replace any handlers left over from earlier imports/reloads.
if logger.hasHandlers():
    logger.handlers.clear()
logger.addHandler(console_handler)

# Pre-create the log directory in the system temp location at import time.
try:
    from tempfile import gettempdir
    log_dir = Path(gettempdir()) / "flowfile_logs"
    log_dir.mkdir(exist_ok=True)
except Exception as e:
    logger.warning(f"Failed to create logs directory: {e}")

logger.info("Logging system initialized")
|
|
@@ -0,0 +1,433 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from flowfile_core.configs.settings import get_temp_dir
|
|
5
|
+
import os
|
|
6
|
+
import logging.handlers
|
|
7
|
+
import queue
|
|
8
|
+
import threading
|
|
9
|
+
|
|
10
|
+
# Unbounded record queue used by FlowLogger's queue listener.
# NOTE(review): queue.Queue is thread-safe but NOT process-safe despite the
# name — confirm whether multiprocessing hand-off is actually expected here.
_process_safe_queue = queue.Queue(-1)
# Application-wide logger; same name as the one configured in configs/__init__.
main_logger = logging.getLogger('PipelineHandler')
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class NodeLogger:
|
|
15
|
+
"""Logger for individual flow nodes"""
|
|
16
|
+
|
|
17
|
+
def __init__(self, node_id: str | int, flow_id: int, flow_logger_parent: "FlowLogger"):
|
|
18
|
+
self.flow_id = flow_id
|
|
19
|
+
self.node_id = node_id
|
|
20
|
+
self.flow_logger_parent = flow_logger_parent
|
|
21
|
+
|
|
22
|
+
@property
|
|
23
|
+
def logger(self) -> logging.Logger:
|
|
24
|
+
return self.flow_logger_parent.logger
|
|
25
|
+
|
|
26
|
+
def info(self, msg: str):
|
|
27
|
+
self.logger.info(f"Node ID: {self.node_id} - {msg}")
|
|
28
|
+
|
|
29
|
+
def error(self, msg: str):
|
|
30
|
+
self.logger.error(f"Node ID: {self.node_id} - {msg}")
|
|
31
|
+
|
|
32
|
+
def warning(self, msg: str):
|
|
33
|
+
self.logger.warning(f"Node ID: {self.node_id} - {msg}")
|
|
34
|
+
|
|
35
|
+
def debug(self, msg: str):
|
|
36
|
+
self.logger.debug(f"Node ID: {self.node_id} - {msg}")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class FlowLogger:
    """Thread-safe, per-flow-id singleton logger writing to a dedicated file.

    Instances are cached in ``_instances`` keyed by ``flow_id``; constructing
    ``FlowLogger(fid)`` twice returns the same object. Each instance owns a
    ``logging`` logger named ``FlowExecution.<flow_id>`` with a FileHandler
    pointing at ``flow_<flow_id>.log`` in the logs directory.
    """
    # flow_id -> FlowLogger singleton cache, guarded by _instances_lock.
    _instances = {}
    _instances_lock = threading.RLock()
    # Shared QueueListener (one per process), guarded by _queue_listener_lock.
    _queue_listener = None
    _queue_listener_lock = threading.Lock()

    @staticmethod
    def handle_extra_log_info(flow_id: int, extra: dict = None) -> dict:
        """Return ``extra`` with 'flow_id' added.

        NOTE: mutates the caller-supplied dict in place when one is given.
        """
        if extra is None:
            extra = {}
        extra['flow_id'] = flow_id
        return extra

    def __new__(cls, flow_id: int, clear_existing_logs: bool = False):
        # Singleton-per-flow_id: create and cache on first use; on subsequent
        # constructions honour clear_existing_logs by truncating the file.
        with cls._instances_lock:
            if flow_id not in cls._instances:
                instance = super().__new__(cls)
                instance._initialize(flow_id, clear_existing_logs)
                cls._instances[flow_id] = instance
            else:
                instance = cls._instances[flow_id]
                if clear_existing_logs:
                    instance.clear_log_file()
            return instance

    def _initialize(self, flow_id: int, clear_existing_logs: bool):
        # Called exactly once per instance, from __new__ (so __init__ is not
        # relied upon; re-construction must not re-run this).
        self.flow_id = flow_id
        self._logger = None
        self.log_file_path = get_flow_log_file(self.flow_id)
        self._file_lock = threading.RLock()
        self._setup_new_logger()

        # Lazily start the process-wide queue listener on first instance.
        with self._queue_listener_lock:
            if not FlowLogger._queue_listener:
                FlowLogger._start_queue_listener()

    def _setup_new_logger(self):
        """Creates a new logger instance with appropriate handlers."""
        logger_name = f'FlowExecution.{self.flow_id}'
        self._logger = logging.getLogger(logger_name)
        self._logger.setLevel(logging.INFO)
        self.setup_logging()

    @property
    def logger(self):
        """Get the logger instance, recreating it if it was cleaned up."""
        if self._logger is None:
            self._setup_new_logger()
        return self._logger

    def cleanup_self(self):
        """Clean up just this logger instance (not global)."""
        # Try with non-blocking lock first; _file_lock is an RLock, so a
        # failed acquire means another thread holds it.
        if self._file_lock.acquire(blocking=False):
            try:
                self._cleanup_handlers()
            finally:
                self._file_lock.release()
        else:
            # If we can't get lock, proceed anyway (best effort over deadlock).
            main_logger.warning(f"Could not acquire lock for flow {self.flow_id}, proceeding with cleanup anyway")
            self._cleanup_handlers()

    def _cleanup_handlers(self):
        """Close and remove all handlers attached to this flow's logger."""
        if self._logger:
            # Iterate a copy: removeHandler mutates the handlers list.
            for handler in self._logger.handlers[:]:
                try:
                    handler.close()
                    self._logger.removeHandler(handler)
                except Exception as e:
                    main_logger.error(f"Error closing handler: {e}")

    def recreate_self(self):
        """Recreate this logger instance after cleanup."""
        # Try with non-blocking lock first
        if self._file_lock.acquire(blocking=False):
            try:
                self._recreate_impl()
            finally:
                self._file_lock.release()
        else:
            # If we can't get lock, proceed anyway
            main_logger.warning(f"Could not acquire lock for flow {self.flow_id}, proceeding with recreation anyway")
            self._recreate_impl()

    def _recreate_impl(self):
        """Implementation of recreate operation: empty file + fresh handlers."""
        # Make sure the log directory exists
        log_dir = Path(self.log_file_path).parent
        log_dir.mkdir(exist_ok=True, parents=True)

        try:
            # Create an empty file (truncating any stale content).
            with open(self.log_file_path, 'w') as f:
                pass

            # Re-setup the logger
            self._setup_new_logger()
            main_logger.info(f"Log file was recreated for flow {self.flow_id}")
        except Exception as e:
            main_logger.error(f"Error recreating log file for flow {self.flow_id}: {e}")

    def refresh_logger_if_needed(self):
        """Recreate the log file/handlers if the file vanished; True if it did."""
        if not os.path.exists(self.log_file_path):
            main_logger.info(f"Log file missing, recreating: {self.log_file_path}")
            self.cleanup_self()
            self.recreate_self()
            return True
        return False

    @classmethod
    def _start_queue_listener(cls):
        """Start the queue listener for asynchronous logging.

        NOTE(review): the listener's only handler is a QueueHandler bound to
        the SAME queue, so any record placed on the queue would simply be
        re-enqueued. No logger in this module ever attaches a QueueHandler,
        so the listener appears inert — confirm the intended design.
        """
        queue_handler = logging.handlers.QueueHandler(_process_safe_queue)
        cls._queue_listener = logging.handlers.QueueListener(
            _process_safe_queue,
            queue_handler,
            respect_handler_level=True
        )
        cls._queue_listener.start()

    def setup_logging(self):
        """Set up file handlers for logging (lock-tolerant wrapper)."""
        if self._file_lock.acquire(blocking=False):
            try:
                self._setup_logging_impl()
            finally:
                self._file_lock.release()
        else:
            # Try with timeout
            if self._file_lock.acquire(timeout=1):
                try:
                    self._setup_logging_impl()
                finally:
                    self._file_lock.release()
            else:
                # If still can't get lock, proceed anyway
                main_logger.warning(f"Could not acquire lock for flow {self.flow_id}, proceeding with setup anyway")
                self._setup_logging_impl()

    def _setup_logging_impl(self):
        """Implementation of setup_logging without lock handling."""
        # Remove existing handlers (only FileHandlers; others are left alone).
        for handler in self._logger.handlers[:]:
            if isinstance(handler, logging.FileHandler):
                handler.close()
                self._logger.removeHandler(handler)

        # Make sure the log directory exists
        log_dir = Path(self.log_file_path).parent
        log_dir.mkdir(exist_ok=True, parents=True)

        # Add file handler
        file_handler = logging.FileHandler(self.log_file_path)
        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        file_handler.setFormatter(formatter)
        self._logger.addHandler(file_handler)

    def clear_log_file(self):
        """Clear the log file for this flow (lock-tolerant wrapper)."""
        if self._file_lock.acquire(blocking=False):
            try:
                self._clear_log_impl()
            finally:
                self._file_lock.release()
        else:
            # If can't get lock, try with timeout
            if self._file_lock.acquire(timeout=1):
                try:
                    self._clear_log_impl()
                finally:
                    self._file_lock.release()
            else:
                # If still can't get lock, proceed anyway
                main_logger.warning(
                    f"Could not acquire lock for flow {self.flow_id}, proceeding with file clearing anyway")
                self._clear_log_impl()

    def _clear_log_impl(self):
        """Implementation of clear_log_file without lock handling."""
        try:
            # Ensure parent directory exists (recreates file if missing).
            self.refresh_logger_if_needed()
            # Truncate file
            with open(self.log_file_path, 'w') as f:
                pass
            main_logger.info(f"Log file cleared for flow {self.flow_id}")
        except Exception as e:
            main_logger.error(f"Error clearing log file {self.log_file_path}: {e}")

    @classmethod
    def cleanup_instance(cls, flow_id: int):
        """Clean up a specific flow logger instance and drop it from the cache."""
        with cls._instances_lock:
            if flow_id in cls._instances:
                instance = cls._instances[flow_id]
                instance.cleanup_logging()
                del cls._instances[flow_id]

    def cleanup_logging(self):
        """Clean up logging for this flow (lock-tolerant wrapper)."""
        if self._file_lock.acquire(blocking=False):
            try:
                self._cleanup_handlers()
            finally:
                self._file_lock.release()
        else:
            # If can't get lock, proceed anyway
            main_logger.warning(f"Could not acquire lock for flow {self.flow_id}, proceeding with cleanup anyway")
            self._cleanup_handlers()

    @classmethod
    def get_instance(cls, flow_id: int):
        """Get an existing flow logger instance without creating a new one."""
        with cls._instances_lock:
            return cls._instances.get(flow_id)

    def get_node_logger(self, node_id: str | int) -> NodeLogger:
        """Get a logger for a specific node in this flow."""
        return NodeLogger(node_id, flow_id=self.flow_id, flow_logger_parent=self)

    # Logging methods with automatic refresh (recreate file if it vanished).
    def info(self, msg: str, extra: dict = None, node_id: str | int = -1):
        self.refresh_logger_if_needed()
        if node_id != -1:
            msg = f"Node ID: {node_id} - {msg}"
        extra = self.handle_extra_log_info(self.flow_id, extra)
        self.logger.info(msg, extra=extra)

    def error(self, msg: str, extra: dict = None, node_id: str | int = -1):
        self.refresh_logger_if_needed()
        if node_id != -1:
            msg = f"Node ID: {node_id} - {msg}"
        extra = self.handle_extra_log_info(self.flow_id, extra)
        self.logger.error(msg, extra=extra)

    def warning(self, msg: str, extra: dict = None, node_id: str | int = -1):
        self.refresh_logger_if_needed()
        if node_id != -1:
            msg = f"Node ID: {node_id} - {msg}"
        extra = self.handle_extra_log_info(self.flow_id, extra)
        self.logger.warning(msg, extra=extra)

    def debug(self, msg: str, extra: dict = None, node_id: str | int = -1):
        self.refresh_logger_if_needed()
        if node_id != -1:
            msg = f"Node ID: {node_id} - {msg}"
        extra = self.handle_extra_log_info(self.flow_id, extra)
        self.logger.debug(msg, extra=extra)

    def get_log_filepath(self):
        """Get the path to the log file for this flow (as str)."""
        return str(self.log_file_path)

    def read_from_line(self, start_line: int = 0):
        """Read log content starting from a specific line."""
        # Refresh logger if needed before reading
        self.refresh_logger_if_needed()

        if self._file_lock.acquire(blocking=False):
            try:
                return read_log_from_line(self.log_file_path, start_line)
            finally:
                self._file_lock.release()
        else:
            # Reading is safe without lock
            return read_log_from_line(self.log_file_path, start_line)

    @classmethod
    def refresh_all_loggers(cls):
        """Refresh all loggers that need it."""
        with cls._instances_lock:
            for flow_id, instance in cls._instances.items():
                try:
                    instance.refresh_logger_if_needed()
                except Exception as e:
                    main_logger.error(f"Error refreshing logger for flow {flow_id}: {e}")

    @classmethod
    def global_cleanup(cls):
        """Cleanup all loggers, handlers and queue listener."""
        with cls._instances_lock:
            # Get a copy of keys to avoid modification during iteration
            flow_ids = list(cls._instances.keys())

            # Cleanup all instances
            for flow_id in flow_ids:
                try:
                    cls.cleanup_instance(flow_id)
                except Exception as e:
                    main_logger.error(f"Error cleaning up instance for flow {flow_id}: {e}")

            # Stop queue listener
            with cls._queue_listener_lock:
                if cls._queue_listener:
                    try:
                        cls._queue_listener.stop()
                        cls._queue_listener = None
                    except Exception as e:
                        main_logger.error(f"Error stopping queue listener: {e}")

            # Clear instances
            cls._instances.clear()

    def __del__(self):
        """Cleanup instance on deletion.

        NOTE(review): __del__ may run during interpreter shutdown when module
        globals/locks are already torn down — hence the blanket except.
        """
        try:
            self.cleanup_instance(self.flow_id)
        except:
            pass  # Ignore errors during deletion
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def get_logs_dir() -> Path:
    """Return (creating it if missing) the directory holding flow log files."""
    logs_dir = Path(get_temp_dir()) / "flowfile_logs"
    logs_dir.mkdir(exist_ok=True, parents=True)
    return logs_dir
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def get_flow_log_file(flow_id: int) -> Path:
    """Return the path of the log file dedicated to *flow_id*."""
    file_name = f"flow_{flow_id}.log"
    return get_logs_dir() / file_name
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def cleanup_old_logs(max_age_days: int = 7):
    """Remove flow log files whose mtime is older than *max_age_days* days."""
    cutoff_seconds = max_age_days * 24 * 60 * 60
    reference = datetime.now().timestamp()
    removed = 0

    for log_file in get_logs_dir().glob("flow_*.log"):
        try:
            # Per-file try/except: one undeletable file must not stop the sweep.
            if (reference - log_file.stat().st_mtime) > cutoff_seconds:
                log_file.unlink()
                removed += 1
        except Exception as e:
            main_logger.error(f"Failed to delete old log file {log_file}: {e}")

    if removed > 0:
        main_logger.info(f"Deleted {removed} old log files")
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def clear_all_flow_logs():
    """Delete every flow log file, closing live file handlers first."""
    logs_dir = get_logs_dir()
    removed = 0

    try:
        # Step 1: close all FileHandlers so the files can be deleted on
        # platforms that lock open files.
        with FlowLogger._instances_lock:
            for flow_id, instance in FlowLogger._instances.items():
                try:
                    if instance._logger:
                        for handler in instance._logger.handlers[:]:
                            if isinstance(handler, logging.FileHandler):
                                handler.close()
                                instance._logger.removeHandler(handler)
                except Exception as e:
                    main_logger.error(f"Error closing handlers for flow {flow_id}: {e}")

        # Step 2: remove every .log file in the logs directory.
        for log_file in logs_dir.glob("*.log"):
            try:
                os.remove(log_file)
                removed += 1
            except Exception as e:
                main_logger.error(f"Error removing log file {log_file}: {e}")

        main_logger.info(f"Successfully deleted {removed} flow log files")
    except Exception as e:
        main_logger.error(f"Failed to delete flow log files: {e}")
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
def read_log_from_line(log_file_path: Path, start_line: int = 0):
    """Return the lines of *log_file_path* from *start_line* (0-based) onward.

    Missing or unreadable files yield an empty list (errors are logged,
    never raised).
    """
    result = []
    try:
        with open(log_file_path, "r") as fh:
            # Advance past the first start_line lines without buffering them.
            skipped = 0
            while skipped < start_line:
                if next(fh, None) is None:
                    break  # fewer lines than requested: nothing left to read
                skipped += 1
            result = fh.readlines()
    except FileNotFoundError:
        main_logger.error(f"Log file not found: {log_file_path}")
    except Exception as e:
        main_logger.error(f"Error reading log file {log_file_path}: {e}")

    return result
|
|
File without changes
|