flowcept 0.9.17__py3-none-any.whl → 0.9.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowcept/agents/agent_client.py +10 -4
- flowcept/agents/agents_utils.py +54 -19
- flowcept/agents/flowcept_agent.py +116 -12
- flowcept/agents/flowcept_ctx_manager.py +116 -46
- flowcept/agents/gui/gui_utils.py +21 -3
- flowcept/agents/prompts/general_prompts.py +1 -1
- flowcept/agents/prompts/in_memory_query_prompts.py +158 -45
- flowcept/agents/tools/general_tools.py +20 -3
- flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py +14 -31
- flowcept/commons/daos/docdb_dao/lmdb_dao.py +48 -0
- flowcept/commons/daos/keyvalue_dao.py +12 -3
- flowcept/commons/daos/mq_dao/mq_dao_base.py +37 -20
- flowcept/commons/daos/mq_dao/mq_dao_kafka.py +2 -2
- flowcept/commons/daos/mq_dao/mq_dao_redis.py +33 -2
- flowcept/commons/flowcept_dataclasses/task_object.py +4 -1
- flowcept/configs.py +17 -3
- flowcept/flowcept_api/flowcept_controller.py +5 -1
- flowcept/flowceptor/adapters/mlflow/interception_event_handler.py +33 -2
- flowcept/flowceptor/adapters/mlflow/mlflow_interceptor.py +18 -4
- flowcept/flowceptor/adapters/tensorboard/tensorboard_interceptor.py +1 -0
- flowcept/flowceptor/consumers/agent/base_agent_context_manager.py +9 -10
- flowcept/flowceptor/consumers/base_consumer.py +22 -4
- flowcept/flowceptor/consumers/document_inserter.py +22 -1
- flowcept/instrumentation/flowcept_task.py +147 -51
- flowcept/instrumentation/task_capture.py +10 -1
- flowcept/version.py +1 -1
- {flowcept-0.9.17.dist-info → flowcept-0.9.19.dist-info}/METADATA +8 -1
- {flowcept-0.9.17.dist-info → flowcept-0.9.19.dist-info}/RECORD +32 -32
- {flowcept-0.9.17.dist-info → flowcept-0.9.19.dist-info}/WHEEL +1 -1
- resources/sample_settings.yaml +2 -1
- {flowcept-0.9.17.dist-info → flowcept-0.9.19.dist-info}/entry_points.txt +0 -0
- {flowcept-0.9.17.dist-info → flowcept-0.9.19.dist-info}/licenses/LICENSE +0 -0

flowcept/commons/daos/mq_dao/mq_dao_base.py
CHANGED

@@ -7,6 +7,7 @@ import msgpack
 from time import time
 import flowcept.commons
 from flowcept.commons.autoflush_buffer import AutoflushBuffer
+from flowcept.commons.daos.keyvalue_dao import KeyValueDAO
 from flowcept.commons.utils import chunked
 from flowcept.commons.flowcept_logger import FlowceptLogger
 from flowcept.configs import (
@@ -29,6 +30,8 @@ class MQDao(object):
 
     ENCODER = GenericJSONEncoder if JSON_SERIALIZER == "complex" else None
     # TODO we don't have a unit test to cover complex dict!
+    MQ_THREAD_SET_ID = "started_mq_thread_execution"
+    MQ_FLUSH_COMPLETE_SET_ID = "pending_mq_flush_complete"
 
     @staticmethod
     def build(*args, **kwargs) -> "MQDao":
@@ -51,20 +54,6 @@ class MQDao(object):
         else:
             raise NotImplementedError
 
-    @staticmethod
-    def _get_set_name(exec_bundle_id=None):
-        """Get the set name.
-
-        :param exec_bundle_id: A way to group one or many interceptors, and
-         treat each group as a bundle to control when their time_based
-         threads started and ended.
-        :return:
-        """
-        set_id = "started_mq_thread_execution"
-        if exec_bundle_id is not None:
-            set_id += "_" + str(exec_bundle_id)
-        return set_id
-
     def __init__(self, adapter_settings=None):
         self.logger = FlowceptLogger()
         self.started = False
@@ -103,22 +92,36 @@ class MQDao(object):
 
     def register_time_based_thread_init(self, interceptor_instance_id: str, exec_bundle_id=None):
         """Register the time."""
-        set_name = MQDao._get_set_name(exec_bundle_id)
+        set_name = KeyValueDAO.get_set_name(MQDao.MQ_THREAD_SET_ID, exec_bundle_id)
         # self.logger.info(
         #     f"Register start of time_based MQ flush thread {set_name}.{interceptor_instance_id}"
         # )
         self._keyvalue_dao.add_key_into_set(set_name, interceptor_instance_id)
+        flush_set_name = KeyValueDAO.get_set_name(MQDao.MQ_FLUSH_COMPLETE_SET_ID, exec_bundle_id)
+        self._keyvalue_dao.add_key_into_set(flush_set_name, interceptor_instance_id)
 
     def register_time_based_thread_end(self, interceptor_instance_id: str, exec_bundle_id=None):
         """Register time."""
-        set_name = MQDao._get_set_name(exec_bundle_id)
+        set_name = KeyValueDAO.get_set_name(MQDao.MQ_THREAD_SET_ID, exec_bundle_id)
         self.logger.info(f"Registering end of time_based MQ flush thread {set_name}.{interceptor_instance_id}")
         self._keyvalue_dao.remove_key_from_set(set_name, interceptor_instance_id)
         self.logger.info(f"Done registering time_based MQ flush thread {set_name}.{interceptor_instance_id}")
 
     def all_time_based_threads_ended(self, exec_bundle_id=None):
         """Get all time."""
-        set_name = MQDao._get_set_name(exec_bundle_id)
+        set_name = KeyValueDAO.get_set_name(MQDao.MQ_THREAD_SET_ID, exec_bundle_id)
+        return self._keyvalue_dao.set_is_empty(set_name)
+
+    def register_flush_complete(self, interceptor_instance_id: str, exec_bundle_id=None):
+        """Register a flush-complete signal for an interceptor."""
+        set_name = KeyValueDAO.get_set_name(MQDao.MQ_FLUSH_COMPLETE_SET_ID, exec_bundle_id)
+        self.logger.info(f"Registering flush completion {set_name}.{interceptor_instance_id}")
+        self._keyvalue_dao.remove_key_from_set(set_name, interceptor_instance_id)
+        self.logger.info(f"Done registering flush completion {set_name}.{interceptor_instance_id}")
+
+    def all_flush_complete_received(self, exec_bundle_id=None):
+        """Return True when all interceptors in the bundle reported flush completion."""
+        set_name = KeyValueDAO.get_set_name(MQDao.MQ_FLUSH_COMPLETE_SET_ID, exec_bundle_id)
         return self._keyvalue_dao.set_is_empty(set_name)
 
     def set_campaign_id(self, campaign_id=None):
@@ -172,11 +175,14 @@ class MQDao(object):
            if self._time_based_flushing_started:
                self.buffer.stop()
                self._time_based_flushing_started = False
+               self.logger.debug("MQ time-based flushed for the last time!")
            else:
                self.logger.error("MQ time-based flushing is not started")
        else:
            self.buffer = list()
 
+       self.logger.debug("Buffer closed.")
+
     def _stop_timed(self, interceptor_instance_id: str, check_safe_stops: bool = True, bundle_exec_id: int = None):
         t1 = time()
         self._stop(interceptor_instance_id, check_safe_stops, bundle_exec_id)
@@ -190,10 +196,12 @@ class MQDao(object):
 
     def _stop(self, interceptor_instance_id: str = None, check_safe_stops: bool = True, bundle_exec_id: int = None):
         """Stop MQ publisher."""
-        self.logger.debug(f"MQ pub received stop sign: bundle={bundle_exec_id}, interceptor={interceptor_instance_id}")
         self._close_buffer()
-
-
+        if check_safe_stops and MQ_ENABLED:
+            self.logger.debug(
+                f"Sending flush-complete msg. Bundle: {bundle_exec_id}; interceptor id: {interceptor_instance_id}"
+            )
+            self._send_mq_dao_flush_complete(interceptor_instance_id, bundle_exec_id)
         self.logger.debug(f"Sending stop msg. Bundle: {bundle_exec_id}; interceptor id: {interceptor_instance_id}")
         self._send_mq_dao_time_thread_stop(interceptor_instance_id, bundle_exec_id)
         self.started = False
@@ -210,6 +218,15 @@ class MQDao(object):
         # self.logger.info("Control msg sent: " + str(msg))
         self.send_message(msg)
 
+    def _send_mq_dao_flush_complete(self, interceptor_instance_id, exec_bundle_id=None):
+        msg = {
+            "type": "flowcept_control",
+            "info": "mq_flush_complete",
+            "interceptor_instance_id": interceptor_instance_id,
+            "exec_bundle_id": exec_bundle_id,
+        }
+        self.send_message(msg)
+
     def send_document_inserter_stop(self, exec_bundle_id=None):
         """Send the document."""
         # These control_messages are handled by the document inserter
flowcept/commons/daos/mq_dao/mq_dao_kafka.py
CHANGED

@@ -42,7 +42,7 @@ class MQDaoKafka(MQDao):
     def message_listener(self, message_handler: Callable):
         """Get message listener."""
         try:
-            while
+            while self._consumer is not None:
                 msg = self._consumer.poll(1.0)
                 if msg is None:
                     continue
@@ -59,7 +59,7 @@ class MQDaoKafka(MQDao):
         except Exception as e:
             self.logger.exception(e)
         finally:
-            self.
+            self.unsubscribe()
 
     def send_message(self, message: dict, channel=MQ_CHANNEL, serializer=msgpack.dumps):
         """Send the message."""
flowcept/commons/daos/mq_dao/mq_dao_redis.py
CHANGED

@@ -1,5 +1,6 @@
 """MQ redis module."""
 
+from threading import Thread
 from typing import Callable
 import redis
 
@@ -14,12 +15,15 @@ from flowcept.configs import MQ_CHANNEL, MQ_HOST, MQ_PORT, MQ_PASSWORD, MQ_URI,
 class MQDaoRedis(MQDao):
     """MQ redis class."""
 
-    MESSAGE_TYPES_IGNORE = {"psubscribe"}
+    MESSAGE_TYPES_IGNORE = {"psubscribe", "subscribe", "pong"}
 
     def __init__(self, adapter_settings=None):
         super().__init__(adapter_settings)
 
         self._consumer = None
+        self._ping_thread = None
+        self._ping_stop = False
+
         use_same_as_kv = MQ_SETTINGS.get("same_as_kvdb", False)
         if use_same_as_kv:
             if KVDB_ENABLED:
@@ -37,6 +41,26 @@ class MQDaoRedis(MQDao):
         """
         self._consumer = self._producer.pubsub()
         self._consumer.psubscribe(MQ_CHANNEL)
+        self._start_ping_thread()
+
+    def _start_ping_thread(self, interval: int = 30):
+        """Start a background thread to ping Redis pubsub periodically."""
+        if self._ping_thread and self._ping_thread.is_alive():
+            return
+
+        self._ping_stop = False
+
+        def _pinger():
+            while not self._ping_stop:
+                try:
+                    if self._consumer is not None:
+                        self._consumer.ping()
+                except (redis.exceptions.ConnectionError, redis.exceptions.TimeoutError) as e:
+                    self.logger.critical(f"Redis PubSub ping failed: {e}")
+                sleep(interval)
+
+        self._ping_thread = Thread(target=_pinger, daemon=True)
+        self._ping_thread.start()
 
     def unsubscribe(self):
         """
@@ -75,8 +99,15 @@ class MQDaoRedis(MQDao):
                     current_trials = 0
                 except (redis.exceptions.ConnectionError, redis.exceptions.TimeoutError) as e:
                     current_trials += 1
-                    self.logger.critical(f"Redis connection lost: {e}.
+                    self.logger.critical(f"Redis connection lost: {e}. Trying to reconnect in 3 seconds...")
                     sleep(3)
+                    try:
+                        self.subscribe()
+                        self.logger.warning(f"Redis reconnected after {current_trials} trials.")
+                        current_trials = 0
+                    except Exception as e:
+                        self.logger.critical(f"Redis error when trying to reconnect: {e}.")
+
                 except Exception as e:
                     self.logger.exception(e)
                     continue
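
Note: idle pub/sub connections can be dropped by server timeouts or intermediaries; the new daemon thread keeps the subscription alive, and the server's `pong` replies are why `"pong"` was added to `MESSAGE_TYPES_IGNORE`. A standalone sketch of the same pattern (names here are illustrative; redis-py's `PubSub.ping()` is a real call):

```python
import threading

import redis

r = redis.Redis()
pubsub = r.pubsub()
pubsub.psubscribe("interception")

stop = threading.Event()

def _keepalive(interval: float = 30.0):
    # Periodic pings generate traffic so the idle pub/sub socket stays open.
    while not stop.is_set():
        try:
            pubsub.ping()
        except (redis.exceptions.ConnectionError, redis.exceptions.TimeoutError):
            pass  # recovery is left to the listener's reconnect loop
        stop.wait(interval)

threading.Thread(target=_keepalive, daemon=True).start()
```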
flowcept/commons/flowcept_dataclasses/task_object.py
CHANGED

@@ -134,7 +134,10 @@ class TaskObject:
     """User-defined tags attached to the task."""
 
     agent_id: str = None
-    """Identifier of the agent
+    """Identifier of the agent that executed (or is going to execute) this task."""
+
+    source_agent_id: str = None
+    """Identifier of the agent that sent this task to be executed (if any)."""
 
     _DEFAULT_ENRICH_VALUES = {
         "node_name": NODE_NAME,
flowcept/configs.py
CHANGED

@@ -9,7 +9,7 @@ from flowcept.version import __version__
 PROJECT_NAME = "flowcept"
 
 DEFAULT_SETTINGS = {
-    "
+    "flowcept_version": __version__,
     "log": {"log_file_level": "disable", "log_stream_level": "disable"},
     "project": {"dump_buffer": {"enabled": True}},
     "telemetry_capture": {},
@@ -81,7 +81,7 @@ FLOWCEPT_USER = settings["experiment"].get("user", "blank_user")
 
 MQ_INSTANCES = settings["mq"].get("instances", None)
 MQ_SETTINGS = settings["mq"]
-MQ_ENABLED = os.getenv("MQ_ENABLED", settings["mq"].get("enabled", True))
+MQ_ENABLED = os.getenv("MQ_ENABLED", str(settings["mq"].get("enabled", True))).strip().lower() in _TRUE_VALUES
 MQ_TYPE = os.getenv("MQ_TYPE", settings["mq"].get("type", "redis"))
 MQ_CHANNEL = os.getenv("MQ_CHANNEL", settings["mq"].get("channel", "interception"))
 MQ_PASSWORD = settings["mq"].get("password", None)
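
Note: the old `MQ_ENABLED` line returned the raw env string whenever the variable was set, and any non-empty string is truthy in Python, so `MQ_ENABLED=false` was silently ignored. The rewrite normalizes to a real boolean. A small illustration (`_TRUE_VALUES` is defined elsewhere in configs.py; its exact contents are an assumption here):

```python
_TRUE_VALUES = {"true", "1", "t", "y", "yes"}  # assumed contents

env = "false"  # e.g., MQ_ENABLED=false in the environment

old_value = env  # os.getenv returned the raw string
new_value = str(env).strip().lower() in _TRUE_VALUES

print(bool(old_value))  # True  -> MQ stayed enabled despite the env var
print(new_value)        # False -> the env var now really disables MQ
```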
@@ -103,6 +103,11 @@ KVDB_PORT = int(os.getenv("KVDB_PORT", settings["kv_db"].get("port", "6379")))
 KVDB_URI = os.getenv("KVDB_URI", settings["kv_db"].get("uri", None))
 KVDB_ENABLED = settings["kv_db"].get("enabled", False)
 
+if MQ_ENABLED and not KVDB_ENABLED:
+    raise ValueError(
+        "Invalid configuration: MQ is enabled but kv_db is disabled. "
+        "Enable kv_db.enabled (and KVDB) when MQ is enabled."
+    )
 
 DATABASES = settings.get("databases", {})
 
@@ -155,14 +160,22 @@ DB_INSERTER_SLEEP_TRIALS_STOP = db_buffer_settings.get("stop_trials_sleep", 0.01
 ###########################
 
 DB_FLUSH_MODE = settings["project"].get("db_flush_mode", "offline")
-# DEBUG_MODE = settings["project"].get("debug", False)
 PERF_LOG = settings["project"].get("performance_logging", False)
 JSON_SERIALIZER = settings["project"].get("json_serializer", "default")
 REPLACE_NON_JSON_SERIALIZABLE = settings["project"].get("replace_non_json_serializable", True)
 ENRICH_MESSAGES = settings["project"].get("enrich_messages", True)
 
+if DB_FLUSH_MODE == "online" and not MQ_ENABLED:
+    raise ValueError(
+        "Invalid configuration: project.db_flush_mode is 'online' but MQ is disabled. "
+        "Enable mq.enabled (or MQ_ENABLED=true) or set project.db_flush_mode to 'offline'."
+    )
+
+# Default: enable dump buffer only when running in offline flush mode.
 _DEFAULT_DUMP_BUFFER_ENABLED = DB_FLUSH_MODE == "offline"
 DUMP_BUFFER_ENABLED = (
+    # Env var "DUMP_BUFFER" overrides settings.yaml.
+    # Falls back to settings project.dump_buffer.enabled, then to the default above.
     os.getenv(
        "DUMP_BUFFER", str(settings["project"].get("dump_buffer", {}).get("enabled", _DEFAULT_DUMP_BUFFER_ENABLED))
    )
@@ -170,6 +183,7 @@ DUMP_BUFFER_ENABLED = (
     .lower()
     in _TRUE_VALUES
 )
+# Path is only read from settings.yaml; env override is not supported here.
 DUMP_BUFFER_PATH = settings["project"].get("dump_buffer", {}).get("path", "flowcept_buffer.jsonl")
 
 TELEMETRY_CAPTURE = settings.get("telemetry_capture", None)
flowcept/flowcept_api/flowcept_controller.py
CHANGED

@@ -320,7 +320,11 @@ class Flowcept(object):
             file_path = DUMP_BUFFER_PATH
         assert file_path is not None, "Please indicate file_path either in the argument or in the config file."
         if not os.path.exists(file_path):
-            raise FileNotFoundError(
+            raise FileNotFoundError(
+                f"Flowcept buffer file '{file_path}' was not found. "
+                f"Check your settings to see if you're dumping the data to a file and check if you"
+                f"have started Flowcept."
+            )
 
         with open(file_path, "rb") as f:
             lines = [ln for ln in f.read().splitlines() if ln]
flowcept/flowceptor/adapters/mlflow/interception_event_handler.py
CHANGED

@@ -13,7 +13,38 @@ class InterceptionEventHandler(FileSystemEventHandler):
         self.callback_function = callback_function
         self.interceptor_instance = interceptor_instance
 
+    def _matches_watch_target(self, path):
+        """Return True when a path matches the watch target.
+
+        If the target is a directory, any event under that directory matches.
+        If the target is a file, only the exact file path matches.
+        """
+        if not path:
+            return False
+        target = Path(self.file_path_to_watch).resolve()
+        candidate = Path(path).resolve()
+        if target.is_dir():
+            try:
+                candidate.relative_to(target)
+                return True
+            except ValueError:
+                return False
+        return candidate == target
+
+    def _maybe_callback(self, event):
+        """Invoke the callback when an event matches the watch target."""
+        paths = [getattr(event, "src_path", None), getattr(event, "dest_path", None)]
+        if any(self._matches_watch_target(path) for path in paths):
+            self.callback_function(self.interceptor_instance)
+
     def on_modified(self, event):
         """Get on modified."""
-
-
+        self._maybe_callback(event)
+
+    def on_created(self, event):
+        """Get on created."""
+        self._maybe_callback(event)
+
+    def on_moved(self, event):
+        """Get on moved."""
+        self._maybe_callback(event)
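
Note: the handler now reacts to create and move events as well as modifications; watchdog delivers events with `src_path` (and `dest_path` for moves), which `_maybe_callback` inspects before firing. A hedged usage sketch (constructor arguments inferred from the attribute assignments in the diff; the real signature may differ, and `file_path_to_watch` is assumed to be configured by the interceptor):

```python
from watchdog.observers import Observer

from flowcept.flowceptor.adapters.mlflow.interception_event_handler import InterceptionEventHandler

def on_change(interceptor_instance):  # hypothetical callback
    print("watched target changed for", interceptor_instance)

handler = InterceptionEventHandler(
    interceptor_instance=None,  # stand-in; normally the MLFlowInterceptor
    callback_function=on_change,
)
handler.file_path_to_watch = "mlruns/mlflow.db"  # assumed attribute setup

observer = Observer()
# recursive=True mirrors how MLFlowInterceptor schedules it (see next file)
observer.schedule(handler, path=".", recursive=True)
observer.start()  # on_modified/on_created/on_moved all route to on_change
```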
flowcept/flowceptor/adapters/mlflow/mlflow_interceptor.py
CHANGED

@@ -50,9 +50,10 @@ class MLFlowInterceptor(BaseInterceptor):
         interesting change, it calls self.intercept; otherwise, let it
         go....
         """
+        intercepted = 0
         runs = self.dao.get_finished_run_uuids()
         if not runs:
-            return
+            return intercepted
         for run_uuid_tuple in runs:
             run_uuid = run_uuid_tuple[0]
             if not self.state_manager.has_element_id(run_uuid):
@@ -63,6 +64,8 @@ class MLFlowInterceptor(BaseInterceptor):
                 continue
             task_msg = self.prepare_task_msg(run_data).to_dict()
             self.intercept(task_msg)
+            intercepted += 1
+        return intercepted
 
     def start(self, bundle_exec_id, check_safe_stops) -> "MLFlowInterceptor":
         """Start it."""
@@ -74,10 +77,20 @@ class MLFlowInterceptor(BaseInterceptor):
     def stop(self, check_safe_stops: bool = True) -> bool:
         """Stop it."""
         sleep(1)
-        super().stop(check_safe_stops)
         self.logger.debug("Interceptor stopping...")
-
-
+        # Flush any late writes before stopping the observer.
+        try:
+            intercepted = self.callback()
+            if intercepted == 0:
+                sleep(self.settings.watch_interval_sec)
+                self.callback()
+        except Exception as e:
+            self.logger.exception(e)
+        super().stop(check_safe_stops)
+        if self._observer is not None:
+            self._observer.stop()
+        if self._observer_thread is not None:
+            self._observer_thread.join()
         self.logger.debug("Interceptor stopped.")
         return True
 
@@ -98,4 +111,5 @@ class MLFlowInterceptor(BaseInterceptor):
         watch_dir = os.path.dirname(self.settings.file_path) or "."
         self._observer.schedule(event_handler, watch_dir, recursive=True)
         self._observer.start()
+        sleep(0.2)
         self.logger.info(f"Watching directory {watch_dir} with file {self.settings.file_path} ")
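
Note: `stop()` now drains before shutting the observer down, which is why `callback()` was changed to return a count. The generic shape of that pattern, sketched with made-up names:

```python
import time

def drain_on_stop(poll, watch_interval_sec: float) -> None:
    """Poll once at shutdown; if nothing new was intercepted, wait one
    watch interval and poll again so late writes are not lost."""
    if poll() == 0:
        time.sleep(watch_interval_sec)
        poll()

drain_on_stop(lambda: 0, watch_interval_sec=0.1)
```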
flowcept/flowceptor/consumers/agent/base_agent_context_manager.py
CHANGED

@@ -20,6 +20,12 @@ class BaseAppContext:
 
     tasks: List[Dict]
 
+    def reset_context(self):
+        """
+        Method to reset the variables in the context.
+        """
+        self.tasks = []
+
 
 class BaseAgentContextManager(BaseConsumer):
     """
@@ -39,14 +45,13 @@ class BaseAgentContextManager(BaseConsumer):
 
     agent_id = None
 
-    def __init__(self):
+    def __init__(self, allow_mq_disabled: bool = False):
         """
         Initializes the agent and resets its context state.
         """
         self._started = False
-        super().__init__()
-        self.context =
-        self.reset_context()
+        super().__init__(allow_mq_disabled=allow_mq_disabled)
+        # self.context = BaseAppContext(tasks=[])
         self.agent_id = BaseAgentContextManager.agent_id
 
     def message_handler(self, msg_obj: Dict) -> bool:
@@ -77,12 +82,6 @@ class BaseAgentContextManager(BaseConsumer):
 
         return True
 
-    def reset_context(self):
-        """
-        Resets the internal context, clearing all stored task data.
-        """
-        self.context = BaseAppContext(tasks=[])
-
     @asynccontextmanager
     async def lifespan(self, app):
         """
flowcept/flowceptor/consumers/base_consumer.py
CHANGED

@@ -13,18 +13,28 @@ class BaseConsumer(object):
 
     This class provides a standard interface and shared logic for subscribing to
     message queues and dispatching messages to a handler.
+
+    Note
+    ----
+    The MQ-disabled path is only intended for agent consumers that can operate
+    from an offline buffer file. General consumers that require MQ should keep
+    the default behavior (raise when MQ_ENABLED is False).
     """
 
-    def __init__(self):
+    def __init__(self, allow_mq_disabled: bool = False):
         """Initialize the message queue DAO and logger."""
+        self.logger = FlowceptLogger()
+        self._main_thread: Optional[Thread] = None
+
         if not MQ_ENABLED:
+            if allow_mq_disabled:
+                self._mq_dao = None
+                self.logger.warning("MQ is disabled; starting consumer without a message queue.")
+                return
             raise Exception("MQ is disabled in the settings. You cannot consume messages.")
 
         self._mq_dao = MQDao.build()
 
-        self.logger = FlowceptLogger()
-        self._main_thread: Optional[Thread] = None
-
     @abstractmethod
     def message_handler(self, msg_obj: Dict) -> bool:
         """
@@ -62,6 +72,9 @@ class BaseConsumer(object):
         BaseConsumer
             The current instance (to allow chaining).
         """
+        if self._mq_dao is None:
+            self.logger.warning("MQ is disabled; skipping message consumption start.")
+            return self
         if target is None:
             target = self.default_thread_target
         self._mq_dao.subscribe()
@@ -85,6 +98,9 @@ class BaseConsumer(object):
         --------
         start : Starts the consumer and optionally spawns a background thread to run this method.
         """
+        if self._mq_dao is None:
+            self.logger.warning("MQ is disabled; no message listener will run.")
+            return
         self.logger.debug("Going to wait for new messages!")
         self._mq_dao.message_listener(self.message_handler)
         self.logger.debug("Broke main message listening loop!")
@@ -96,4 +112,6 @@ class BaseConsumer(object):
         """
         Stop consuming messages by unsubscribing from the message queue.
         """
+        if self._mq_dao is None:
+            return
         self._mq_dao.unsubscribe()
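
Note: `BaseAgentContextManager` passes `allow_mq_disabled` straight through to this constructor (see the previous file). A hypothetical subclass showing the new opt-in path; with MQ disabled in the settings, construction no longer raises and the consumer degrades to warnings/no-ops:

```python
from typing import Dict

from flowcept.flowceptor.consumers.base_consumer import BaseConsumer

class OfflineAgentConsumer(BaseConsumer):  # illustrative name
    def message_handler(self, msg_obj: Dict) -> bool:
        print("got:", msg_obj)
        return True

consumer = OfflineAgentConsumer(allow_mq_disabled=True)  # _mq_dao is None
consumer.start()  # logs a warning and returns self instead of subscribing
consumer.stop()   # returns immediately; there is nothing to unsubscribe
```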
flowcept/flowceptor/consumers/document_inserter.py
CHANGED

@@ -197,6 +197,24 @@ class DocumentInserter(BaseConsumer):
                 f"{'' if exec_bundle_id is None else exec_bundle_id}_{interceptor_instance_id}!"
             )
             return "continue"
+        elif message["info"] == "mq_flush_complete":
+            exec_bundle_id = message.get("exec_bundle_id", None)
+            interceptor_instance_id = message.get("interceptor_instance_id")
+            self.logger.info(
+                f"DocInserter id {id(self)}. Received mq_flush_complete message "
+                f"from the interceptor {'' if exec_bundle_id is None else exec_bundle_id}_{interceptor_instance_id}!"
+            )
+            if self.check_safe_stops:
+                self.logger.info(
+                    f"Begin register_flush_complete "
+                    f"{'' if exec_bundle_id is None else exec_bundle_id}_{interceptor_instance_id}!"
+                )
+                self._mq_dao.register_flush_complete(interceptor_instance_id, exec_bundle_id)
+                self.logger.info(
+                    f"Done register_flush_complete "
+                    f"{'' if exec_bundle_id is None else exec_bundle_id}_{interceptor_instance_id}!"
+                )
+            return "continue"
         elif message["info"] == "stop_document_inserter":
             exec_bundle_id = message.get("exec_bundle_id", None)
             if self._bundle_exec_id == exec_bundle_id:
@@ -297,7 +315,10 @@ class DocumentInserter(BaseConsumer):
             return self
         if self.check_safe_stops:
             trial = 0
-            while not self._mq_dao.all_time_based_threads_ended(bundle_exec_id):
+            while not (
+                self._mq_dao.all_time_based_threads_ended(bundle_exec_id)
+                and self._mq_dao.all_flush_complete_received(bundle_exec_id)
+            ):
                 self.logger.debug(
                     f"# time_based_threads for bundle_exec_id {bundle_exec_id} is"
                     f"{self._mq_dao._keyvalue_dao.set_count(bundle_exec_id)}"