flowcept 0.8.11__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowcept/__init__.py +7 -4
- flowcept/agents/__init__.py +5 -0
- flowcept/{flowceptor/consumers/agent/client_agent.py → agents/agent_client.py} +22 -12
- flowcept/agents/agents_utils.py +181 -0
- flowcept/agents/dynamic_schema_tracker.py +191 -0
- flowcept/agents/flowcept_agent.py +30 -0
- flowcept/agents/flowcept_ctx_manager.py +175 -0
- flowcept/agents/gui/__init__.py +5 -0
- flowcept/agents/gui/agent_gui.py +76 -0
- flowcept/agents/gui/gui_utils.py +239 -0
- flowcept/agents/llms/__init__.py +1 -0
- flowcept/agents/llms/claude_gcp.py +139 -0
- flowcept/agents/llms/gemini25.py +119 -0
- flowcept/agents/prompts/__init__.py +1 -0
- flowcept/{flowceptor/adapters/agents/prompts.py → agents/prompts/general_prompts.py} +18 -0
- flowcept/agents/prompts/in_memory_query_prompts.py +297 -0
- flowcept/agents/tools/__init__.py +1 -0
- flowcept/agents/tools/general_tools.py +102 -0
- flowcept/agents/tools/in_memory_queries/__init__.py +1 -0
- flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py +704 -0
- flowcept/agents/tools/in_memory_queries/pandas_agent_utils.py +309 -0
- flowcept/cli.py +286 -44
- flowcept/commons/daos/docdb_dao/mongodb_dao.py +47 -0
- flowcept/commons/daos/mq_dao/mq_dao_base.py +24 -13
- flowcept/commons/daos/mq_dao/mq_dao_kafka.py +18 -2
- flowcept/commons/flowcept_dataclasses/task_object.py +16 -21
- flowcept/commons/flowcept_dataclasses/workflow_object.py +9 -1
- flowcept/commons/task_data_preprocess.py +260 -60
- flowcept/commons/utils.py +25 -6
- flowcept/configs.py +41 -26
- flowcept/flowcept_api/flowcept_controller.py +73 -6
- flowcept/flowceptor/adapters/base_interceptor.py +11 -5
- flowcept/flowceptor/consumers/agent/base_agent_context_manager.py +25 -1
- flowcept/flowceptor/consumers/base_consumer.py +4 -0
- flowcept/flowceptor/consumers/consumer_utils.py +5 -4
- flowcept/flowceptor/consumers/document_inserter.py +2 -2
- flowcept/flowceptor/telemetry_capture.py +5 -2
- flowcept/instrumentation/flowcept_agent_task.py +294 -0
- flowcept/instrumentation/flowcept_decorator.py +43 -0
- flowcept/instrumentation/flowcept_loop.py +3 -3
- flowcept/instrumentation/flowcept_task.py +64 -24
- flowcept/instrumentation/flowcept_torch.py +5 -5
- flowcept/instrumentation/task_capture.py +83 -6
- flowcept/version.py +1 -1
- {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/METADATA +42 -14
- {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/RECORD +50 -36
- resources/sample_settings.yaml +12 -4
- flowcept/flowceptor/adapters/agents/__init__.py +0 -1
- flowcept/flowceptor/adapters/agents/agents_utils.py +0 -89
- flowcept/flowceptor/adapters/agents/flowcept_agent.py +0 -292
- flowcept/flowceptor/adapters/agents/flowcept_llm_prov_capture.py +0 -186
- flowcept/flowceptor/consumers/agent/flowcept_agent_context_manager.py +0 -145
- flowcept/flowceptor/consumers/agent/flowcept_qa_manager.py +0 -112
- {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/WHEEL +0 -0
- {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/entry_points.txt +0 -0
- {flowcept-0.8.11.dist-info → flowcept-0.9.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""Controller module."""
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
import os.path
|
|
4
|
+
from typing import List, Dict
|
|
4
5
|
from uuid import uuid4
|
|
5
6
|
|
|
6
7
|
from flowcept.commons.daos.mq_dao.mq_dao_base import MQDao
|
|
@@ -16,6 +17,8 @@ from flowcept.configs import (
|
|
|
16
17
|
SETTINGS_PATH,
|
|
17
18
|
LMDB_ENABLED,
|
|
18
19
|
KVDB_ENABLED,
|
|
20
|
+
MQ_ENABLED,
|
|
21
|
+
DUMP_BUFFER_PATH,
|
|
19
22
|
)
|
|
20
23
|
from flowcept.flowceptor.adapters.base_interceptor import BaseInterceptor
|
|
21
24
|
|
|
@@ -44,7 +47,7 @@ class Flowcept(object):
|
|
|
44
47
|
campaign_id: str = None,
|
|
45
48
|
workflow_id: str = None,
|
|
46
49
|
workflow_name: str = None,
|
|
47
|
-
workflow_args:
|
|
50
|
+
workflow_args: Dict = None,
|
|
48
51
|
start_persistence=True,
|
|
49
52
|
check_safe_stops=True, # TODO add to docstring
|
|
50
53
|
save_workflow=True,
|
|
@@ -94,6 +97,7 @@ class Flowcept(object):
|
|
|
94
97
|
self.logger.debug(f"Using settings file: {SETTINGS_PATH}")
|
|
95
98
|
self._enable_persistence = start_persistence
|
|
96
99
|
self._db_inserters: List = []
|
|
100
|
+
self.buffer = None
|
|
97
101
|
self._check_safe_stops = check_safe_stops
|
|
98
102
|
if bundle_exec_id is None:
|
|
99
103
|
self._bundle_exec_id = id(self)
|
|
@@ -151,7 +155,7 @@ class Flowcept(object):
|
|
|
151
155
|
interceptor_inst = BaseInterceptor.build(interceptor)
|
|
152
156
|
interceptor_inst.start(bundle_exec_id=self._bundle_exec_id, check_safe_stops=self._check_safe_stops)
|
|
153
157
|
self._interceptor_instances.append(interceptor_inst)
|
|
154
|
-
|
|
158
|
+
self.buffer = interceptor_inst._mq_dao.buffer
|
|
155
159
|
if self._should_save_workflow and not self._workflow_saved:
|
|
156
160
|
self.save_workflow(interceptor, interceptor_inst)
|
|
157
161
|
|
|
@@ -161,6 +165,68 @@ class Flowcept(object):
|
|
|
161
165
|
self.logger.debug("Flowcept started successfully.")
|
|
162
166
|
return self
|
|
163
167
|
|
|
168
|
+
def _publish_buffer(self):
|
|
169
|
+
self._interceptor_instances[0]._mq_dao.bulk_publish(self.buffer)
|
|
170
|
+
|
|
171
|
+
@staticmethod
def read_messages_file(file_path: str = None) -> List[Dict]:
    """
    Read a JSON Lines (JSONL) file containing captured Flowcept messages.

    This function loads a file where each line is a serialized JSON object.
    It joins the non-blank lines into a single JSON array and parses them
    in one pass with ``orjson``.

    Parameters
    ----------
    file_path : str, optional
        Path to the messages file. If not provided, defaults to the
        value of ``DUMP_BUFFER_PATH`` from the configuration.
        If neither is provided, an assertion error is raised.

    Returns
    -------
    List[dict]
        A list of message objects (dictionaries) parsed from the file.

    Raises
    ------
    AssertionError
        If no ``file_path`` is provided and ``DUMP_BUFFER_PATH`` is not set.
    FileNotFoundError
        If the specified file does not exist.
    orjson.JSONDecodeError
        If the file contents cannot be parsed as valid JSON.

    Examples
    --------
    Read messages from a file explicitly:

    >>> msgs = Flowcept.read_messages_file("offline_buffer.jsonl")
    >>> print(len(msgs))
    128

    Use the default dump buffer path from config:

    >>> msgs = Flowcept.read_messages_file()
    >>> for m in msgs[:2]:
    ...     print(m["type"], m.get("workflow_id"))
    task_start wf_123
    task_end wf_123
    """
    if file_path is None:
        file_path = DUMP_BUFFER_PATH
    if file_path is None:
        # Raised explicitly (not via ``assert``) so the check survives ``python -O``
        # while callers still see the documented AssertionError.
        raise AssertionError("Please indicate file_path either in the argument or in the config file.")
    if not os.path.exists(file_path):
        # A bare string cannot be raised in Python 3; raise the documented exception type.
        raise FileNotFoundError(
            f"File {file_path} has not been created. It will only be created if you run in fully offline mode."
        )

    # Imported lazily, and only once the path is known to be usable.
    import orjson

    with open(file_path, "rb") as f:
        # Skip empty and whitespace-only lines so they cannot leave dangling commas in the array.
        lines = [ln for ln in f.read().splitlines() if ln.strip()]
    return orjson.loads(b"[" + b",".join(lines) + b"]")
|
|
229
|
+
|
|
164
230
|
def save_workflow(self, interceptor: str, interceptor_instance: BaseInterceptor):
|
|
165
231
|
"""
|
|
166
232
|
Save the current workflow and send its metadata using the provided interceptor.
|
|
@@ -270,9 +336,10 @@ class Flowcept(object):
|
|
|
270
336
|
"""
|
|
271
337
|
logger = FlowceptLogger()
|
|
272
338
|
mq = MQDao.build()
|
|
273
|
-
if
|
|
274
|
-
|
|
275
|
-
|
|
339
|
+
if MQ_ENABLED:
|
|
340
|
+
if not mq.liveness_test():
|
|
341
|
+
logger.error("MQ Not Ready!")
|
|
342
|
+
return False
|
|
276
343
|
|
|
277
344
|
if KVDB_ENABLED:
|
|
278
345
|
if not mq._keyvalue_dao.liveness_test():
|
|
@@ -9,14 +9,13 @@ from flowcept.commons.flowcept_dataclasses.workflow_object import (
|
|
|
9
9
|
)
|
|
10
10
|
from flowcept.configs import (
|
|
11
11
|
ENRICH_MESSAGES,
|
|
12
|
+
TELEMETRY_ENABLED,
|
|
12
13
|
)
|
|
13
14
|
from flowcept.commons.flowcept_logger import FlowceptLogger
|
|
14
15
|
from flowcept.commons.daos.mq_dao.mq_dao_base import MQDao
|
|
15
16
|
from flowcept.commons.flowcept_dataclasses.task_object import TaskObject
|
|
16
17
|
from flowcept.commons.settings_factory import get_settings
|
|
17
18
|
|
|
18
|
-
from flowcept.flowceptor.telemetry_capture import TelemetryCapture
|
|
19
|
-
|
|
20
19
|
|
|
21
20
|
# TODO :base-interceptor-refactor: :ml-refactor: :code-reorg: :usability:
|
|
22
21
|
# Consider creating a new concept for instrumentation-based 'interception'.
|
|
@@ -74,7 +73,14 @@ class BaseInterceptor(object):
|
|
|
74
73
|
self._bundle_exec_id = None
|
|
75
74
|
self.started = False
|
|
76
75
|
self._interceptor_instance_id = str(id(self))
|
|
77
|
-
|
|
76
|
+
|
|
77
|
+
if TELEMETRY_ENABLED:
|
|
78
|
+
from flowcept.flowceptor.telemetry_capture import TelemetryCapture
|
|
79
|
+
|
|
80
|
+
self.telemetry_capture = TelemetryCapture()
|
|
81
|
+
else:
|
|
82
|
+
self.telemetry_capture = None
|
|
83
|
+
|
|
78
84
|
self._saved_workflows = set()
|
|
79
85
|
self._generated_workflow_id = False
|
|
80
86
|
self.kind = kind
|
|
@@ -129,8 +135,8 @@ class BaseInterceptor(object):
|
|
|
129
135
|
# TODO :base-interceptor-refactor: :code-reorg: :usability:
|
|
130
136
|
raise Exception(f"This interceptor {id(self)} has never been started!")
|
|
131
137
|
workflow_obj.interceptor_ids = [self._interceptor_instance_id]
|
|
132
|
-
|
|
133
|
-
|
|
138
|
+
if self.telemetry_capture:
|
|
139
|
+
machine_info = self.telemetry_capture.capture_machine_info()
|
|
134
140
|
if workflow_obj.machine_info is None:
|
|
135
141
|
workflow_obj.machine_info = dict()
|
|
136
142
|
# TODO :refactor-base-interceptor: we might want to register
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
from contextlib import asynccontextmanager
|
|
2
2
|
from dataclasses import dataclass
|
|
3
3
|
from typing import Dict, List
|
|
4
|
+
from uuid import uuid4
|
|
4
5
|
|
|
6
|
+
from flowcept.flowcept_api.flowcept_controller import Flowcept
|
|
5
7
|
from flowcept.flowceptor.consumers.base_consumer import BaseConsumer
|
|
6
8
|
|
|
7
9
|
|
|
@@ -35,13 +37,17 @@ class BaseAgentContextManager(BaseConsumer):
|
|
|
35
37
|
- Access shared state via `self.context` during execution
|
|
36
38
|
"""
|
|
37
39
|
|
|
40
|
+
agent_id = None
|
|
41
|
+
|
|
38
42
|
def __init__(self):
|
|
39
43
|
"""
|
|
40
44
|
Initializes the agent and resets its context state.
|
|
41
45
|
"""
|
|
46
|
+
self._started = False
|
|
42
47
|
super().__init__()
|
|
43
48
|
self.context = None
|
|
44
49
|
self.reset_context()
|
|
50
|
+
self.agent_id = BaseAgentContextManager.agent_id
|
|
45
51
|
|
|
46
52
|
def message_handler(self, msg_obj: Dict) -> bool:
|
|
47
53
|
"""
|
|
@@ -94,7 +100,25 @@ class BaseAgentContextManager(BaseConsumer):
|
|
|
94
100
|
BaseAppContext
|
|
95
101
|
The current application context, including collected tasks.
|
|
96
102
|
"""
|
|
97
|
-
self.
|
|
103
|
+
if not self._started:
|
|
104
|
+
self.agent_id = BaseAgentContextManager.agent_id = str(uuid4())
|
|
105
|
+
self.logger.info(f"Starting lifespan for agent {BaseAgentContextManager.agent_id}.")
|
|
106
|
+
self._started = True
|
|
107
|
+
|
|
108
|
+
f = Flowcept(
|
|
109
|
+
start_persistence=False,
|
|
110
|
+
save_workflow=True,
|
|
111
|
+
check_safe_stops=False,
|
|
112
|
+
workflow_name="agent_workflow",
|
|
113
|
+
workflow_args={"agent_id": self.agent_id},
|
|
114
|
+
)
|
|
115
|
+
self.agent_workflow_id = f.current_workflow_id
|
|
116
|
+
f.start()
|
|
117
|
+
f.logger.info(
|
|
118
|
+
f"This section's workflow_id={Flowcept.current_workflow_id}, campaign_id={Flowcept.campaign_id}"
|
|
119
|
+
)
|
|
120
|
+
self.start()
|
|
121
|
+
|
|
98
122
|
try:
|
|
99
123
|
yield self.context
|
|
100
124
|
finally:
|
|
@@ -82,6 +82,10 @@ class BaseConsumer(object):
|
|
|
82
82
|
"""
|
|
83
83
|
self.logger.debug("Going to wait for new messages!")
|
|
84
84
|
self._mq_dao.message_listener(self.message_handler)
|
|
85
|
+
self.logger.debug("Broke main message listening loop!")
|
|
86
|
+
# self._mq_dao.stop(check_safe_stops=False) # TODO Do we need to stop mq_dao here?
|
|
87
|
+
self.stop_consumption()
|
|
88
|
+
self.logger.debug("MQ stopped.")
|
|
85
89
|
|
|
86
90
|
def stop_consumption(self):
|
|
87
91
|
"""
|
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
"""Consumer utilities module."""
|
|
2
2
|
|
|
3
3
|
from datetime import datetime
|
|
4
|
+
from zoneinfo import ZoneInfo
|
|
4
5
|
from time import time
|
|
5
6
|
from typing import List, Dict
|
|
6
7
|
|
|
7
|
-
import pytz
|
|
8
|
-
|
|
9
8
|
from flowcept.commons.flowcept_dataclasses.task_object import TaskObject
|
|
10
9
|
from flowcept.commons.vocabulary import Status
|
|
11
10
|
|
|
11
|
+
UTC_TZ = ZoneInfo("UTC")
|
|
12
|
+
|
|
12
13
|
|
|
13
14
|
def curate_task_msg(task_msg_dict: dict, convert_times=True):
|
|
14
15
|
"""Curate a task message."""
|
|
@@ -47,10 +48,10 @@ def curate_task_msg(task_msg_dict: dict, convert_times=True):
|
|
|
47
48
|
if convert_times:
|
|
48
49
|
for time_field in TaskObject.get_time_field_names():
|
|
49
50
|
if time_field in task_msg_dict:
|
|
50
|
-
task_msg_dict[time_field] = datetime.fromtimestamp(task_msg_dict[time_field],
|
|
51
|
+
task_msg_dict[time_field] = datetime.fromtimestamp(task_msg_dict[time_field], UTC_TZ)
|
|
51
52
|
|
|
52
53
|
if "registered_at" not in task_msg_dict:
|
|
53
|
-
task_msg_dict["registered_at"] = datetime.fromtimestamp(time(),
|
|
54
|
+
task_msg_dict["registered_at"] = datetime.fromtimestamp(time(), UTC_TZ)
|
|
54
55
|
|
|
55
56
|
|
|
56
57
|
def remove_empty_fields_from_dict(obj: dict):
|
|
@@ -150,9 +150,9 @@ class DocumentInserter(BaseConsumer):
|
|
|
150
150
|
and message["telemetry_at_end"]
|
|
151
151
|
):
|
|
152
152
|
try:
|
|
153
|
-
telemetry_summary = summarize_telemetry(message)
|
|
153
|
+
telemetry_summary = summarize_telemetry(message, self.logger)
|
|
154
154
|
message["telemetry_summary"] = telemetry_summary
|
|
155
|
-
# TODO: make this
|
|
155
|
+
# TODO: make this configurable
|
|
156
156
|
tags = tag_critical_task(
|
|
157
157
|
generated=message.get("generated", {}), telemetry_summary=telemetry_summary, thresholds=None
|
|
158
158
|
)
|
|
@@ -2,9 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
from typing import Callable, Set, List
|
|
4
4
|
|
|
5
|
-
import psutil
|
|
6
5
|
import platform
|
|
7
|
-
|
|
6
|
+
|
|
8
7
|
import os
|
|
9
8
|
|
|
10
9
|
from flowcept.commons.flowcept_logger import FlowceptLogger
|
|
@@ -15,6 +14,10 @@ from flowcept.configs import (
|
|
|
15
14
|
)
|
|
16
15
|
from flowcept.commons.flowcept_dataclasses.telemetry import Telemetry
|
|
17
16
|
|
|
17
|
+
if TELEMETRY_CAPTURE is not None and len(TELEMETRY_CAPTURE):
|
|
18
|
+
import psutil
|
|
19
|
+
import cpuinfo
|
|
20
|
+
|
|
18
21
|
|
|
19
22
|
class GPUCapture:
|
|
20
23
|
"""GPU Capture class."""
|
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
"""Flowcept Agent Task module."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import threading
|
|
5
|
+
from functools import wraps
|
|
6
|
+
from time import time
|
|
7
|
+
from typing import Any, Dict, List, Optional, Union
|
|
8
|
+
|
|
9
|
+
from langchain_core.language_models import LLM
|
|
10
|
+
from langchain_core.language_models.base import BaseLanguageModel
|
|
11
|
+
from langchain_core.messages import BaseMessage
|
|
12
|
+
from langchain_core.runnables import Runnable
|
|
13
|
+
|
|
14
|
+
from flowcept.commons.flowcept_dataclasses.task_object import TaskObject
|
|
15
|
+
from flowcept.commons.flowcept_logger import FlowceptLogger
|
|
16
|
+
from flowcept.commons.utils import replace_non_serializable
|
|
17
|
+
from flowcept.commons.vocabulary import Status
|
|
18
|
+
from flowcept.configs import (
|
|
19
|
+
INSTRUMENTATION_ENABLED,
|
|
20
|
+
REPLACE_NON_JSON_SERIALIZABLE,
|
|
21
|
+
TELEMETRY_ENABLED,
|
|
22
|
+
)
|
|
23
|
+
from flowcept.flowcept_api.flowcept_controller import Flowcept
|
|
24
|
+
from flowcept.flowceptor.adapters.instrumentation_interceptor import InstrumentationInterceptor
|
|
25
|
+
from flowcept.flowceptor.consumers.agent.base_agent_context_manager import BaseAgentContextManager
|
|
26
|
+
from flowcept.instrumentation.task_capture import FlowceptTask
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
_thread_local = threading.local()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# TODO: :code-reorg: consider moving it to utils and reusing it in dask interceptor
|
|
33
|
+
def default_args_handler(*args, **kwargs):
    """
    Flatten positional and keyword arguments into a single dictionary.

    A leading ``argparse.Namespace`` positional argument is expanded into its
    attributes. The remaining positional arguments are stored under the keys
    ``arg_0``, ``arg_1``, ... and keyword arguments are merged in last. When
    ``REPLACE_NON_JSON_SERIALIZABLE`` is set, the dictionary is passed through
    ``replace_non_serializable`` before being returned.
    """
    collected = {}
    positional = args
    if positional and isinstance(positional[0], argparse.Namespace):
        collected.update(vars(positional[0]))
        positional = positional[1:]
    for index, value in enumerate(positional):
        collected[f"arg_{index}"] = value
    if kwargs:
        collected.update(kwargs)
    return replace_non_serializable(collected) if REPLACE_NON_JSON_SERIALIZABLE else collected
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def agent_flowcept_task(func=None, **decorator_kwargs):
    """
    Decorate an agent function so that each call is captured as a Flowcept task.

    Usable bare (``@agent_flowcept_task``) or with options
    (``@agent_flowcept_task(subtype=..., tags=...)``).

    Parameters
    ----------
    func : callable, optional
        The function being decorated when the decorator is used bare.
    **decorator_kwargs
        Recognized options:

        - ``args_handler``: callable turning ``(*args, **kwargs)`` into a dict
          (defaults to ``default_args_handler``); also applied to the result.
        - ``custom_metadata``: dict attached to every captured task.
        - ``tags``: tags attached to every captured task.
        - ``subtype``: task subtype (defaults to ``"agent_task"``).

    Returns
    -------
    callable
        The wrapped function, or a decorator when ``func`` is None.

    Notes
    -----
    An exception raised by the wrapped function is logged and recorded on the
    task (``Status.ERROR`` and ``stderr``); the wrapper then returns ``None``
    instead of re-raising. When ``INSTRUMENTATION_ENABLED`` is false the
    wrapped function is called directly and nothing is captured.
    """
    if INSTRUMENTATION_ENABLED:
        interceptor = InstrumentationInterceptor.get_instance()
        logger = FlowceptLogger()

    # The decorator options never change after decoration, so resolve them once.
    args_handler = decorator_kwargs.get("args_handler", default_args_handler)
    custom_metadata = decorator_kwargs.get("custom_metadata", None)
    tags = decorator_kwargs.get("tags", None)
    subtype = decorator_kwargs.get("subtype", "agent_task")

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            if not INSTRUMENTATION_ENABLED:
                return func(*args, **kwargs)

            task_obj = TaskObject()
            task_obj.subtype = subtype
            task_obj.activity_id = func.__name__
            handled_args = args_handler(*args, **kwargs)
            # workflow_id / campaign_id may be supplied as call arguments; otherwise use the current Flowcept ones.
            task_obj.workflow_id = handled_args.pop("workflow_id", Flowcept.current_workflow_id)
            task_obj.campaign_id = handled_args.pop("campaign_id", Flowcept.campaign_id)
            task_obj.used = handled_args
            task_obj.tags = tags
            task_obj.started_at = time()
            # Give every task its own dict so changes made to one task's metadata
            # never leak into the decorator-level dict or into later invocations.
            task_obj.custom_metadata = dict(custom_metadata) if custom_metadata else {}
            task_obj.task_id = str(task_obj.started_at)
            # Expose the running task to code executing inside ``func`` (see get_current_context_task).
            _thread_local._flowcept_current_context_task = task_obj
            if TELEMETRY_ENABLED:
                task_obj.telemetry_at_start = interceptor.telemetry_capture.capture()
            task_obj.agent_id = BaseAgentContextManager.agent_id

            try:
                result = func(*args, **kwargs)
                task_obj.status = Status.FINISHED
            except Exception as e:
                task_obj.status = Status.ERROR
                result = None
                logger.exception(e)
                task_obj.stderr = str(e)
            task_obj.ended_at = time()

            if TELEMETRY_ENABLED:
                task_obj.telemetry_at_end = interceptor.telemetry_capture.capture()
            try:
                if result is not None:
                    if isinstance(result, dict):
                        task_obj.generated = args_handler(**result)
                    else:
                        task_obj.generated = args_handler(result)
            except Exception as e:
                # Failing to serialize the result must not lose the task record itself.
                logger.exception(e)

            interceptor.intercept(task_obj.to_dict())
            return result

        return wrapper

    if func is None:
        return decorator
    return decorator(func)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def get_current_context_task() -> TaskObject | None:
    """Return the task object stored in this thread's local storage, or None if none was stored."""
    try:
        return _thread_local._flowcept_current_context_task
    except AttributeError:
        return None
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _extract_llm_metadata(llm: LLM) -> Dict:
    """
    Collect descriptive metadata from a LangChain LLM instance.

    Parameters
    ----------
    llm : LLM
        The language model instance.

    Returns
    -------
    dict
        Dictionary with the keys ``class_name`` and ``module`` (taken from the
        instance's class) and ``config`` (the result of ``llm.dict()``, or an
        empty dict when the instance has no ``dict`` attribute).
    """
    llm_class = llm.__class__
    class_name = llm_class.__name__
    module_name = llm_class.__module__
    if hasattr(llm, "dict"):
        config = llm.dict()
    else:
        config = {}
    return {"class_name": class_name, "module": module_name, "config": config}
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class FlowceptLLM(Runnable):
    """
    Flowcept wrapper for language models to capture provenance of LLM interactions.

    This class wraps a LangChain-compatible LLM (any subclass of
    ``langchain_core.language_models.base.BaseLanguageModel``) so that
    prompts and responses are automatically captured as provenance tasks
    in Flowcept. It ensures that both inputs (prompts) and outputs
    (responses) are recorded, along with metadata about the underlying LLM.

    Parameters
    ----------
    llm : BaseLanguageModel
        The underlying LangChain-compatible LLM instance to wrap.
    agent_id : str, optional
        Identifier of the agent that owns this LLM. Used to correlate
        tasks across agents.
    parent_task_id : str, optional
        Identifier of the parent task, if this LLM interaction is part
        of a larger workflow task.
    workflow_id : str, optional
        Identifier of the workflow execution associated with this task.
    campaign_id : str, optional
        Identifier of the campaign or experiment associated with this task.

    Attributes
    ----------
    llm : BaseLanguageModel
        The underlying LLM object.
    agent_id : str
        The agent identifier, if provided.
    parent_task_id : str
        Parent task identifier, if provided.
    workflow_id : str
        Workflow identifier, if provided. The historical misspelling
        ``worflow_id`` remains available as an alias.
    campaign_id : str
        Campaign identifier, if provided.
    metadata : dict
        Extracted metadata about the underlying LLM, such as class name,
        module, and configuration.

    Methods
    -------
    call(messages, tools=None, callbacks=None, available_functions=None)
        Generic call method for compatibility with some LLM APIs. Only
        ``messages`` is forwarded; the other arguments are accepted and ignored.
    invoke(input, config=None, **kwargs)
        Standard LangChain entrypoint for invoking the LLM.
    __call__(*args, **kwargs)
        Syntactic sugar for calling the wrapper like a function.
    _format_messages(messages)
        Utility method to render messages into a human-readable string.

    Notes
    -----
    Every call is wrapped in a :class:`flowcept.instrumentation.task_capture.FlowceptTask`
    context. This ensures the provenance database records:

    - Used: the input prompt/messages
    - Generated: the LLM response
    - Metadata: model configuration and optional response metadata

    All entry points return the response text (``response.content`` for
    message responses, ``str(response)`` otherwise), not the raw response object.

    Examples
    --------
    Wrap an OpenAI model and capture provenance automatically:

    >>> from langchain_openai import ChatOpenAI
    >>> from flowcept.instrumentation.flowcept_agent_task import FlowceptLLM
    >>>
    >>> llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
    >>> wrapped_llm = FlowceptLLM(llm, agent_id="agent_123", workflow_id="wf_456")
    >>>
    >>> # Example with a single string prompt
    >>> response = wrapped_llm("What is the capital of France?")
    >>> print(response)
    "Paris"

    Example with a list of role/content messages:

    >>> messages = [
    ...     {"role": "system", "content": "You are a helpful assistant."},
    ...     {"role": "user", "content": "Tell me a joke about computers."}
    ... ]
    >>> response = wrapped_llm.invoke(messages)
    >>> print(response)
    "Why did the computer show up at work late? It had a hard drive!"

    In both cases, provenance is captured automatically and can be
    queried via the Flowcept API.
    """

    def __init__(
        self,
        llm: BaseLanguageModel,
        agent_id: str = None,
        parent_task_id: str = None,
        workflow_id=None,
        campaign_id=None,
    ):
        self.llm = llm
        self.agent_id = agent_id
        self.workflow_id = workflow_id
        self.campaign_id = campaign_id
        self.metadata = _extract_llm_metadata(llm)
        self.parent_task_id = parent_task_id

    @property
    def worflow_id(self):
        """Misspelled alias of ``workflow_id``, kept so existing callers keep working."""
        return self.workflow_id

    @worflow_id.setter
    def worflow_id(self, value):
        self.workflow_id = value

    def _our_call(self, messages, **kwargs):
        """Invoke the wrapped LLM inside a FlowceptTask and return the response text."""
        messages_str = FlowceptLLM._format_messages(messages)
        used = {"prompt": messages_str}
        with FlowceptTask(
            used=used,
            subtype="llm_task",
            # Per-call copy: "response_metadata" is added below and must not
            # accumulate in the metadata shared by every call of this wrapper.
            custom_metadata=dict(self.metadata),
            agent_id=self.agent_id,
            activity_id="llm_interaction",
            campaign_id=self.campaign_id,
            workflow_id=self.workflow_id,
            parent_task_id=self.parent_task_id,
        ) as task:
            response = self.llm.invoke(messages, **kwargs)
            response_str = response.content if isinstance(response, BaseMessage) else str(response)
            generated = {"response": response_str}

            if hasattr(response, "response_metadata"):
                task._task.custom_metadata["response_metadata"] = response.response_metadata

            task.end(generated=generated)
            return response_str

    def call(
        self,
        messages: Union[str, List[Dict[str, str]]],
        tools: Optional[List[dict]] = None,
        callbacks: Optional[List[Any]] = None,
        available_functions: Optional[Dict[str, Any]] = None,
    ) -> Union[str, Any]:
        """Invoke method used by some other LLMs; only ``messages`` is forwarded."""
        return self._our_call(messages)

    def invoke(self, input: Union[str, List[Dict[str, str]]], config=None, **kwargs) -> Any:
        """
        Invoke method used by LangChain.

        ``config`` mirrors the second positional parameter of
        ``Runnable.invoke`` so the wrapper can be called the way LangChain
        calls runnables; it is forwarded to the wrapped LLM only when given.
        """
        if config is not None:
            kwargs["config"] = config
        return self._our_call(input, **kwargs)

    def __call__(self, *args, **kwargs):
        """Default call method, to be used like llm("string")."""
        return self.invoke(*args, **kwargs)

    @staticmethod
    def _format_message(message) -> str:
        """Render one message (dict, LangChain message, or (role, content) pair) as ``Role: content``."""
        if isinstance(message, dict):
            role, content = message.get("role", ""), message.get("content", "")
        elif isinstance(message, BaseMessage):
            role, content = message.type, message.content
        elif isinstance(message, (tuple, list)) and len(message) == 2:
            role, content = message
        else:
            return str(message)
        return f"{str(role).capitalize()}: {content}"

    @staticmethod
    def _format_messages(messages: Union[str, List[Dict[str, str]]]) -> str:
        """
        Render the LLM input as a human-readable string for provenance capture.

        Strings are returned unchanged and lists are rendered one message per
        line. A single LangChain message and objects exposing ``to_string()``
        (presumably LangChain prompt values -- confirm against callers) are
        also accepted.

        Raises
        ------
        ValueError
            If ``messages`` is none of the supported forms.
        """
        if isinstance(messages, str):
            return messages
        if isinstance(messages, list):
            return "\n".join(FlowceptLLM._format_message(m) for m in messages)
        if isinstance(messages, BaseMessage):
            return FlowceptLLM._format_message(messages)
        to_string = getattr(messages, "to_string", None)
        if callable(to_string):
            return to_string()
        raise ValueError(f"Invalid message format: {messages}")
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
from functools import wraps
|
|
3
|
+
|
|
4
|
+
from flowcept.flowcept_api.flowcept_controller import Flowcept
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def flowcept(func=None, **flowcept_constructor_kwargs):
    """
    Run the decorated function inside a ``Flowcept`` context manager.

    Works on both regular and ``async`` functions. The keyword arguments are
    forwarded to the ``Flowcept`` constructor on every call;
    ``start_persistence``, ``save_workflow`` and ``check_safe_stops``
    default to ``False`` unless given explicitly.

    Usage:
        @flowcept
        def main(): ...

        @flowcept(campaign_id="C123", workflow_name="my_workflow")
        def main(): ...
    """

    def _decorate(target):
        ctor_kwargs = dict(flowcept_constructor_kwargs)
        for option in ("start_persistence", "save_workflow", "check_safe_stops"):
            ctor_kwargs.setdefault(option, False)

        if not inspect.iscoroutinefunction(target):

            @wraps(target)
            def _sync_wrapper(*args, **kwargs):
                with Flowcept(**ctor_kwargs):
                    return target(*args, **kwargs)

            return _sync_wrapper

        @wraps(target)
        async def _async_wrapper(*args, **kwargs):
            # The Flowcept context stays open while the coroutine is awaited.
            with Flowcept(**ctor_kwargs):
                return await target(*args, **kwargs)

        return _async_wrapper

    # Bare @flowcept receives the function directly; @flowcept(...) receives only options.
    if func is None:
        return _decorate
    return _decorate(func)
|
|
@@ -6,7 +6,7 @@ from typing import Union, Sized, Iterator, Dict
|
|
|
6
6
|
|
|
7
7
|
from flowcept import Flowcept
|
|
8
8
|
from flowcept.commons.vocabulary import Status
|
|
9
|
-
from flowcept.configs import INSTRUMENTATION_ENABLED
|
|
9
|
+
from flowcept.configs import INSTRUMENTATION_ENABLED, TELEMETRY_ENABLED
|
|
10
10
|
from flowcept.flowceptor.adapters.instrumentation_interceptor import InstrumentationInterceptor
|
|
11
11
|
|
|
12
12
|
|
|
@@ -160,8 +160,8 @@ class FlowceptLoop:
|
|
|
160
160
|
|
|
161
161
|
def _end_iteration_task(self, _):
|
|
162
162
|
self._last_iteration_task["status"] = Status.FINISHED.value
|
|
163
|
-
|
|
164
|
-
|
|
163
|
+
if TELEMETRY_ENABLED:
|
|
164
|
+
tel = FlowceptLoop._interceptor.telemetry_capture.capture()
|
|
165
165
|
self._last_iteration_task["telemetry_at_end"] = tel.to_dict()
|
|
166
166
|
FlowceptLoop._interceptor.intercept(self._last_iteration_task)
|
|
167
167
|
|