flowcept 0.8.9__py3-none-any.whl → 0.8.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowcept/cli.py +460 -0
- flowcept/commons/daos/keyvalue_dao.py +19 -23
- flowcept/commons/daos/mq_dao/mq_dao_base.py +29 -29
- flowcept/commons/daos/mq_dao/mq_dao_kafka.py +4 -3
- flowcept/commons/daos/mq_dao/mq_dao_mofka.py +4 -0
- flowcept/commons/daos/mq_dao/mq_dao_redis.py +38 -5
- flowcept/commons/daos/redis_conn.py +47 -0
- flowcept/commons/flowcept_dataclasses/task_object.py +36 -8
- flowcept/commons/settings_factory.py +2 -4
- flowcept/commons/task_data_preprocess.py +200 -0
- flowcept/commons/utils.py +1 -1
- flowcept/configs.py +11 -9
- flowcept/flowcept_api/flowcept_controller.py +30 -13
- flowcept/flowceptor/adapters/agents/__init__.py +1 -0
- flowcept/flowceptor/adapters/agents/agents_utils.py +89 -0
- flowcept/flowceptor/adapters/agents/flowcept_agent.py +292 -0
- flowcept/flowceptor/adapters/agents/flowcept_llm_prov_capture.py +186 -0
- flowcept/flowceptor/adapters/agents/prompts.py +51 -0
- flowcept/flowceptor/adapters/base_interceptor.py +17 -19
- flowcept/flowceptor/adapters/brokers/__init__.py +1 -0
- flowcept/flowceptor/adapters/brokers/mqtt_interceptor.py +132 -0
- flowcept/flowceptor/adapters/mlflow/mlflow_interceptor.py +3 -3
- flowcept/flowceptor/adapters/tensorboard/tensorboard_interceptor.py +3 -3
- flowcept/flowceptor/consumers/agent/__init__.py +1 -0
- flowcept/flowceptor/consumers/agent/base_agent_context_manager.py +101 -0
- flowcept/flowceptor/consumers/agent/client_agent.py +48 -0
- flowcept/flowceptor/consumers/agent/flowcept_agent_context_manager.py +145 -0
- flowcept/flowceptor/consumers/agent/flowcept_qa_manager.py +112 -0
- flowcept/flowceptor/consumers/base_consumer.py +90 -0
- flowcept/flowceptor/consumers/document_inserter.py +138 -53
- flowcept/flowceptor/telemetry_capture.py +1 -1
- flowcept/instrumentation/task_capture.py +19 -9
- flowcept/version.py +1 -1
- {flowcept-0.8.9.dist-info → flowcept-0.8.11.dist-info}/METADATA +18 -6
- {flowcept-0.8.9.dist-info → flowcept-0.8.11.dist-info}/RECORD +39 -25
- flowcept-0.8.11.dist-info/entry_points.txt +2 -0
- resources/sample_settings.yaml +44 -23
- flowcept/flowceptor/adapters/zambeze/__init__.py +0 -1
- flowcept/flowceptor/adapters/zambeze/zambeze_dataclasses.py +0 -41
- flowcept/flowceptor/adapters/zambeze/zambeze_interceptor.py +0 -102
- {flowcept-0.8.9.dist-info → flowcept-0.8.11.dist-info}/WHEEL +0 -0
- {flowcept-0.8.9.dist-info → flowcept-0.8.11.dist-info}/licenses/LICENSE +0 -0
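The headline changes are a new command-line interface (cli.py, wired up through the added entry_points.txt), a new agents subsystem for LLM-assisted provenance capture, a generic MQTT broker interceptor, and the removal of the Zambeze adapter. After upgrading, the installed version and any newly registered console script can be checked from Python. A minimal sketch using only the standard library; the exact script name is not shown in this diff, so the scan below is an assumption:

```python
# Verify the upgrade (requires Python 3.10+ for entry_points(group=...)).
from importlib.metadata import version, entry_points

print(version("flowcept"))  # expected to print 0.8.11 after upgrading

# 0.8.11 adds an entry_points.txt, so a console script is presumably
# registered; its exact name is not shown in this diff.
for ep in entry_points(group="console_scripts"):
    if ep.value.startswith("flowcept"):
        print(ep.name, "->", ep.value)
```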
resources/sample_settings.yaml
CHANGED

@@ -1,4 +1,4 @@
-flowcept_version: 0.8.9
+flowcept_version: 0.8.11 # Version of the Flowcept package. This setting file is compatible with this version.
 
 project:
   debug: true # Toggle debug mode. This will add a property `debug: true` to all saved data, making it easier to retrieve/delete them later.
@@ -25,7 +25,6 @@ telemetry_capture: # This toggles each individual type of telemetry capture. GPU
 
 instrumentation:
   enabled: true # This toggles data capture for instrumentation.
-  singleton: true # Use a single instrumentation instance per process. Defaults to true
   torch:
     what: parent_and_children # Scope of instrumentation: "parent_only" -- will capture only at the main model level, "parent_and_children" -- will capture the inner layers, or ~ (disable).
     children_mode: telemetry_and_tensor_inspection # What to capture if parent_and_children is chosen in the scope. Possible values: "tensor_inspection" (i.e., tensor metadata), "telemetry", "telemetry_and_tensor_inspection"
@@ -40,18 +39,22 @@ experiment:
 mq:
   type: redis # or kafka or mofka; Please adjust the port (kafka's default is 9092; redis is 6379). If mofka, adjust the group_file.
   host: localhost
-  #
+  # uri: ?
+  # instances: ["localhost:6379"] # We can have multiple MQ instances being accessed by the consumers, but each interceptor will currently access one single MQ.
   port: 6379
   # group_file: mofka.json
   channel: interception
   buffer_size: 50
   insertion_buffer_time_secs: 5
   timing: false
+  # uri: use Redis connection uri here
   chunk_size: -1 # use 0 or -1 to disable this. Or simply omit this from the config file.
+  same_as_kvdb: false # Set this to true if you are using the same Redis instance both as an MQ and as the KV_DB. In that case, there is no need to repeat connection parameters in MQ; use only what you define in KV_DB.
 
 kv_db:
   host: localhost
   port: 6379
+  enabled: true
   # uri: use Redis connection uri here
 
 web_server:
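The mq changes above introduce `same_as_kvdb`, documenting that one Redis instance can back both the MQ and the KV_DB. A minimal sketch of that fallback semantics, assuming PyYAML and the sample file path; `resolve_mq_conn` is a hypothetical helper, not flowcept's actual connection logic:

```python
# Sketch of the documented `same_as_kvdb` semantics: when true, the MQ
# reuses the kv_db connection parameters instead of repeating them.
import yaml  # PyYAML

def resolve_mq_conn(settings: dict) -> dict:
    mq = dict(settings.get("mq", {}))
    if mq.get("same_as_kvdb"):
        kv = settings.get("kv_db", {})
        mq["host"] = kv.get("host", "localhost")
        mq["port"] = kv.get("port", 6379)
    return mq

with open("resources/sample_settings.yaml") as f:
    print(resolve_mq_conn(yaml.safe_load(f)))
```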
@@ -59,9 +62,9 @@ web_server:
   port: 5000
 
 sys_metadata:
-  environment_id: "laptop"
+  environment_id: "laptop" # We use this to keep track of the environment used to run an experiment. Typical values include the cluster name, but it can be anything that you think will help identify your experimentation environment.
 
-extra_metadata:
+extra_metadata: # We use this to store any extra metadata you want to keep track of during an experiment.
   place_holder: ""
 
 analytics:
@@ -70,13 +73,20 @@ analytics:
   generated.accuracy: maximum_first
 
 db_buffer:
-
-
-
-
-
-
-
+  insertion_buffer_time_secs: 5 # Time interval (in seconds) to buffer incoming records before flushing to the database
+  buffer_size: 50 # Maximum number of records to hold in the buffer before forcing a flush
+  remove_empty_fields: false # If true, fields with null/empty values will be removed before insertion
+  stop_max_trials: 240 # Maximum number of trials before giving up when waiting for a fully safe stop (i.e., all records have been inserted as expected).
+  stop_trials_sleep: 0.01 # Sleep duration (in seconds) between trials when waiting for a fully safe stop.
+
+agent:
+  enabled: false
+  mcp_host: localhost
+  mcp_port: 8000
+  llm_server_url: '?'
+  api_key: '?'
+  model: '?'
+  model_kwargs: {}
 
 databases:
 
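The new `db_buffer` block spells out the inserter's flush policy (flush when `buffer_size` records accumulate or `insertion_buffer_time_secs` elapse), and the new `agent` block configures the MCP-based agent subsystem. An illustrative sketch of that flush policy, not flowcept's document_inserter implementation:

```python
# Flush when `buffer_size` records accumulate or when
# `insertion_buffer_time_secs` have elapsed, whichever comes first.
import time

class BufferSketch:
    def __init__(self, buffer_size=50, insertion_buffer_time_secs=5):
        self.buffer_size = buffer_size
        self.max_age = insertion_buffer_time_secs
        self._records = []
        self._last_flush = time.monotonic()

    def add(self, record):
        self._records.append(record)
        too_full = len(self._records) >= self.buffer_size
        too_old = time.monotonic() - self._last_flush >= self.max_age
        if too_full or too_old:
            self.flush()

    def flush(self):
        print(f"flushing {len(self._records)} records")
        self._records.clear()
        self._last_flush = time.monotonic()
```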
@@ -89,20 +99,30 @@ databases:
     host: localhost
     port: 27017
     db: flowcept
-    create_collection_index: true
+    create_collection_index: true # Whether flowcept should create collection indices if they haven't been created yet. This is done only at the Flowcept start up.
 
 adapters:
   # For each key below, you can have multiple instances. Like mlflow1, mlflow2; zambeze1, zambeze2. Use an empty dict, {}, if you won't use any adapter.
-
-
-
-
-
-
-
-
-
-
+
+  broker_mqtt:
+    kind: broker
+    host: h
+    port: 30011
+    protocol: mqtt3.1.1
+    queues: ["#"]
+    username: postman
+    password: p
+    qos: 2
+    task_subtype: intersect_msg
+    tracked_keys:
+      used: payload
+      generated: ~
+      custom_metadata: [headers, msgId]
+      activity_id: operationId
+      submitted_at: ~
+      started_at: ~
+      ended_at: ~
+      registered_at: ~
 
   mlflow:
     kind: mlflow
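The new `broker_mqtt` adapter (served by the added mqtt_interceptor.py) subscribes to broker topics and maps message fields through `tracked_keys`. A sketch of what a subscription with these sample parameters might look like, written against paho-mqtt 1.x as an assumption; flowcept's interceptor may differ:

```python
import json
import paho.mqtt.client as mqtt

def on_message(client, userdata, msg):
    payload = json.loads(msg.payload)             # tracked_keys: used -> payload
    print(msg.topic, payload.get("operationId"))  # activity_id -> operationId

client = mqtt.Client(protocol=mqtt.MQTTv311)  # protocol: mqtt3.1.1
client.username_pw_set("postman", "p")        # username / password
client.on_message = on_message
client.connect("h", 30011)                    # host / port
client.subscribe("#", qos=2)                  # queues: ["#"], qos: 2
client.loop_forever()
```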
@@ -125,3 +145,4 @@ adapters:
     worker_should_get_output: true
     scheduler_create_timestamps: true
     worker_create_timestamps: false
+
flowcept/flowceptor/adapters/zambeze/__init__.py
DELETED

@@ -1 +0,0 @@
-"""Zambeze subpackage."""
flowcept/flowceptor/adapters/zambeze/zambeze_dataclasses.py
DELETED

@@ -1,41 +0,0 @@
-"""Zambeze dataclass module."""
-
-from dataclasses import dataclass
-from typing import List, Dict
-
-from flowcept.commons.flowcept_dataclasses.base_settings_dataclasses import (
-    BaseSettings,
-    KeyValue,
-)
-
-
-@dataclass
-class ZambezeMessage:
-    """Zambeze message."""
-
-    name: str
-    activity_id: str
-    campaign_id: str
-    origin_agent_id: str
-    files: List[str]
-    command: str
-    activity_status: str
-    arguments: List[str]
-    kwargs: Dict
-    depends_on: List[str]
-
-
-@dataclass
-class ZambezeSettings(BaseSettings):
-    """Zambeze settings."""
-
-    host: str
-    port: int
-    queue_names: List[str]
-    key_values_to_filter: List[KeyValue] = None
-    kind = "zambeze"
-
-    def __post_init__(self):
-        """Set attributes after init."""
-        self.observer_type = "message_broker"
-        self.observer_subtype = "rabbit_mq"
flowcept/flowceptor/adapters/zambeze/zambeze_interceptor.py
DELETED

@@ -1,102 +0,0 @@
-"""Zambeze interceptor module."""
-
-from threading import Thread
-from time import sleep
-import pika
-import json
-from typing import Dict
-
-from flowcept.commons.utils import get_utc_now, get_status_from_str
-from flowcept.commons.flowcept_dataclasses.task_object import TaskObject
-from flowcept.flowceptor.adapters.base_interceptor import (
-    BaseInterceptor,
-)
-
-
-class ZambezeInterceptor(BaseInterceptor):
-    """Zambeze interceptor."""
-
-    def __init__(self, plugin_key="zambeze"):
-        super().__init__(plugin_key)
-        self._consumer_tag = None
-        self._channel = None
-        self._observer_thread: Thread = None
-
-    def prepare_task_msg(self, zambeze_msg: Dict) -> TaskObject:
-        """Prepare a task."""
-        task_msg = TaskObject()
-        task_msg.utc_timestamp = get_utc_now()
-        task_msg.campaign_id = zambeze_msg.get("campaign_id", None)
-        task_msg.task_id = zambeze_msg.get("activity_id", None)
-        task_msg.activity_id = zambeze_msg.get("name", None)
-        task_msg.dependencies = zambeze_msg.get("depends_on", None)
-        task_msg.custom_metadata = {"command": zambeze_msg.get("command", None)}
-        task_msg.status = get_status_from_str(zambeze_msg.get("activity_status", None))
-        task_msg.used = {
-            "args": zambeze_msg.get("arguments", None),
-            "kwargs": zambeze_msg.get("kwargs", None),
-            "files": zambeze_msg.get("files", None),
-        }
-        return task_msg
-
-    def start(self, bundle_exec_id) -> "ZambezeInterceptor":
-        """Start it."""
-        super().start(bundle_exec_id)
-        self._observer_thread = Thread(target=self.observe)
-        self._observer_thread.start()
-        return self
-
-    def stop(self) -> bool:
-        """Stop it."""
-        self.logger.debug("Interceptor stopping...")
-        super().stop()
-        try:
-            self._channel.stop_consuming()
-        except Exception as e:
-            self.logger.warning(f"This exception is expected to occur after channel.basic_cancel: {e}")
-        sleep(2)
-        self._observer_thread.join()
-        self.logger.debug("Interceptor stopped.")
-        return True
-
-    def observe(self):
-        """Observe it."""
-        connection = pika.BlockingConnection(
-            pika.ConnectionParameters(host=self.settings.host, port=self.settings.port)
-        )
-        self._channel = connection.channel()
-        for queue in self.settings.queue_names:
-            self._channel.queue_declare(queue=queue)
-
-        # self._consumer_tag =\
-        for queue in self.settings.queue_names:
-            self._channel.basic_consume(
-                queue=queue,
-                on_message_callback=self.callback,
-                auto_ack=True,
-            )
-            self.logger.debug(f"Waiting for Zambeze messages on queue {queue}")
-
-        try:
-            self._channel.start_consuming()
-        except Exception as e:
-            self.logger.warning(
-                f"If this exception happens after channel.start_consuming finishes, it is expected:\n {e}"
-            )
-
-    def _intercept(self, body_obj):
-        self.logger.debug(f"Zambeze interceptor needs to intercept this:\n\t{json.dumps(body_obj)}")
-        task_msg = self.prepare_task_msg(body_obj)
-        self.intercept(task_msg.to_dict())
-
-    def callback(self, ch, method, properties, body):
-        """Implement the callback."""
-        body_obj = json.loads(body)
-        if self.settings.key_values_to_filter is not None:
-            for key_value in self.settings.key_values_to_filter:
-                if key_value.key in body_obj:
-                    if body_obj[key_value.key] == key_value.value:
-                        self._intercept(body_obj)
-                        break
-        else:
-            self._intercept(body_obj)
flowcept-0.8.9.dist-info/WHEEL → flowcept-0.8.11.dist-info/WHEEL
File without changes

flowcept-0.8.9.dist-info/licenses/LICENSE → flowcept-0.8.11.dist-info/licenses/LICENSE
File without changes