flowcept 0.8.9__py3-none-any.whl → 0.8.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. flowcept/cli.py +460 -0
  2. flowcept/commons/daos/keyvalue_dao.py +19 -23
  3. flowcept/commons/daos/mq_dao/mq_dao_base.py +29 -29
  4. flowcept/commons/daos/mq_dao/mq_dao_kafka.py +4 -3
  5. flowcept/commons/daos/mq_dao/mq_dao_mofka.py +4 -0
  6. flowcept/commons/daos/mq_dao/mq_dao_redis.py +38 -5
  7. flowcept/commons/daos/redis_conn.py +47 -0
  8. flowcept/commons/flowcept_dataclasses/task_object.py +36 -8
  9. flowcept/commons/settings_factory.py +2 -4
  10. flowcept/commons/task_data_preprocess.py +200 -0
  11. flowcept/commons/utils.py +1 -1
  12. flowcept/configs.py +11 -9
  13. flowcept/flowcept_api/flowcept_controller.py +30 -13
  14. flowcept/flowceptor/adapters/agents/__init__.py +1 -0
  15. flowcept/flowceptor/adapters/agents/agents_utils.py +89 -0
  16. flowcept/flowceptor/adapters/agents/flowcept_agent.py +292 -0
  17. flowcept/flowceptor/adapters/agents/flowcept_llm_prov_capture.py +186 -0
  18. flowcept/flowceptor/adapters/agents/prompts.py +51 -0
  19. flowcept/flowceptor/adapters/base_interceptor.py +17 -19
  20. flowcept/flowceptor/adapters/brokers/__init__.py +1 -0
  21. flowcept/flowceptor/adapters/brokers/mqtt_interceptor.py +132 -0
  22. flowcept/flowceptor/adapters/mlflow/mlflow_interceptor.py +3 -3
  23. flowcept/flowceptor/adapters/tensorboard/tensorboard_interceptor.py +3 -3
  24. flowcept/flowceptor/consumers/agent/__init__.py +1 -0
  25. flowcept/flowceptor/consumers/agent/base_agent_context_manager.py +101 -0
  26. flowcept/flowceptor/consumers/agent/client_agent.py +48 -0
  27. flowcept/flowceptor/consumers/agent/flowcept_agent_context_manager.py +145 -0
  28. flowcept/flowceptor/consumers/agent/flowcept_qa_manager.py +112 -0
  29. flowcept/flowceptor/consumers/base_consumer.py +90 -0
  30. flowcept/flowceptor/consumers/document_inserter.py +138 -53
  31. flowcept/flowceptor/telemetry_capture.py +1 -1
  32. flowcept/instrumentation/task_capture.py +19 -9
  33. flowcept/version.py +1 -1
  34. {flowcept-0.8.9.dist-info → flowcept-0.8.11.dist-info}/METADATA +18 -6
  35. {flowcept-0.8.9.dist-info → flowcept-0.8.11.dist-info}/RECORD +39 -25
  36. flowcept-0.8.11.dist-info/entry_points.txt +2 -0
  37. resources/sample_settings.yaml +44 -23
  38. flowcept/flowceptor/adapters/zambeze/__init__.py +0 -1
  39. flowcept/flowceptor/adapters/zambeze/zambeze_dataclasses.py +0 -41
  40. flowcept/flowceptor/adapters/zambeze/zambeze_interceptor.py +0 -102
  41. {flowcept-0.8.9.dist-info → flowcept-0.8.11.dist-info}/WHEEL +0 -0
  42. {flowcept-0.8.9.dist-info → flowcept-0.8.11.dist-info}/licenses/LICENSE +0 -0
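Beyond the version bump, the headline changes are a new command-line interface (flowcept/cli.py, registered via the new entry_points.txt, which gains exactly two lines, presumably a [console_scripts] section declaring something like flowcept = flowcept.cli:main, though the diff does not show its contents and both names are guesses), a set of agent adapters and consumers (with LLM and MCP settings appearing in the sample configuration), a generic MQTT broker interceptor, and the removal of the Zambeze adapter. The diff below covers resources/sample_settings.yaml, followed by the three removed Zambeze modules.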
resources/sample_settings.yaml
@@ -1,4 +1,4 @@
-flowcept_version: 0.8.9 # Version of the Flowcept package. This setting file is compatible with this version.
+flowcept_version: 0.8.11 # Version of the Flowcept package. This setting file is compatible with this version.
 
 project:
   debug: true # Toggle debug mode. This will add a property `debug: true` to all saved data, making it easier to retrieve/delete them later.
@@ -25,7 +25,6 @@ telemetry_capture: # This toggles each individual type of telemetry capture. GPU
 
 instrumentation:
   enabled: true # This toggles data capture for instrumentation.
-  singleton: true # Use a single instrumentation instance per process. Defaults to true
   torch:
     what: parent_and_children # Scope of instrumentation: "parent_only" -- will capture only at the main model level, "parent_and_children" -- will capture the inner layers, or ~ (disable).
     children_mode: telemetry_and_tensor_inspection # What to capture if parent_and_children is chosen in the scope. Possible values: "tensor_inspection" (i.e., tensor metadata), "telemetry", "telemetry_and_tensor_inspection"
@@ -40,18 +39,22 @@ experiment:
 mq:
   type: redis # or kafka or mofka; please adjust the port (kafka's default is 9092; redis's is 6379). If mofka, adjust the group_file.
   host: localhost
-  # instances: ["localhost:6379"] # We can have multiple redis instances being accessed by the consumers but each interceptor will currently access one single redis.
+  # uri: ?
+  # instances: ["localhost:6379"] # We can have multiple MQ instances being accessed by the consumers, but each interceptor will currently access one single MQ.
   port: 6379
   # group_file: mofka.json
   channel: interception
   buffer_size: 50
   insertion_buffer_time_secs: 5
   timing: false
+  # uri: use Redis connection uri here
   chunk_size: -1 # use 0 or -1 to disable this. Or simply omit this from the config file.
+  same_as_kvdb: false # Set this to true if you are using the same Redis instance both as the MQ and as the KV_DB. In that case, there is no need to repeat connection parameters under mq; use only what you define under kv_db.
 
 kv_db:
   host: localhost
   port: 6379
+  enabled: true
   # uri: use Redis connection uri here
 
 web_server:
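An editor's aside on the new same_as_kvdb flag above: it lets deployments that use one Redis instance for both the MQ and the KV_DB define the connection once, under kv_db. Below is a minimal sketch of how a settings loader could resolve the effective MQ connection under that flag; it is illustrative only, and the function and path names are assumptions, not Flowcept's actual API.

    import yaml

    def resolve_mq_conn(settings_path="resources/sample_settings.yaml"):
        # Illustrative: fall back to the kv_db connection params when
        # mq.same_as_kvdb is true, mirroring the comment in the sample file.
        with open(settings_path) as f:
            cfg = yaml.safe_load(f)
        mq, kv = cfg.get("mq", {}), cfg.get("kv_db", {})
        src = kv if mq.get("same_as_kvdb") else mq
        # Assumption: a uri, if present, takes precedence over host/port.
        return src.get("uri") or (src.get("host", "localhost"), src.get("port", 6379))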
@@ -59,9 +62,9 @@ web_server:
   port: 5000
 
 sys_metadata:
-  environment_id: "laptop"
+  environment_id: "laptop" # We use this to keep track of the environment used to run an experiment. Typical values include the cluster name, but it can be anything that you think will help identify your experimentation environment.
 
-extra_metadata:
+extra_metadata: # We use this to store any extra metadata you want to keep track of during an experiment.
   place_holder: ""
 
 analytics:
@@ -70,13 +73,20 @@ analytics:
   generated.accuracy: maximum_first
 
 db_buffer:
-  adaptive_buffer_size: true
-  insertion_buffer_time_secs: 5
-  max_buffer_size: 50
-  min_buffer_size: 10
-  remove_empty_fields: false
-  stop_max_trials: 240
-  stop_trials_sleep: 0.01
+  insertion_buffer_time_secs: 5 # Time interval (in seconds) to buffer incoming records before flushing to the database
+  buffer_size: 50 # Maximum number of records to hold in the buffer before forcing a flush
+  remove_empty_fields: false # If true, fields with null/empty values will be removed before insertion
+  stop_max_trials: 240 # Maximum number of trials before giving up when waiting for a fully safe stop (i.e., all records have been inserted as expected).
+  stop_trials_sleep: 0.01 # Sleep duration (in seconds) between trials when waiting for a fully safe stop.
+
+agent:
+  enabled: false
+  mcp_host: localhost
+  mcp_port: 8000
+  llm_server_url: '?'
+  api_key: '?'
+  model: '?'
+  model_kwargs: {}
 
 databases:
 
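An editor's aside on the new agent block above: it points Flowcept's agent at an MCP endpoint (mcp_host/mcp_port) and an external LLM server. Assuming the server behind llm_server_url speaks the OpenAI-compatible API (an assumption; the diff does not show which client flowcept_agent.py actually uses), the settings map onto a client roughly as sketched below. All concrete values are hypothetical placeholders for the '?' entries in the sample.

    from openai import OpenAI  # assumption: an OpenAI-compatible server

    agent_cfg = {
        "llm_server_url": "http://localhost:8000/v1",  # hypothetical; '?' in the sample
        "api_key": "not-a-real-key",                   # hypothetical; '?' in the sample
        "model": "some-model",                         # hypothetical; '?' in the sample
        "model_kwargs": {"temperature": 0.2},          # sample default is {}
    }

    client = OpenAI(base_url=agent_cfg["llm_server_url"], api_key=agent_cfg["api_key"])
    resp = client.chat.completions.create(
        model=agent_cfg["model"],
        messages=[{"role": "user", "content": "Summarize the last workflow run."}],
        **agent_cfg["model_kwargs"],
    )
    print(resp.choices[0].message.content)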
@@ -89,20 +99,30 @@ databases:
     host: localhost
     port: 27017
     db: flowcept
-    create_collection_index: true
+    create_collection_index: true # Whether flowcept should create collection indices if they haven't been created yet. This is done only at Flowcept startup.
 
 adapters:
   # For each key below, you can have multiple instances. Like mlflow1, mlflow2; zambeze1, zambeze2. Use an empty dict, {}, if you won't use any adapter.
-  zambeze:
-    kind: zambeze
-    host: localhost
-    port: 5672
-    queue_names:
-      - hello
-      - hello2
-    # key_values_to_filter:
-    #   - key: activity_status
-    #     value: CREATED
+
+  broker_mqtt:
+    kind: broker
+    host: h
+    port: 30011
+    protocol: mqtt3.1.1
+    queues: ["#"]
+    username: postman
+    password: p
+    qos: 2
+    task_subtype: intersect_msg
+    tracked_keys:
+      used: payload
+      generated: ~
+      custom_metadata: [headers, msgId]
+      activity_id: operationId
+      submitted_at: ~
+      started_at: ~
+      ended_at: ~
+      registered_at: ~
 
   mlflow:
     kind: mlflow
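An editor's aside on the new broker_mqtt adapter above: it subscribes to the configured queues and maps incoming message fields onto task records via tracked_keys (e.g., the task's used data comes from the message's payload field and its activity_id from operationId). Below is a standalone sketch of the subscription side using paho-mqtt, assuming the 1.x callback API; the actual implementation in mqtt_interceptor.py may differ.

    import json
    import paho.mqtt.client as mqtt  # assumption: paho-mqtt 1.x callback API

    def on_message(client, userdata, msg):
        body = json.loads(msg.payload)
        # Mirror tracked_keys from the sample config: 'used' comes from the
        # message's payload field, 'activity_id' from its operationId field.
        task = {"used": body.get("payload"), "activity_id": body.get("operationId")}
        print(task)

    client = mqtt.Client(protocol=mqtt.MQTTv311)  # protocol: mqtt3.1.1
    client.username_pw_set("postman", "p")        # placeholders from the sample config
    client.on_message = on_message
    client.connect("h", 30011)                    # host/port placeholders from the sample
    client.subscribe("#", qos=2)                  # queues: ["#"], qos: 2
    client.loop_forever()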
@@ -125,3 +145,4 @@ adapters:
     worker_should_get_output: true
     scheduler_create_timestamps: true
     worker_create_timestamps: false
+
flowcept/flowceptor/adapters/zambeze/__init__.py
@@ -1 +0,0 @@
-"""Zambeze subpackage."""
flowcept/flowceptor/adapters/zambeze/zambeze_dataclasses.py
@@ -1,41 +0,0 @@
-"""Zambeze dataclass module."""
-
-from dataclasses import dataclass
-from typing import List, Dict
-
-from flowcept.commons.flowcept_dataclasses.base_settings_dataclasses import (
-    BaseSettings,
-    KeyValue,
-)
-
-
-@dataclass
-class ZambezeMessage:
-    """Zambeze message."""
-
-    name: str
-    activity_id: str
-    campaign_id: str
-    origin_agent_id: str
-    files: List[str]
-    command: str
-    activity_status: str
-    arguments: List[str]
-    kwargs: Dict
-    depends_on: List[str]
-
-
-@dataclass
-class ZambezeSettings(BaseSettings):
-    """Zambeze settings."""
-
-    host: str
-    port: int
-    queue_names: List[str]
-    key_values_to_filter: List[KeyValue] = None
-    kind = "zambeze"
-
-    def __post_init__(self):
-        """Set attributes after init."""
-        self.observer_type = "message_broker"
-        self.observer_subtype = "rabbit_mq"
flowcept/flowceptor/adapters/zambeze/zambeze_interceptor.py
@@ -1,102 +0,0 @@
-"""Zambeze interceptor module."""
-
-from threading import Thread
-from time import sleep
-import pika
-import json
-from typing import Dict
-
-from flowcept.commons.utils import get_utc_now, get_status_from_str
-from flowcept.commons.flowcept_dataclasses.task_object import TaskObject
-from flowcept.flowceptor.adapters.base_interceptor import (
-    BaseInterceptor,
-)
-
-
-class ZambezeInterceptor(BaseInterceptor):
-    """Zambeze interceptor."""
-
-    def __init__(self, plugin_key="zambeze"):
-        super().__init__(plugin_key)
-        self._consumer_tag = None
-        self._channel = None
-        self._observer_thread: Thread = None
-
-    def prepare_task_msg(self, zambeze_msg: Dict) -> TaskObject:
-        """Prepare a task."""
-        task_msg = TaskObject()
-        task_msg.utc_timestamp = get_utc_now()
-        task_msg.campaign_id = zambeze_msg.get("campaign_id", None)
-        task_msg.task_id = zambeze_msg.get("activity_id", None)
-        task_msg.activity_id = zambeze_msg.get("name", None)
-        task_msg.dependencies = zambeze_msg.get("depends_on", None)
-        task_msg.custom_metadata = {"command": zambeze_msg.get("command", None)}
-        task_msg.status = get_status_from_str(zambeze_msg.get("activity_status", None))
-        task_msg.used = {
-            "args": zambeze_msg.get("arguments", None),
-            "kwargs": zambeze_msg.get("kwargs", None),
-            "files": zambeze_msg.get("files", None),
-        }
-        return task_msg
-
-    def start(self, bundle_exec_id) -> "ZambezeInterceptor":
-        """Start it."""
-        super().start(bundle_exec_id)
-        self._observer_thread = Thread(target=self.observe)
-        self._observer_thread.start()
-        return self
-
-    def stop(self) -> bool:
-        """Stop it."""
-        self.logger.debug("Interceptor stopping...")
-        super().stop()
-        try:
-            self._channel.stop_consuming()
-        except Exception as e:
-            self.logger.warning(f"This exception is expected to occur after channel.basic_cancel: {e}")
-        sleep(2)
-        self._observer_thread.join()
-        self.logger.debug("Interceptor stopped.")
-        return True
-
-    def observe(self):
-        """Observe it."""
-        connection = pika.BlockingConnection(
-            pika.ConnectionParameters(host=self.settings.host, port=self.settings.port)
-        )
-        self._channel = connection.channel()
-        for queue in self.settings.queue_names:
-            self._channel.queue_declare(queue=queue)
-
-        # self._consumer_tag =\
-        for queue in self.settings.queue_names:
-            self._channel.basic_consume(
-                queue=queue,
-                on_message_callback=self.callback,
-                auto_ack=True,
-            )
-            self.logger.debug(f"Waiting for Zambeze messages on queue {queue}")
-
-        try:
-            self._channel.start_consuming()
-        except Exception as e:
-            self.logger.warning(
-                f"If this exception happens after channel.start_consuming finishes, it is expected:\n {e}"
-            )
-
-    def _intercept(self, body_obj):
-        self.logger.debug(f"Zambeze interceptor needs to intercept this:\n\t{json.dumps(body_obj)}")
-        task_msg = self.prepare_task_msg(body_obj)
-        self.intercept(task_msg.to_dict())
-
-    def callback(self, ch, method, properties, body):
-        """Implement the callback."""
-        body_obj = json.loads(body)
-        if self.settings.key_values_to_filter is not None:
-            for key_value in self.settings.key_values_to_filter:
-                if key_value.key in body_obj:
-                    if body_obj[key_value.key] == key_value.value:
-                        self._intercept(body_obj)
-                        break
-        else:
-            self._intercept(body_obj)