flowcept 0.9.1__py3-none-any.whl → 0.9.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowcept/__init__.py +5 -0
- flowcept/agents/flowcept_ctx_manager.py +10 -6
- flowcept/agents/gui/gui_utils.py +52 -1
- flowcept/agents/prompts/general_prompts.py +1 -1
- flowcept/agents/prompts/in_memory_query_prompts.py +36 -17
- flowcept/agents/tools/general_tools.py +1 -1
- flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py +19 -3
- flowcept/cli.py +41 -42
- flowcept/commons/autoflush_buffer.py +5 -0
- flowcept/commons/daos/docdb_dao/lmdb_dao.py +4 -1
- flowcept/commons/daos/mq_dao/mq_dao_base.py +1 -0
- flowcept/commons/flowcept_dataclasses/task_object.py +86 -6
- flowcept/commons/flowcept_dataclasses/workflow_object.py +41 -1
- flowcept/commons/task_data_preprocess.py +8 -2
- flowcept/configs.py +1 -1
- flowcept/flowcept_api/flowcept_controller.py +9 -1
- flowcept/flowceptor/consumers/base_consumer.py +5 -0
- flowcept/flowceptor/consumers/consumer_utils.py +11 -3
- flowcept/flowceptor/telemetry_capture.py +2 -1
- flowcept/instrumentation/task_capture.py +16 -16
- flowcept/version.py +1 -1
- flowcept-0.9.3.dist-info/METADATA +589 -0
- {flowcept-0.9.1.dist-info → flowcept-0.9.3.dist-info}/RECORD +27 -27
- resources/sample_settings.yaml +2 -2
- flowcept-0.9.1.dist-info/METADATA +0 -439
- {flowcept-0.9.1.dist-info → flowcept-0.9.3.dist-info}/WHEEL +0 -0
- {flowcept-0.9.1.dist-info → flowcept-0.9.3.dist-info}/entry_points.txt +0 -0
- {flowcept-0.9.1.dist-info → flowcept-0.9.3.dist-info}/licenses/LICENSE +0 -0
flowcept/__init__.py
CHANGED
````diff
@@ -16,6 +16,11 @@ def __getattr__(name):
 
         return WorkflowObject
 
+    elif name == "TaskObject":
+        from flowcept.commons.flowcept_dataclasses.task_object import TaskObject
+
+        return TaskObject
+
     elif name == "flowcept_task":
         from flowcept.instrumentation.flowcept_task import flowcept_task
 
````
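The new branch mirrors the existing `WorkflowObject` lazy export (PEP 562 module `__getattr__`), so `TaskObject` can now be imported from the top-level package without eagerly loading its module. A minimal sketch, assuming flowcept 0.9.3 is installed:

```python
import flowcept

# Accessing the name triggers flowcept.__getattr__("TaskObject"), which imports
# flowcept.commons.flowcept_dataclasses.task_object on demand.
TaskObject = flowcept.TaskObject
print(TaskObject.type)  # "task" (class-level constant, see task_object.py below)
```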
flowcept/agents/flowcept_ctx_manager.py
CHANGED
````diff
@@ -53,7 +53,7 @@ class FlowceptAgentContextManager(BaseAgentContextManager):
         Current application context holding task state and QA components.
     msgs_counter : int
         Counter tracking how many task messages have been processed.
-
+    context_chunk_size : int
         Number of task messages to collect before triggering QA index building and LLM analysis.
     qa_manager : FlowceptQAManager
         Utility for constructing QA chains from task summaries.
@@ -64,7 +64,7 @@ class FlowceptAgentContextManager(BaseAgentContextManager):
         self.tracker_config = dict(max_examples=3, max_str_len=50)
         self.schema_tracker = DynamicSchemaTracker(**self.tracker_config)
         self.msgs_counter = 0
-        self.
+        self.context_chunk_size = 1  # Should be in the settings
         super().__init__()
 
     def message_handler(self, msg_obj: Dict):
@@ -98,18 +98,22 @@ class FlowceptAgentContextManager(BaseAgentContextManager):
         if len(task_summary.get("tags", [])):
             self.context.critical_tasks.append(task_summary)
 
-        if self.msgs_counter > 0 and self.msgs_counter % self.
+        if self.msgs_counter > 0 and self.msgs_counter % self.context_chunk_size == 0:
             self.logger.debug(
-                f"Going to add to index! {(self.msgs_counter - self.
+                f"Going to add to index! {(self.msgs_counter - self.context_chunk_size, self.msgs_counter)}"
             )
             try:
                 self.update_schema_and_add_to_df(
-                    tasks=self.context.task_summaries[
+                    tasks=self.context.task_summaries[
+                        self.msgs_counter - self.context_chunk_size : self.msgs_counter
+                    ]
                 )
             except Exception as e:
                 self.logger.error(
                     f"Could not add these tasks to buffer!\n"
-                    f"{
+                    f"{
+                        self.context.task_summaries[self.msgs_counter - self.context_chunk_size : self.msgs_counter]
+                    }"
                 )
                 self.logger.exception(e)
 
````
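The fixed-up chunking slices the rolling `task_summaries` list in windows of `context_chunk_size`. A standalone sketch of the same window arithmetic, with toy data in place of Flowcept's context object:

```python
context_chunk_size = 2  # the diff hardcodes 1 and notes it should move to settings
task_summaries = ["t1", "t2", "t3", "t4", "t5"]

for msgs_counter in range(1, len(task_summaries) + 1):
    if msgs_counter > 0 and msgs_counter % context_chunk_size == 0:
        # Same half-open window the handler passes to update_schema_and_add_to_df
        window = task_summaries[msgs_counter - context_chunk_size : msgs_counter]
        print(msgs_counter, window)  # 2 ['t1', 't2']; 4 ['t3', 't4']
```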
flowcept/agents/gui/gui_utils.py
CHANGED
````diff
@@ -1,3 +1,5 @@
+import base64
+import ast
 import io
 import json
 
@@ -122,6 +124,53 @@ def display_ai_msg_from_tool(tool_result: ToolResult):
     return agent_reply
 
 
+def _sniff_mime(b: bytes) -> str:
+    if b.startswith(b"\x89PNG\r\n\x1a\n"):
+        return "image/png"
+    if b.startswith(b"\xff\xd8\xff"):
+        return "image/jpeg"
+    if b.startswith(b"GIF87a") or b.startswith(b"GIF89a"):
+        return "image/gif"
+    if b.startswith(b"BM"):
+        return "image/bmp"
+    if b.startswith(b"RIFF") and b[8:12] == b"WEBP":
+        return "image/webp"
+    return "application/octet-stream"
+
+
+def ensure_data_uri(val):
+    r"""Accepts bytes/bytearray/memoryview or a repr like \"b'\\x89PNG...'\" and returns a data URL."""
+    if isinstance(val, str) and val.startswith("data:"):
+        return val
+    if isinstance(val, str) and val.startswith("b'"):
+        try:
+            val = ast.literal_eval(val)  # turn repr into bytes
+        except Exception:
+            return None
+    if isinstance(val, memoryview):
+        val = val.tobytes()
+    if isinstance(val, bytearray):
+        val = bytes(val)
+    if isinstance(val, bytes):
+        mime = _sniff_mime(val)
+        return f"data:{mime};base64,{base64.b64encode(val).decode('ascii')}"
+    return val  # path/URL, etc.
+
+
+def _render_df(df: pd.DataFrame, image_width: int = 90, row_height: int = 90):
+    if "image" in df.columns:
+        df = df.copy()
+        df["image"] = df["image"].apply(ensure_data_uri)
+        st.dataframe(
+            df,
+            column_config={"image": st.column_config.ImageColumn("Preview", width=image_width)},
+            hide_index=True,
+            row_height=row_height,  # make thumbnails visible
+        )
+    else:
+        st.dataframe(df, hide_index=True)
+
+
 def display_df_tool_response(tool_result: ToolResult):
     r"""
     Display the DataFrame contained in a ToolResult.
@@ -170,7 +219,8 @@ def display_df_tool_response(tool_result: ToolResult):
             df = pd.read_csv(io.StringIO(result_df_str))
             print("The result is a df")
             if not df.empty:
-
+                _render_df(df)
+
                 print("Columns", str(df.columns))
                 print("Number of columns", len(df.columns))
             else:
@@ -190,6 +240,7 @@ def display_df_tool_response(tool_result: ToolResult):
 
     if summary:
         st.markdown("📝 Summary:")
+        print(f"THIS IS THE SUMMARY\n{summary}")
         st.markdown(summary)
     elif summary_error:
         st.markdown(f"⚠️ Encountered this error when summarizing the result dataframe:\n```text\n{summary_error}")
````
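The new helpers let `st.column_config.ImageColumn` render raw image bytes by converting them to `data:` URLs. The sniff-and-encode step in isolation (the byte string below is only a PNG magic header, not a decodable image):

```python
import base64

png_bytes = b"\x89PNG\r\n\x1a\n" + b"\x00" * 8  # fake payload with a real PNG signature

# Same signature check _sniff_mime applies for PNG, followed by the base64
# data-URL encoding ensure_data_uri performs for bytes input.
mime = "image/png" if png_bytes.startswith(b"\x89PNG\r\n\x1a\n") else "application/octet-stream"
uri = f"data:{mime};base64,{base64.b64encode(png_bytes).decode('ascii')}"
print(uri)  # data:image/png;base64,iVBORw0KGgoAAAAAAAAAAA==
```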
flowcept/agents/prompts/general_prompts.py
CHANGED
````diff
@@ -24,8 +24,8 @@ ROUTING_PROMPT = (
     "Given the following user message, classify it into one of the following routes:\n"
     "- small_talk: if it's casual conversation or some random word (e.g., 'hausdn', 'a', hello, how are you, what can you do, what's your name)\n"
     "- plot: if user is requesting plots (e.g., plot, chart, visualize)\n"
+    "- in_context_query: if the user asks questions about tasks or data in running workflow (or a workflow that ran recently) or if the user mentions the in-memory 'df' or a dataframe.\n"
     "- historical_prov_query: if the user wants to query historical provenance data\n"
-    "- in_context_query: if the user appears to ask questions about tasks or data in running workflow (or a workflow that ran recently) or if the user mentions the in-memory 'df' or a dataframe.\n"
     "- in_chat_query: if the user appears to be asking about something that has said recently in this chat.\n"
     "- unknown: if you don't know.\n"
     "Respond with only the route label."
````
flowcept/agents/prompts/in_memory_query_prompts.py
CHANGED
````diff
@@ -15,6 +15,7 @@ COMMON_TASK_FIELDS = """
 | `ended_at` | datetime64[ns, UTC] | End time of a task. |
 | `subtype` | string | Subtype of a task. |
 | `tags` | List[str] | List of descriptive tags. |
+| `image` | blob | Raw binary data related to an image. |
 | `telemetry_summary.duration_sec` | float | Task duration (seconds). |
 | `telemetry_summary.cpu.percent_all_diff` | float | Difference in overall CPU utilization percentage across all cores between task end and start.|
 | `telemetry_summary.cpu.user_time_diff` | float | Difference average per core CPU user time ( seconds ) between task start and end times.|
@@ -27,6 +28,17 @@ COMMON_TASK_FIELDS = """
 DF_FORM = "The user has a pandas DataFrame called `df`, created from flattened task objects using `pd.json_normalize`."
 
 
+def get_example_values_prompt(example_values):
+    values_prompt = f"""
+Now, this other dictionary below provides type (t), up to 3 example values (v), and, for lists, shape (s) and element type (et) for each field.
+Field names do not include `used.` or `generated.` They represent the unprefixed form shared across roles. String values may be truncated if they exceed the length limit.
+```python
+{example_values}
+```
+"""
+    return values_prompt
+
+
 def get_df_schema_prompt(dynamic_schema, example_values):
     schema_prompt = f"""
 ## DATAFRAME STRUCTURE
@@ -52,14 +64,7 @@ def get_df_schema_prompt(dynamic_schema, example_values):
 ---
 """
 
-    values_prompt =
-Now, this other dictionary below provides type (t), up to 3 example values (v), and, for lists, shape (s) and element type (et) for each field.
-Field names do not include `used.` or `generated.` They represent the unprefixed form shared across roles. String values may be truncated if they exceed the length limit.
-```python
-{example_values}
-```
-"""
-
+    values_prompt = get_example_values_prompt(example_values)
     # values_prompt = ""
     prompt = schema_prompt + values_prompt
     return prompt
@@ -221,7 +226,7 @@ def generate_pandas_code_prompt(query: str, dynamic_schema, example_values):
         f"{JOB}"
         f"{DF_FORM}"
         f"{get_df_schema_prompt(dynamic_schema, example_values)}"  # main tester
-
+        f"{QUERY_GUIDELINES}"  # main tester
         f"{FEW_SHOTS}"  # main tester
         f"{OUTPUT_FORMATTING}"
         "User Query:"
@@ -230,9 +235,16 @@
     return prompt
 
 
-def dataframe_summarizer_context(code, reduced_df, query) -> str:
+def dataframe_summarizer_context(code, reduced_df, dynamic_schema, example_values, query) -> str:
+    job = "You are a Workflow Provenance Specialist analyzing a DataFrame that was obtained to answer a query."
+
+    if "image" in reduced_df.columns:
+        reduced_df = reduced_df.drop(columns=["image"])
+
     prompt = f"""
-
+{job}
+
+Given:
 
 **User Query**:
 {query}
@@ -240,19 +252,26 @@ def dataframe_summarizer_context(code, reduced_df, query) -> str:
 **Query_Code**:
 {code}
 
-**Reduced DataFrame** (rows sampled from full result):
+**Reduced DataFrame `df` contents** (rows sampled from full result):
 {reduced_df}
 
-
-
-
-
+**Original df (before reduction) had this schema:
+{get_df_schema_prompt(dynamic_schema, example_values)}
+
+Your task is to find a concise and direct answer as an English sentence to the user query.
+
+Only if the answer to the query is complex, provide more explanation by:
+1. Analyzing the DataFrame values and columns for any meaningful or notable information.
+2. Comparing the query_code with the data content to understand what the result represents. THIS IS A REDUCED DATAFRAME, the original dataframe, used to answer the query, may be much bigger. IT IS ALREADY KNOWN! Do not need to restate this.
+3. If it makes sense, provide information beyond the recorded provenance, but state it clearly that you are inferring it.
+
+In the end, conclude by giving your concise answer as follows: **Response**: <YOUR ANSWER>
 
 Note that the user should not know that this is a reduced dataframe.
-
 Keep your response short and focused.
 
 """
+
     return prompt
 
 
````
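A behavioral detail in `dataframe_summarizer_context` worth calling out: binary image payloads (the new `image` field documented above) are dropped before the DataFrame is inlined into the prompt. The guard in isolation:

```python
import pandas as pd

# Reduced result frames may carry the binary `image` column; stripping it
# keeps raw bytes out of the LLM prompt text.
reduced_df = pd.DataFrame({"task_id": ["t1"], "image": [b"\x89PNG..."]})
if "image" in reduced_df.columns:
    reduced_df = reduced_df.drop(columns=["image"])
print(list(reduced_df.columns))  # ['task_id']
```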
flowcept/agents/tools/general_tools.py
CHANGED
````diff
@@ -74,7 +74,7 @@ def prompt_handler(message: str) -> ToolResult:
     TextContent
         The AI response or routing feedback.
     """
-    df_key_words =
+    df_key_words = ["df", "save", "result = df", "reset context"]
     for key in df_key_words:
         if key in message:
             return run_df_query(llm=None, query=message, plot=False)
````
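The restored keyword list routes by plain substring match; the same check in isolation:

```python
# Any keyword hit short-circuits prompt_handler into run_df_query(llm=None, ...).
df_key_words = ["df", "save", "result = df", "reset context"]

message = "show me the df of failed tasks"
print(any(key in message for key in df_key_words))  # True -> routed to the df query tool
```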
flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py
CHANGED
````diff
@@ -294,6 +294,8 @@ def generate_result_df(llm, query: str, dynamic_schema, example_values, df, atte
     >>> generate_result_df(llm, "bad query", schema, examples, df, attempt_fix=False)
     ToolResult(code=405, result="Failed to parse this as Python code: ...")
     """
+    if llm is None:
+        llm = build_llm_model()
     try:
         prompt = generate_pandas_code_prompt(query, dynamic_schema, example_values)
         response = llm(prompt)
@@ -351,7 +353,14 @@ def generate_result_df(llm, query: str, dynamic_schema, example_values, df, atte
     summary, summary_error = None, None
     if summarize:
         try:
-            tool_result = summarize_result(
+            tool_result = summarize_result(
+                llm,
+                result_code,
+                result_df,
+                query,
+                dynamic_schema,
+                example_values,
+            )
             if tool_result.is_success():
                 return_code = 301
                 summary = tool_result.result
@@ -570,7 +579,14 @@ def extract_or_fix_json_code(llm, raw_text) -> ToolResult:
 
 
 @mcp_flowcept.tool()
-def summarize_result(
+def summarize_result(
+    llm,
+    code,
+    result,
+    query: str,
+    dynamic_schema,
+    example_values,
+) -> ToolResult:
     """
     Summarize the pandas result with local reduction for large DataFrames.
     - For wide DataFrames, selects top columns based on variance and uniqueness.
@@ -578,7 +594,7 @@ def summarize_result(llm, code, result, query: str) -> ToolResult:
     - Constructs a detailed prompt for the LLM with original column context.
     """
     summarized_df = summarize_df(result, code)
-    prompt = dataframe_summarizer_context(code, summarized_df, query)
+    prompt = dataframe_summarizer_context(code, summarized_df, dynamic_schema, example_values, query)
     try:
         response = llm(prompt)
         return ToolResult(code=201, result=response)
````
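The `llm is None` guard is what makes `prompt_handler`'s `run_df_query(llm=None, ...)` call above viable: the model is built lazily on first need. The pattern with a stand-in factory (the lambda is illustrative, not Flowcept's `build_llm_model`):

```python
def build_llm_model():
    # Stand-in: the real factory constructs Flowcept's configured LLM client.
    return lambda prompt: f"echo: {prompt}"

def generate(llm, query):
    if llm is None:  # build a default model only when the caller passed none
        llm = build_llm_model()
    return llm(query)

print(generate(None, "count tasks per activity"))  # echo: count tasks per activity
```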
flowcept/cli.py
CHANGED
````diff
@@ -101,17 +101,17 @@ def version():
     print(f"Flowcept {__version__}")
 
 
-def stream_messages(
+def stream_messages(messages_file_path: Optional[str] = None, keys_to_show: List[str] = None):
     """
     Listen to Flowcept's message stream and optionally echo/save messages.
 
     Parameters.
     ----------
-    print_messages : bool, optional
-        If True, print each decoded message to stdout.
     messages_file_path : str, optional
         If provided, append each message as JSON (one per line) to this file.
         If the file already exists, a new timestamped file is created instead.
+    keys_to_show : List[str], optional
+        List of object keys to show in the prints. Use comma-separated list: --keys-to-show 'activity_id','workflow_id'
     """
     # Local imports to avoid changing module-level deps
     from flowcept.configs import MQ_TYPE
@@ -123,10 +123,7 @@ def stream_messages(print_messages: bool = False, messages_file_path: Optional[s
     import os
     import json
     from datetime import datetime
-    import
-    import msgpack
-    from flowcept.configs import MQ_HOST, MQ_PORT, MQ_CHANNEL, KVDB_URI
-    from flowcept.commons.daos.mq_dao.mq_dao_redis import MQDaoRedis
+    from flowcept.flowceptor.consumers.base_consumer import BaseConsumer
 
     def _timestamped_path_if_exists(path: Optional[str]) -> Optional[str]:
         if not path:
@@ -150,53 +147,53 @@ def stream_messages(print_messages: bool = False, messages_file_path: Optional[s
 
         return json.dumps(obj, ensure_ascii=False, separators=(",", ":"), default=_default)
 
-    # Prepare output file (JSONL)
     out_fh = None
     if messages_file_path:
         out_path = _timestamped_path_if_exists(messages_file_path)
         out_fh = open(out_path, "w", encoding="utf-8", buffering=1)  # line-buffered
 
-
-
-
-    pubsub.subscribe(MQ_CHANNEL)
-
-    print(f"Listening for messages on channel '{MQ_CHANNEL}'... (Ctrl+C to exit)")
-
-    try:
-        for message in pubsub.listen():
-            if not message or message.get("type") in MQDaoRedis.MESSAGE_TYPES_IGNORE:
-                continue
-
-            data = message.get("data")
-            if not isinstance(data, (bytes, bytearray)):
-                print(f"Skipping message with unexpected data type: {type(data)} - {data}")
-                continue
+    class MyConsumer(BaseConsumer):
+        def __init__(self):
+            super().__init__()
 
+        def message_handler(self, msg_obj: Dict) -> bool:
             try:
-
-
-
-
-
-
+                if keys_to_show is not None:
+                    obj_to_print = {}
+                    for k in keys_to_show:
+                        v = msg_obj.get(k, None)
+                        if v is not None:
+                            obj_to_print[k] = v
+                    if not obj_to_print:
+                        obj_to_print = msg_obj
+                else:
+                    obj_to_print = msg_obj
+
+                print(_json_dumps(obj_to_print))
 
                 if out_fh is not None:
-                out_fh.write(_json_dumps(
+                    out_fh.write(_json_dumps(obj_to_print))
                     out_fh.write("\n")
-
+            except KeyboardInterrupt:
+                print("\nGracefully interrupted, shutting down...")
+                return False
             except Exception as e:
-            print(
+                print(e)
+                return False
+            finally:
+                try:
+                    if out_fh:
+                        out_fh.close()
+                except Exception as e:
+                    print(e)
+                    return False
 
-
-
-
-
-
-            pubsub.close()
-        except Exception:
-            pass
+            return True
+
+    m = f"Printing only the keys {keys_to_show}" if keys_to_show is not None else ""
+    print(f"Listening for messages.{m} Ctrl+C to exit")
+    consumer = MyConsumer()
+    consumer.start(daemon=False)
 
 
 def start_consumption_services(bundle_exec_id: str = None, check_safe_stops: bool = False, consumers: List[str] = None):
@@ -700,3 +697,5 @@ def main():  # noqa: D103
 if __name__ == "__main__":
     main()
     # check_services()
+
+__doc__ = None
````
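The streaming loop now rides on the `BaseConsumer` abstraction instead of a hand-rolled Redis pubsub. A stripped-down consumer following the same pattern (assumes flowcept 0.9.3 and a running message queue; per the diff, returning `False` from `message_handler` stops consumption):

```python
from flowcept.flowceptor.consumers.base_consumer import BaseConsumer

class EchoConsumer(BaseConsumer):
    def message_handler(self, msg_obj: dict) -> bool:
        print(msg_obj.get("type"), msg_obj.get("task_id"))
        return True  # keep listening; return False to shut the consumer down

EchoConsumer().start(daemon=False)  # blocks, like the CLI's streaming command
```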
flowcept/commons/autoflush_buffer.py
CHANGED
````diff
@@ -47,6 +47,11 @@ class AutoflushBuffer:
         if len(buffer) >= self._max_size:
             self._swap_event.set()
 
+    @property
+    def current_buffer(self):
+        """Return the currently active buffer (read-only)."""
+        return self._buffers[self._current_buffer_index]
+
     def time_based_flush(self):
         """Time flush."""
         while not self._stop_event.is_set():
````
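`current_buffer` exposes the active side of the double buffer for inspection only; a toy version of the same accessor:

```python
class TwoBuffers:
    """Toy double buffer mirroring AutoflushBuffer's new read-only accessor."""

    def __init__(self):
        self._buffers = [[], []]
        self._current_buffer_index = 0

    @property
    def current_buffer(self):
        return self._buffers[self._current_buffer_index]

b = TwoBuffers()
b._buffers[0].append({"task_id": "t1"})
print(len(b.current_buffer))  # 1 -- observe without triggering a swap or flush
```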
flowcept/commons/daos/docdb_dao/lmdb_dao.py
CHANGED
````diff
@@ -56,7 +56,10 @@ class LMDBDAO(DocumentDBDAO):
         t0 = 0
         if PERF_LOG:
             t0 = time()
-        indexed_buffer = curate_dict_task_messages(
+        indexed_buffer = curate_dict_task_messages(
+            docs, indexing_key, t0, convert_times=False, keys_to_drop=["data"]
+        )
+
         with self._env.begin(write=True, db=self._tasks_db) as txn:
             for key, value in indexed_buffer.items():
                 k, v = key.encode(), json.dumps(value).encode()
````
flowcept/commons/daos/mq_dao/mq_dao_base.py
CHANGED
````diff
@@ -102,6 +102,7 @@ class MQDao(object):
 
         with open(DUMP_BUFFER_PATH, "wb", buffering=1_048_576) as f:
             for obj in buffer:
+                obj.pop("data", None)  # We are not going to store data in the buffer file.
                 f.write(orjson.dumps(obj))
                 f.write(b"\n")
         self.logger.info(f"Saved Flowcept messages into {DUMP_BUFFER_PATH}.")
````
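Both this change and the LMDB one above strip the heavy `data` payload before persistence; `dict.pop` with a default makes the strip a no-op when a message has no `data` key, so the dump loop never raises:

```python
obj = {"task_id": "t1", "data": b"large binary blob"}
obj.pop("data", None)  # removed before the message is written to the buffer file
obj.pop("data", None)  # absent key: returns None instead of raising KeyError
print(obj)  # {'task_id': 't1'}
```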
flowcept/commons/flowcept_dataclasses/task_object.py
CHANGED
````diff
@@ -16,45 +16,125 @@ from flowcept.configs import (
 
 
 class TaskObject:
-    """Task class.
+    """Task object class.
+
+    Represents a single provenance task in Flowcept, including inputs, outputs,
+    execution metadata, telemetry, and environment details.
+    """
 
     type = "task"
+    """Constant type label for this object ("task")."""
+
     subtype: AnyStr = None
-
+    """Optional subtype of the task (e.g., iteration, ML step, custom)."""
+
+    task_id: AnyStr = None
+    """Unique identifier of the task."""
+
     utc_timestamp: float = None
+    """UTC timestamp when the task object was created."""
+
     adapter_id: AnyStr = None
+    """Identifier of the adapter that produced this task (if any)."""
+
     user: AnyStr = None
+    """User who executed or triggered the task."""
+
     data: Any = None
-
+    """Arbitrary raw data payload associated with the task. It is good practice to add custom_metadata associated with
+    `data`, especially if it contains file contents.
+    In that case, `custom_metadata` should contain the keys "file_type", "file_content", "file_name", "extension".
+    """
+
+    used: Dict[AnyStr, Any] = None
+    """Inputs consumed by the task (parameters, files, resources)."""
+
     campaign_id: AnyStr = None
-
+    """Campaign identifier grouping related tasks together."""
+
+    generated: Dict[AnyStr, Any] = None
+    """Outputs produced by the task (results, artifacts, files)."""
+
     submitted_at: float = None
+    """Timestamp when the task was submitted."""
+
     started_at: float = None
+    """Timestamp when the task execution started."""
+
     ended_at: float = None
-
+    """Timestamp when the task execution ended."""
+
+    registered_at: float = None
+    """Timestamp when the task was registered by the DocInserter."""
+
     telemetry_at_start: Telemetry = None
+    """Telemetry snapshot captured at the start of the task."""
+
     telemetry_at_end: Telemetry = None
+    """Telemetry snapshot captured at the end of the task."""
+
     workflow_name: AnyStr = None
+    """Name of the workflow this task belongs to."""
+
     workflow_id: AnyStr = None
+    """Identifier of the workflow this task belongs to."""
+
     parent_task_id: AnyStr = None
+    """Identifier of the parent task, if this task is nested or dependent."""
+
     activity_id: AnyStr = None
-
+    """Activity name (usually the function name) associated with the task."""
+
+    group_id: AnyStr = None
+    """Grouping identifier, often used to link loop iterations together."""
+
     status: Status = None
+    """Execution status of the task (e.g., FINISHED, ERROR)."""
+
     stdout: Union[AnyStr, Dict] = None
+    """Captured standard output from the task, if available."""
+
     stderr: Union[AnyStr, Dict] = None
+    """Captured standard error from the task, if available."""
+
     custom_metadata: Dict[AnyStr, Any] = None
+    """Custom metadata dictionary provided by the developer/user."""
+
     mq_host: str = None
+    """Message queue host associated with the task."""
+
     environment_id: AnyStr = None
+    """Identifier of the environment where the task executed."""
+
     node_name: AnyStr = None
+    """Node name in a distributed system or HPC cluster."""
+
     login_name: AnyStr = None
+    """Login name of the user in the execution environment."""
+
     public_ip: AnyStr = None
+    """Public IP address of the machine executing the task."""
+
     private_ip: AnyStr = None
+    """Private IP address of the machine executing the task."""
+
     hostname: AnyStr = None
+    """Hostname of the machine executing the task."""
+
     address: AnyStr = None
+    """Optional network address associated with the task."""
+
     dependencies: List = None
+    """List of task IDs this task depends on."""
+
     dependents: List = None
+    """List of task IDs that depend on this task."""
+
     tags: List = None
+    """User-defined tags attached to the task."""
+
     agent_id: str = None
+    """Identifier of the agent responsible for executing this task (if any)."""
 
     _DEFAULT_ENRICH_VALUES = {
         "node_name": NODE_NAME,
````