flowcept 0.9.2__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
@@ -37,6 +37,7 @@ class FlowceptAppContext(BaseAppContext):
37
37
  tasks_schema: Dict | None # TODO: we dont need to keep the tasks_schema in context, just in the manager's memory.
38
38
  value_examples: Dict | None
39
39
  tracker_config: Dict | None
40
+ custom_guidance: List[str] | None
40
41
 
41
42
 
42
43
  class FlowceptAgentContextManager(BaseAgentContextManager):
@@ -53,7 +54,7 @@ class FlowceptAgentContextManager(BaseAgentContextManager):
53
54
  Current application context holding task state and QA components.
54
55
  msgs_counter : int
55
56
  Counter tracking how many task messages have been processed.
56
- context_size : int
57
+ context_chunk_size : int
57
58
  Number of task messages to collect before triggering QA index building and LLM analysis.
58
59
  qa_manager : FlowceptQAManager
59
60
  Utility for constructing QA chains from task summaries.
@@ -64,7 +65,7 @@ class FlowceptAgentContextManager(BaseAgentContextManager):
64
65
  self.tracker_config = dict(max_examples=3, max_str_len=50)
65
66
  self.schema_tracker = DynamicSchemaTracker(**self.tracker_config)
66
67
  self.msgs_counter = 0
67
- self.context_size = 1
68
+ self.context_chunk_size = 1 # Should be in the settings
68
69
  super().__init__()
69
70
 
70
71
  def message_handler(self, msg_obj: Dict):
@@ -98,18 +99,22 @@ class FlowceptAgentContextManager(BaseAgentContextManager):
98
99
  if len(task_summary.get("tags", [])):
99
100
  self.context.critical_tasks.append(task_summary)
100
101
 
101
- if self.msgs_counter > 0 and self.msgs_counter % self.context_size == 0:
102
+ if self.msgs_counter > 0 and self.msgs_counter % self.context_chunk_size == 0:
102
103
  self.logger.debug(
103
- f"Going to add to index! {(self.msgs_counter - self.context_size, self.msgs_counter)}"
104
+ f"Going to add to index! {(self.msgs_counter - self.context_chunk_size, self.msgs_counter)}"
104
105
  )
105
106
  try:
106
107
  self.update_schema_and_add_to_df(
107
- tasks=self.context.task_summaries[self.msgs_counter - self.context_size : self.msgs_counter]
108
+ tasks=self.context.task_summaries[
109
+ self.msgs_counter - self.context_chunk_size : self.msgs_counter
110
+ ]
108
111
  )
109
112
  except Exception as e:
110
113
  self.logger.error(
111
114
  f"Could not add these tasks to buffer!\n"
112
- f"{self.context.task_summaries[self.msgs_counter - self.context_size : self.msgs_counter]}"
115
+ f"{
116
+ self.context.task_summaries[self.msgs_counter - self.context_chunk_size : self.msgs_counter]
117
+ }"
113
118
  )
114
119
  self.logger.exception(e)
115
120
 
@@ -152,6 +157,7 @@ class FlowceptAgentContextManager(BaseAgentContextManager):
152
157
  df=pd.DataFrame(),
153
158
  tasks_schema={},
154
159
  value_examples={},
160
+ custom_guidance=[],
155
161
  tracker_config=self.tracker_config,
156
162
  )
157
163
  DEBUG = True # TODO debugging!
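Reviewer note: a minimal sketch (values invented) of how the renamed `context_chunk_size` drives the slice `task_summaries[msgs_counter - context_chunk_size : msgs_counter]` in `message_handler` — every chunk of summaries is indexed exactly once.

```python
# Sketch only: shows which task_summaries each index update picks up.
# Names mirror the diff; the data and chunk size are illustrative.
context_chunk_size = 3
task_summaries = [{"task_id": i} for i in range(9)]

for msgs_counter in range(1, len(task_summaries) + 1):
    if msgs_counter > 0 and msgs_counter % context_chunk_size == 0:
        chunk = task_summaries[msgs_counter - context_chunk_size : msgs_counter]
        print(msgs_counter, [t["task_id"] for t in chunk])
# 3 [0, 1, 2]
# 6 [3, 4, 5]
# 9 [6, 7, 8]
```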
@@ -1,3 +1,5 @@
1
+ import base64
2
+ import ast
1
3
  import io
2
4
  import json
3
5
 
@@ -122,6 +124,53 @@ def display_ai_msg_from_tool(tool_result: ToolResult):
122
124
  return agent_reply
123
125
 
124
126
 
127
+ def _sniff_mime(b: bytes) -> str:
128
+ if b.startswith(b"\x89PNG\r\n\x1a\n"):
129
+ return "image/png"
130
+ if b.startswith(b"\xff\xd8\xff"):
131
+ return "image/jpeg"
132
+ if b.startswith(b"GIF87a") or b.startswith(b"GIF89a"):
133
+ return "image/gif"
134
+ if b.startswith(b"BM"):
135
+ return "image/bmp"
136
+ if b.startswith(b"RIFF") and b[8:12] == b"WEBP":
137
+ return "image/webp"
138
+ return "application/octet-stream"
139
+
140
+
141
+ def ensure_data_uri(val):
142
+ r"""Accepts bytes/bytearray/memoryview or a repr like \"b'\\x89PNG...'\" and returns a data URL."""
143
+ if isinstance(val, str) and val.startswith("data:"):
144
+ return val
145
+ if isinstance(val, str) and val.startswith("b'"):
146
+ try:
147
+ val = ast.literal_eval(val) # turn repr into bytes
148
+ except Exception:
149
+ return None
150
+ if isinstance(val, memoryview):
151
+ val = val.tobytes()
152
+ if isinstance(val, bytearray):
153
+ val = bytes(val)
154
+ if isinstance(val, bytes):
155
+ mime = _sniff_mime(val)
156
+ return f"data:{mime};base64,{base64.b64encode(val).decode('ascii')}"
157
+ return val # path/URL, etc.
158
+
159
+
160
+ def _render_df(df: pd.DataFrame, image_width: int = 90, row_height: int = 90):
161
+ if "image" in df.columns:
162
+ df = df.copy()
163
+ df["image"] = df["image"].apply(ensure_data_uri)
164
+ st.dataframe(
165
+ df,
166
+ column_config={"image": st.column_config.ImageColumn("Preview", width=image_width)},
167
+ hide_index=True,
168
+ row_height=row_height, # make thumbnails visible
169
+ )
170
+ else:
171
+ st.dataframe(df, hide_index=True)
172
+
173
+
125
174
  def display_df_tool_response(tool_result: ToolResult):
126
175
  r"""
127
176
  Display the DataFrame contained in a ToolResult.
@@ -170,7 +219,8 @@ def display_df_tool_response(tool_result: ToolResult):
170
219
  df = pd.read_csv(io.StringIO(result_df_str))
171
220
  print("The result is a df")
172
221
  if not df.empty:
173
- st.dataframe(df, hide_index=False)
222
+ _render_df(df)
223
+
174
224
  print("Columns", str(df.columns))
175
225
  print("Number of columns", len(df.columns))
176
226
  else:
@@ -190,6 +240,7 @@ def display_df_tool_response(tool_result: ToolResult):
190
240
 
191
241
  if summary:
192
242
  st.markdown("📝 Summary:")
243
+ print(f"THIS IS THE SUMMARY\n{summary}")
193
244
  st.markdown(summary)
194
245
  elif summary_error:
195
246
  st.markdown(f"⚠️ Encountered this error when summarizing the result dataframe:\n```text\n{summary_error}")
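Reviewer note: a condensed, runnable sketch of the new image helpers added to the GUI utilities. Only the PNG branch of `_sniff_mime` is reproduced here, and the input bytes are a hand-made PNG header for illustration, not real package data.

```python
import ast
import base64

def _sniff_mime(b: bytes) -> str:
    # PNG branch only; the diff also sniffs JPEG, GIF, BMP and WEBP.
    if b.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    return "application/octet-stream"

def ensure_data_uri(val):
    # Accepts bytes-like values or a repr string like "b'\x89PNG...'".
    if isinstance(val, str) and val.startswith("data:"):
        return val
    if isinstance(val, str) and val.startswith("b'"):
        val = ast.literal_eval(val)          # turn the repr back into bytes
    if isinstance(val, (memoryview, bytearray)):
        val = bytes(val)
    if isinstance(val, bytes):
        mime = _sniff_mime(val)
        return f"data:{mime};base64,{base64.b64encode(val).decode('ascii')}"
    return val                               # path/URL, etc.

png_prefix = b"\x89PNG\r\n\x1a\n" + b"\x00" * 8   # fake header, enough for sniffing
print(ensure_data_uri(png_prefix)[:40])           # data:image/png;base64,iVBORw0KGgo...
print(ensure_data_uri(repr(png_prefix))[:40])     # same result from a repr string
```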
@@ -24,8 +24,9 @@ ROUTING_PROMPT = (
24
24
  "Given the following user message, classify it into one of the following routes:\n"
25
25
  "- small_talk: if it's casual conversation or some random word (e.g., 'hausdn', 'a', hello, how are you, what can you do, what's your name)\n"
26
26
  "- plot: if user is requesting plots (e.g., plot, chart, visualize)\n"
27
- "- in_context_query: if the user asks questions about tasks or data in running workflow (or a workflow that ran recently) or if the user mentions the in-memory 'df' or a dataframe.\n"
28
- "- historical_prov_query: if the user wants to query historical provenance data\n"
27
+ #"- in_context_query: if the user asks questions about tasks or data in running workflow (or a workflow that ran recently) or if the user mentions the in-memory 'df' or a dataframe.\n"
28
+ "- in_context_query: if the user is querying the provenance data questions about tasks or data in running workflow (or a workflow that ran recently) or if the user mentions the in-memory 'df' or a dataframe.\n"
29
+ #"- historical_prov_query: if the user wants to query historical provenance data\n"
29
30
  "- in_chat_query: if the user appears to be asking about something that has said recently in this chat.\n"
30
31
  "- unknown: if you don't know.\n"
31
32
  "Respond with only the route label."
@@ -15,6 +15,7 @@ COMMON_TASK_FIELDS = """
15
15
  | `ended_at` | datetime64[ns, UTC] | End time of a task. |
16
16
  | `subtype` | string | Subtype of a task. |
17
17
  | `tags` | List[str] | List of descriptive tags. |
18
+ | `image` | blob | Raw binary data related to an image. |
18
19
  | `telemetry_summary.duration_sec` | float | Task duration (seconds). |
19
20
  | `telemetry_summary.cpu.percent_all_diff` | float | Difference in overall CPU utilization percentage across all cores between task end and start.|
20
21
  | `telemetry_summary.cpu.user_time_diff` | float | Difference average per core CPU user time ( seconds ) between task start and end times.|
@@ -27,6 +28,17 @@ COMMON_TASK_FIELDS = """
27
28
  DF_FORM = "The user has a pandas DataFrame called `df`, created from flattened task objects using `pd.json_normalize`."
28
29
 
29
30
 
31
+ def get_example_values_prompt(example_values):
32
+ values_prompt = f"""
33
+ Now, this other dictionary below provides type (t), up to 3 example values (v), and, for lists, shape (s) and element type (et) for each field.
34
+ Field names do not include `used.` or `generated.` They represent the unprefixed form shared across roles. String values may be truncated if they exceed the length limit.
35
+ ```python
36
+ {example_values}
37
+ ```
38
+ """
39
+ return values_prompt
40
+
41
+
30
42
  def get_df_schema_prompt(dynamic_schema, example_values):
31
43
  schema_prompt = f"""
32
44
  ## DATAFRAME STRUCTURE
@@ -52,14 +64,7 @@ def get_df_schema_prompt(dynamic_schema, example_values):
52
64
  ---
53
65
  """
54
66
 
55
- values_prompt = f"""
56
- Now, this other dictionary below provides type (t), up to 3 example values (v), and, for lists, shape (s) and element type (et) for each field.
57
- Field names do not include `used.` or `generated.` They represent the unprefixed form shared across roles. String values may be truncated if they exceed the length limit.
58
- ```python
59
- {example_values}
60
- ```
61
- """
62
-
67
+ values_prompt = get_example_values_prompt(example_values)
63
68
  # values_prompt = ""
64
69
  prompt = schema_prompt + values_prompt
65
70
  return prompt
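Reviewer note: the relocated `get_example_values_prompt` describes the keys of `example_values` (t, v, s, et) but the diff never shows a concrete dictionary. A hypothetical instance, inferred from that wording and from field names in `COMMON_TASK_FIELDS`, purely for orientation:

```python
# Hypothetical example_values payload, inferred from the prompt wording:
# t = type, v = up to 3 example values, s = shape (lists only), et = element type.
example_values = {
    "subtype": {"t": "str", "v": ["training_step"]},
    "telemetry_summary.duration_sec": {"t": "float", "v": [0.8, 1.2, 0.5]},
    "tags": {"t": "list", "v": [["epoch", "train"]], "s": [2], "et": "str"},
}
```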
@@ -215,14 +220,23 @@ OUTPUT_FORMATTING = """
215
220
  """
216
221
 
217
222
 
218
- def generate_pandas_code_prompt(query: str, dynamic_schema, example_values):
223
+ def generate_pandas_code_prompt(query: str, dynamic_schema, example_values, custom_user_guidances):
224
+ if custom_user_guidances is not None and isinstance(custom_user_guidances, list) and len(custom_user_guidances):
225
+ concatenated_guidance = "\n".join(f"- {msg}" for msg in custom_user_guidances)
226
+ custom_user_guidance_prompt = (f"You MUST consider the following guidance from the user:\n"
227
+ f"{concatenated_guidance}"
228
+ "------------------------------------------------------"
229
+ )
230
+ else:
231
+ custom_user_guidance_prompt = ""
219
232
  prompt = (
220
233
  f"{ROLE}"
221
234
  f"{JOB}"
222
235
  f"{DF_FORM}"
223
236
  f"{get_df_schema_prompt(dynamic_schema, example_values)}" # main tester
224
- # f"{QUERY_GUIDELINES}" # main tester
237
+ f"{QUERY_GUIDELINES}" # main tester
225
238
  f"{FEW_SHOTS}" # main tester
239
+ f"{custom_user_guidance_prompt}"
226
240
  f"{OUTPUT_FORMATTING}"
227
241
  "User Query:"
228
242
  f"{query}"
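Reviewer note: a small sketch of the guidance block that `generate_pandas_code_prompt` now prepends when `custom_user_guidances` is non-empty. The guidance strings are invented; the concatenation mirrors the diff exactly, including the absence of a newline before the dashed separator.

```python
custom_user_guidances = [
    "Only consider tasks whose status is FINISHED.",
    "Report durations in seconds.",
]
concatenated_guidance = "\n".join(f"- {msg}" for msg in custom_user_guidances)
custom_user_guidance_prompt = (
    "You MUST consider the following guidance from the user:\n"
    f"{concatenated_guidance}"
    "------------------------------------------------------"
)
print(custom_user_guidance_prompt)
```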
@@ -230,9 +244,16 @@ def generate_pandas_code_prompt(query: str, dynamic_schema, example_values):
230
244
  return prompt
231
245
 
232
246
 
233
- def dataframe_summarizer_context(code, reduced_df, query) -> str:
247
+ def dataframe_summarizer_context(code, reduced_df, dynamic_schema, example_values, query) -> str:
248
+ job = "You are a Workflow Provenance Specialist analyzing a DataFrame that was obtained to answer a query."
249
+
250
+ if "image" in reduced_df.columns:
251
+ reduced_df = reduced_df.drop(columns=["image"])
252
+
234
253
  prompt = f"""
235
- You are a Workflow Provenance Specialist analyzing a DataFrame that was obtained to answer a query. Given:
254
+ {job}
255
+
256
+ Given:
236
257
 
237
258
  **User Query**:
238
259
  {query}
@@ -240,19 +261,26 @@ def dataframe_summarizer_context(code, reduced_df, query) -> str:
240
261
  **Query_Code**:
241
262
  {code}
242
263
 
243
- **Reduced DataFrame** (rows sampled from full result):
264
+ **Reduced DataFrame `df` contents** (rows sampled from full result):
244
265
  {reduced_df}
245
266
 
246
- Your task is to:
247
- 1. Analyze the DataFrame values and columns for any meaningful or notable information.
248
- 2. Compare the query_code with the data content to understand what the result represents. THIS IS A REDUCED DATAFRAME, the original dataframe, used to answer the query, may be much bigger. IT IS ALREADY KNOWN! Do not need to restate this.
249
- 3. Provide a concise and direct answer to the user query. Your final response to the query should be within ```text .
267
+ **Original df (before reduction) had this schema:
268
+ {get_df_schema_prompt(dynamic_schema, example_values)}
269
+
270
+ Your task is to find a concise and direct answer as an English sentence to the user query.
271
+
272
+ Only if the answer to the query is complex, provide more explanation by:
273
+ 1. Analyzing the DataFrame values and columns for any meaningful or notable information.
274
+ 2. Comparing the query_code with the data content to understand what the result represents. THIS IS A REDUCED DATAFRAME, the original dataframe, used to answer the query, may be much bigger. IT IS ALREADY KNOWN! Do not need to restate this.
275
+ 3. If it makes sense, provide information beyond the recorded provenance, but state it clearly that you are inferring it.
276
+
277
+ In the end, conclude by giving your concise answer as follows: **Response**: <YOUR ANSWER>
250
278
 
251
279
  Note that the user should not know that this is a reduced dataframe.
252
-
253
280
  Keep your response short and focused.
254
281
 
255
282
  """
283
+
256
284
  return prompt
257
285
 
258
286
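Reviewer note: the summarizer context now drops raw image bytes before embedding the reduced DataFrame in the prompt. A minimal illustration with an invented DataFrame:

```python
import pandas as pd

reduced_df = pd.DataFrame(
    {"task_id": ["t1"], "image": [b"\x89PNG..."], "loss": [0.42]}
)
# Mirrors the guard added to dataframe_summarizer_context.
if "image" in reduced_df.columns:
    reduced_df = reduced_df.drop(columns=["image"])
print(reduced_df.columns.tolist())   # ['task_id', 'loss']
```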
 
@@ -1,4 +1,5 @@
1
1
  import json
2
+ from typing import List
2
3
 
3
4
  from flowcept.agents.agents_utils import build_llm_model, ToolResult
4
5
  from flowcept.agents.flowcept_ctx_manager import mcp_flowcept
@@ -59,6 +60,16 @@ def check_llm() -> str:
59
60
  return response
60
61
 
61
62
 
63
+ @mcp_flowcept.tool()
64
+ def record_guidance(message: str) -> ToolResult:
65
+ ctx = mcp_flowcept.get_context()
66
+ message = message.replace("@record", "")
67
+ custom_guidance: List = ctx.request_context.lifespan_context.custom_guidance
68
+ custom_guidance.append(message)
69
+
70
+ return ToolResult(code=201, result=f"Ok. I recorded in my memory: {message}")
71
+
72
+
62
73
  @mcp_flowcept.tool()
63
74
  def prompt_handler(message: str) -> ToolResult:
64
75
  """
@@ -74,11 +85,15 @@ def prompt_handler(message: str) -> ToolResult:
74
85
  TextContent
75
86
  The AI response or routing feedback.
76
87
  """
77
- df_key_words = {"save", "result = df", "reset context"}
88
+ df_key_words = ["df", "save", "result = df", "reset context"]
78
89
  for key in df_key_words:
79
90
  if key in message:
80
91
  return run_df_query(llm=None, query=message, plot=False)
81
92
 
93
+ if "@record" in message:
94
+ return record_guidance(message)
95
+
96
+
82
97
  llm = build_llm_model()
83
98
 
84
99
  prompt = ROUTING_PROMPT + message
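Reviewer note: with the new `@record` branch in `prompt_handler`, a message carrying the marker is stripped and appended to the lifespan context's `custom_guidance`. A minimal sketch outside the MCP machinery — the list below is a stand-in for `ctx.request_context.lifespan_context.custom_guidance`, and `record_guidance_sketch` is a hypothetical name for illustration only.

```python
from typing import List

custom_guidance: List[str] = []   # stand-in for the lifespan context attribute

def record_guidance_sketch(message: str) -> str:
    message = message.replace("@record", "")   # mirrors the new tool
    custom_guidance.append(message)
    return f"Ok. I recorded in my memory: {message}"

reply = record_guidance_sketch("@record Prefer duration_sec for timing questions.")
print(reply)
print(custom_guidance)   # [' Prefer duration_sec for timing questions.'] -- note the leading space
```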
@@ -87,6 +87,7 @@ def run_df_query(llm, query: str, plot=False) -> ToolResult:
87
87
  df: pd.DataFrame = ctx.request_context.lifespan_context.df
88
88
  schema = ctx.request_context.lifespan_context.tasks_schema
89
89
  value_examples = ctx.request_context.lifespan_context.value_examples
90
+ custom_user_guidance = ctx.request_context.lifespan_context.custom_guidance
90
91
  if df is None or not len(df):
91
92
  return ToolResult(code=404, result="Current df is empty or null.")
92
93
 
@@ -99,9 +100,9 @@ def run_df_query(llm, query: str, plot=False) -> ToolResult:
99
100
  return run_df_code(user_code=query, df=df)
100
101
 
101
102
  if plot:
102
- return generate_plot_code(llm, query, schema, value_examples, df)
103
+ return generate_plot_code(llm, query, schema, value_examples, df, custom_user_guidance=custom_user_guidance)
103
104
  else:
104
- return generate_result_df(llm, query, schema, value_examples, df)
105
+ return generate_result_df(llm, query, schema, value_examples, df, custom_user_guidance=custom_user_guidance)
105
106
 
106
107
 
107
108
  @mcp_flowcept.tool()
@@ -221,7 +222,7 @@ def generate_plot_code(llm, query, dynamic_schema, value_examples, df) -> ToolRe
221
222
 
222
223
 
223
224
  @mcp_flowcept.tool()
224
- def generate_result_df(llm, query: str, dynamic_schema, example_values, df, attempt_fix=True, summarize=True):
225
+ def generate_result_df(llm, query: str, dynamic_schema, example_values, df, custom_user_guidance=None, attempt_fix=True, summarize=True):
225
226
  """
226
227
  Generate a result DataFrame from a natural language query using an LLM.
227
228
 
@@ -294,8 +295,10 @@ def generate_result_df(llm, query: str, dynamic_schema, example_values, df, atte
294
295
  >>> generate_result_df(llm, "bad query", schema, examples, df, attempt_fix=False)
295
296
  ToolResult(code=405, result="Failed to parse this as Python code: ...")
296
297
  """
298
+ if llm is None:
299
+ llm = build_llm_model()
297
300
  try:
298
- prompt = generate_pandas_code_prompt(query, dynamic_schema, example_values)
301
+ prompt = generate_pandas_code_prompt(query, dynamic_schema, example_values, custom_user_guidance)
299
302
  response = llm(prompt)
300
303
  except Exception as e:
301
304
  return ToolResult(code=400, result=str(e), extra=prompt)
@@ -351,7 +354,14 @@ def generate_result_df(llm, query: str, dynamic_schema, example_values, df, atte
351
354
  summary, summary_error = None, None
352
355
  if summarize:
353
356
  try:
354
- tool_result = summarize_result(llm, result_code, result_df, query)
357
+ tool_result = summarize_result(
358
+ llm,
359
+ result_code,
360
+ result_df,
361
+ query,
362
+ dynamic_schema,
363
+ example_values,
364
+ )
355
365
  if tool_result.is_success():
356
366
  return_code = 301
357
367
  summary = tool_result.result
@@ -570,7 +580,14 @@ def extract_or_fix_json_code(llm, raw_text) -> ToolResult:
570
580
 
571
581
 
572
582
  @mcp_flowcept.tool()
573
- def summarize_result(llm, code, result, query: str) -> ToolResult:
583
+ def summarize_result(
584
+ llm,
585
+ code,
586
+ result,
587
+ query: str,
588
+ dynamic_schema,
589
+ example_values,
590
+ ) -> ToolResult:
574
591
  """
575
592
  Summarize the pandas result with local reduction for large DataFrames.
576
593
  - For wide DataFrames, selects top columns based on variance and uniqueness.
@@ -578,7 +595,7 @@ def summarize_result(llm, code, result, query: str) -> ToolResult:
578
595
  - Constructs a detailed prompt for the LLM with original column context.
579
596
  """
580
597
  summarized_df = summarize_df(result, code)
581
- prompt = dataframe_summarizer_context(code, summarized_df, query)
598
+ prompt = dataframe_summarizer_context(code, summarized_df, dynamic_schema, example_values, query)
582
599
  try:
583
600
  response = llm(prompt)
584
601
  return ToolResult(code=201, result=response)
@@ -77,7 +77,7 @@ def normalize_output(result):
77
77
  raise TypeError(f"Unsupported result type: {type(result)}")
78
78
 
79
79
  if not len(_df):
80
- raise ValueError("Result DataFrame is Empty.")
80
+ return pd.DataFrame()
81
81
 
82
82
  return _df
83
83
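Reviewer note: `normalize_output` now returns an empty DataFrame instead of raising on empty results, so callers can branch on `df.empty`. A tiny illustration with an invented caller (the handler name and message are not from the package):

```python
import pandas as pd

def handle_result(_df: pd.DataFrame) -> str:
    # With the new behavior, an empty result reaches the caller as an empty
    # frame rather than a ValueError, so the caller decides how to report it.
    if _df.empty:
        return "Current df is empty or null."
    return f"{len(_df)} rows"

print(handle_result(pd.DataFrame()))
```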
 
@@ -137,7 +137,6 @@ def summarize_task(task: Dict, thresholds: Dict = None, logger=None) -> Dict:
137
137
  "agent_id",
138
138
  "campaign_id",
139
139
  "subtype",
140
- "custom_metadata",
141
140
  ]:
142
141
  value = _safe_get(task, key)
143
142
  if value is not None:
@@ -146,7 +145,14 @@ def summarize_task(task: Dict, thresholds: Dict = None, logger=None) -> Dict:
146
145
  else:
147
146
  task_summary[key] = value
148
147
 
149
- # Special handling for timestamp field
148
+ # Adding image column if data is image. This is to handle special cases when there is an image associated to
149
+ # a provenance task.
150
+ if "data" in task:
151
+ if "custom_metadata" in task:
152
+ if "image" in task["custom_metadata"].get("mime_type", ""):
153
+ task_summary["image"] = task["data"]
154
+
155
+ # Special handling for timestamp field
150
156
  try:
151
157
  time_keys = ["started_at", "ended_at"]
152
158
  for time_key in time_keys:
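Reviewer note: the new image branch in `summarize_task` only fires when the task carries both `data` and a `custom_metadata.mime_type` containing "image". A hypothetical payload (all values invented) that satisfies the checks:

```python
# Invented task dict; only the fields exercised by the new branch are shown.
task = {
    "task_id": "t-123",
    "data": b"\x89PNG\r\n\x1a\n...",                  # raw image bytes
    "custom_metadata": {"mime_type": "image/png"},
}

task_summary = {}
if "data" in task:
    if "custom_metadata" in task:
        if "image" in task["custom_metadata"].get("mime_type", ""):
            task_summary["image"] = task["data"]
print("image" in task_summary)   # True
```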
@@ -25,7 +25,7 @@ from flowcept.flowceptor.adapters.base_interceptor import BaseInterceptor
25
25
 
26
26
 
27
27
  class Flowcept(object):
28
- """Flowcept Controller class."""
28
+ """Main Flowcept controller class."""
29
29
 
30
30
  _db = None
31
31
  # TODO: rename current_workflow_id to workflow_id. This will be a major refactor
@@ -18,26 +18,6 @@ class FlowceptLoop:
18
18
  while capturing metadata for each iteration and for the loop as a whole. This is particularly
19
19
  useful in scenarios where tracking and instrumentation of loop executions is required.
20
20
 
21
- Parameters
22
- ----------
23
- items : typing.Union[Sized, int, Iterator]
24
- The items to iterate over. Must either be an iterable with a `__len__` method or an integer
25
- representing the range of iteration.
26
- loop_name : str, optional
27
- A descriptive name for the loop (default is "loop").
28
- item_name : str, optional
29
- The name used for each item in the telemetry (default is "item").
30
- parent_task_id : str, optional
31
- The ID of the parent task associated with the loop, if applicable (default is None).
32
- workflow_id : str, optional
33
- The workflow ID to associate with this loop. If not provided, it will be generated or
34
- inferred from the current workflow context.
35
-
36
- Raises
37
- ------
38
- Exception
39
- If `items` is not an iterable with a `__len__` method or an integer.
40
-
41
21
  Notes
42
22
  -----
43
23
  This class integrates with the `Flowcept` system for telemetry and tracking, ensuring
@@ -57,6 +37,52 @@ class FlowceptLoop:
57
37
  items_length=0,
58
38
  capture_enabled=True,
59
39
  ):
40
+ """
41
+ Initialize a FlowceptLoop instance for tracking iterations.
42
+
43
+ This constructor wraps an iterable, numeric range, or explicit iterator into a
44
+ loop context where each iteration is instrumented with provenance and optional
45
+ telemetry. If instrumentation is disabled, the loop behaves like a normal
46
+ Python iterator with minimal overhead.
47
+
48
+ Parameters
49
+ ----------
50
+ items : Union[Sized, Iterator, int]
51
+ The items to iterate over. Can be:
52
+ - A sized iterable (e.g., list, range).
53
+ - An integer (interpreted as ``range(items)``).
54
+ - An iterator (requires ``items_length`` if length cannot be inferred).
55
+ loop_name : str, optional
56
+ A descriptive name for the loop. Used in provenance as the loop's activity
57
+ identifier. Default is ``"loop"``.
58
+ item_name : str, optional
59
+ The key name under which each iteration's item is recorded in provenance.
60
+ Default is ``"item"``.
61
+ parent_task_id : str, optional
62
+ The identifier of a parent task, if this loop is nested within another task.
63
+ Default is ``None``.
64
+ workflow_id : str, optional
65
+ Identifier for the workflow this loop belongs to. If not provided, it is
66
+ inherited from the current Flowcept context or generated as a UUID.
67
+ items_length : int, optional
68
+ Explicit number of items if ``items`` is an iterator without a defined length.
69
+ Default is ``0``.
70
+ capture_enabled : bool, optional
71
+ Whether to enable provenance/telemetry capture. If ``False``, the loop runs
72
+ without instrumentation. Default is ``True``.
73
+
74
+ Raises
75
+ ------
76
+ Exception
77
+ If ``items`` is not a supported type (sized iterable, integer, or iterator).
78
+
79
+ Notes
80
+ -----
81
+ - Each iteration is recorded with ``used`` (inputs) and optional ``generated``
82
+ values, plus telemetry if enabled.
83
+ - Iteration metadata is finalized at the end of each iteration and sent to the
84
+ active Flowcept interceptor.
85
+ """
60
86
  self._current_iteration_task = {}
61
87
  if not (INSTRUMENTATION_ENABLED and capture_enabled):
62
88
  # These do_nothing functions help reduce overhead if no instrumentation is needed
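Reviewer note: a hedged usage sketch of `FlowceptLoop`, built only from the constructor parameters documented in the restored docstring; the import path comes from the RECORD entry in this diff. Capture is disabled so the snippet is a plain iterator and needs no live Flowcept backend; the loop body is placeholder work.

```python
from flowcept.instrumentation.flowcept_loop import FlowceptLoop

# items=3 is interpreted as range(3); capture_enabled=False skips instrumentation.
for epoch in FlowceptLoop(items=3, loop_name="epochs", item_name="epoch",
                          capture_enabled=False):
    loss = 1.0 / (epoch + 1)   # placeholder per-iteration work
```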
@@ -232,6 +258,51 @@ class FlowceptLightweightLoop:
232
258
  items_length=0,
233
259
  capture_enabled=True,
234
260
  ):
261
+ """
262
+ Initialize a FlowceptLightweightLoop instance for tracking iterations.
263
+
264
+ This constructor provides a lower-overhead loop wrapper compared to
265
+ ``FlowceptLoop``. Iterations are pre-registered as task objects, and capture
266
+ primarily updates ``used`` and ``generated`` fields as the loop progresses.
267
+
268
+ Parameters
269
+ ----------
270
+ items : Union[Sized, Iterator]
271
+ The items to iterate over. Must either be:
272
+ - A sized iterable (with ``__len__``).
273
+ - An explicit iterator (length must be given by ``items_length``).
274
+ loop_name : str, optional
275
+ A descriptive name for the loop. Used in provenance as the loop's activity
276
+ identifier. Default is ``"loop"``.
277
+ item_name : str, optional
278
+ The key name under which each iteration's item is recorded in provenance.
279
+ Default is ``"item"``.
280
+ parent_task_id : str, optional
281
+ The identifier of a parent task, if this loop is nested within another task.
282
+ Default is ``None``.
283
+ workflow_id : str, optional
284
+ Identifier for the workflow this loop belongs to. If not provided, it is
285
+ inherited from the current Flowcept context or generated as a UUID.
286
+ items_length : int, optional
287
+ Explicit number of items if ``items`` is an iterator without a defined length.
288
+ Default is ``0``.
289
+ capture_enabled : bool, optional
290
+ Whether to enable provenance/telemetry capture. If ``False``, the loop runs
291
+ without instrumentation. Default is ``True``.
292
+
293
+ Raises
294
+ ------
295
+ Exception
296
+ If ``items`` is neither a sized iterable nor an iterator.
297
+
298
+ Notes
299
+ -----
300
+ - This class is designed for high-performance scenarios with many iterations.
301
+ - Iteration tasks are pre-allocated, and provenance capture is batched via
302
+ the Flowcept interceptor.
303
+ - Compared to ``FlowceptLoop``, this class avoids per-iteration telemetry
304
+ overhead unless explicitly enabled.
305
+ """
235
306
  if isinstance(items, Iterator):
236
307
  self._iterator = items
237
308
  else:
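Reviewer note: per the new docstring, the lightweight variant needs `items_length` when handed a bare iterator. A hedged sketch using only parameters documented above, with capture disabled so it runs standalone; the iterator contents are illustrative.

```python
from flowcept.instrumentation.flowcept_loop import FlowceptLightweightLoop

samples = iter(["a", "b", "c"])   # a plain iterator has no __len__
for item in FlowceptLightweightLoop(items=samples, items_length=3,
                                    loop_name="samples", capture_enabled=False):
    processed = item.upper()      # placeholder per-iteration work
```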
flowcept/version.py CHANGED
@@ -4,4 +4,4 @@
4
4
  # The expected format is: <Major>.<Minor>.<Patch>
5
5
  # This file is supposed to be automatically modified by the CI Bot.
6
6
  # See .github/workflows/version_bumper.py
7
- __version__ = "0.9.2"
7
+ __version__ = "0.9.4"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: flowcept
3
- Version: 0.9.2
3
+ Version: 0.9.4
4
4
  Summary: Capture and query workflow provenance data using data observability
5
5
  Author: Oak Ridge National Laboratory
6
6
  License-Expression: MIT
@@ -25,6 +25,7 @@ Requires-Dist: gitpython; extra == 'all'
25
25
  Requires-Dist: google-genai; extra == 'all'
26
26
  Requires-Dist: jupyterlab; extra == 'all'
27
27
  Requires-Dist: langchain-community; extra == 'all'
28
+ Requires-Dist: langchain-openai; extra == 'all'
28
29
  Requires-Dist: lmdb; extra == 'all'
29
30
  Requires-Dist: mcp[cli]; extra == 'all'
30
31
  Requires-Dist: mlflow-skinny; extra == 'all'
@@ -85,11 +86,13 @@ Provides-Extra: kafka
85
86
  Requires-Dist: confluent-kafka<=2.8.0; extra == 'kafka'
86
87
  Provides-Extra: llm-agent
87
88
  Requires-Dist: langchain-community; extra == 'llm-agent'
89
+ Requires-Dist: langchain-openai; extra == 'llm-agent'
88
90
  Requires-Dist: mcp[cli]; extra == 'llm-agent'
89
91
  Requires-Dist: streamlit; extra == 'llm-agent'
90
92
  Provides-Extra: llm-google
91
93
  Requires-Dist: google-genai; extra == 'llm-google'
92
94
  Requires-Dist: langchain-community; extra == 'llm-google'
95
+ Requires-Dist: langchain-openai; extra == 'llm-google'
93
96
  Requires-Dist: mcp[cli]; extra == 'llm-google'
94
97
  Requires-Dist: streamlit; extra == 'llm-google'
95
98
  Provides-Extra: lmdb
@@ -1,27 +1,27 @@
1
1
  flowcept/__init__.py,sha256=urpwIEJeikV0P6ORXKsM5Lq4o6wCwhySS9A487BYGy4,2241
2
2
  flowcept/cli.py,sha256=eVnUrmZtVhZ1ldRMGB1QsqBzNC1Pf2CX33efnlaZ4gs,22842
3
3
  flowcept/configs.py,sha256=xw9cdk-bDkR4_bV2jBkDCe9__na9LKJW5tUG32by-m4,8216
4
- flowcept/version.py,sha256=vKyazpFciSPMlst6m2HH-9RdZcbRHHnKT0jk92I-APc,306
4
+ flowcept/version.py,sha256=r2ZjnVSR4af_jwtgraTsn5318tUsvsQueol5-HauJQY,306
5
5
  flowcept/agents/__init__.py,sha256=8eeD2CiKBtHiDsWdrHK_UreIkKlTq4dUbhHDyzw372o,175
6
6
  flowcept/agents/agent_client.py,sha256=UiBQkC9WE2weLZR2OTkEOEQt9-zqQOkPwRA17HfI-jk,2027
7
7
  flowcept/agents/agents_utils.py,sha256=Az5lvWTsBHs_3sWWwy7jSdDjNn-PvZ7KmYd79wxvdyU,6666
8
8
  flowcept/agents/dynamic_schema_tracker.py,sha256=TsmXRRkyUkqB-0bEgmeqSms8xj1tMMJeYvjoaO2mtwI,6829
9
9
  flowcept/agents/flowcept_agent.py,sha256=1sidjnNMdG0S6lUKBvml7ZfIb6o3u7zc6HNogsJbl9g,871
10
- flowcept/agents/flowcept_ctx_manager.py,sha256=-WYulunHE62w61z8cy3u7TEnvgV1hflNEXsnm8YxwFw,6840
10
+ flowcept/agents/flowcept_ctx_manager.py,sha256=-WmkddzzFY2dnU9LbZaoY4-5RcSAQH4FziEJgcC5LEI,7083
11
11
  flowcept/agents/gui/__init__.py,sha256=Qw9YKbAzgZqBjMQGnF7XWmfUo0fivtkDISQRK3LA3gU,113
12
12
  flowcept/agents/gui/agent_gui.py,sha256=8sTG3MjWBi6oc4tnfHa-duTBXWEE6RBxBE5uHooGkzI,2501
13
- flowcept/agents/gui/gui_utils.py,sha256=Qex0G9Asgb_UnLTySB8cYNEEy9ZnmLYnLddbornoDcI,7861
13
+ flowcept/agents/gui/gui_utils.py,sha256=61JpFKu-yd7luWVBW6HQYd3feOmupR01tYsZxl804No,9517
14
14
  flowcept/agents/llms/__init__.py,sha256=kzOaJic5VhMBnGvy_Fr5C6sRKVrRntH1ZnYz7f5_4-s,23
15
15
  flowcept/agents/llms/claude_gcp.py,sha256=fzz7235DgzVueuFj5odsr93jWtYHpYlXkSGW1kmmJwU,4915
16
16
  flowcept/agents/llms/gemini25.py,sha256=VARrjb3tITIh3_Wppmocp_ocSKVZNon0o0GeFEwTnTI,4229
17
17
  flowcept/agents/prompts/__init__.py,sha256=7ICsNhLYzvPS1esG3Vg519s51b1c4yN0WegJUb6Qvww,26
18
- flowcept/agents/prompts/general_prompts.py,sha256=q0KmR2QYEtBqQOssoF8W5EhZidqC59wL6XFVjF_dbWQ,3675
19
- flowcept/agents/prompts/in_memory_query_prompts.py,sha256=oWvZQNUHBBrGq-f94ulhIZW4bkkze02EzAuHY5640QM,17934
18
+ flowcept/agents/prompts/general_prompts.py,sha256=OWVyToJL3w16zjycA0U0oRIx3XQRuklg0wqiUOny_1U,3892
19
+ flowcept/agents/prompts/in_memory_query_prompts.py,sha256=sVoZJEfNjPpqU8ruDhRoVFIAKTnlA7btkw5n9R2mYBw,19358
20
20
  flowcept/agents/tools/__init__.py,sha256=Xqz2E4-LL_7DDcm1XYJFx2f5RdAsjeTpOJb_DPC7xyc,27
21
- flowcept/agents/tools/general_tools.py,sha256=Dw1vYNzVUp8dIB48KFPNxGenERoS8UqJj0HIEfhjQeA,2752
21
+ flowcept/agents/tools/general_tools.py,sha256=xoZqNPD_eOnAsbdbo38opG7FEkQzzWq_7BoLGw055-Q,3207
22
22
  flowcept/agents/tools/in_memory_queries/__init__.py,sha256=K8-JI_lXUgquKkgga8Nef8AntGg_logQtjjQjaEE7yI,39
23
- flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py,sha256=hrVal1ktf6lvBmVWS7cR_lQy4cIz7ZNYLC-MN61WNRg,25450
24
- flowcept/agents/tools/in_memory_queries/pandas_agent_utils.py,sha256=xi69oywlGb6IUkhQKXoKoswYuWK5FyiWHy2MnRjTzds,9055
23
+ flowcept/agents/tools/in_memory_queries/in_memory_queries_tools.py,sha256=iziwjHvvi1_HJ5rMKdV6AK0hVoPdGHuL5hZmBf0CFp8,25951
24
+ flowcept/agents/tools/in_memory_queries/pandas_agent_utils.py,sha256=147Yly6p9sU6kKCbfoyNMBm9zV-ptxGWjBaRV2s8OPo,9030
25
25
  flowcept/analytics/__init__.py,sha256=46q-7vsHq_ddPNrzNnDgEOiRgvlx-5Ggu2ocyROMV0w,641
26
26
  flowcept/analytics/analytics_utils.py,sha256=FRJdBtQa7Hrk2oR_FFhmhmMf3X6YyZ4nbH5RIYh7KL4,8753
27
27
  flowcept/analytics/data_augmentation.py,sha256=Dyr5x316Zf-k1e8rVoQMCpFOrklYVHjfejRPrtoycmc,1641
@@ -31,7 +31,7 @@ flowcept/commons/autoflush_buffer.py,sha256=Ohy_RNbq6BXn0_R83OL5iaTgGPmV8cT1moIR
31
31
  flowcept/commons/flowcept_logger.py,sha256=0asRucrDMeRXvsdhuCmH6lWO7lAt_Z5o5uW7rrQhcjc,1857
32
32
  flowcept/commons/query_utils.py,sha256=3tyK5VYA10iDtmtzNwa8OQGn93DBxsu6rTjHDphftSc,2208
33
33
  flowcept/commons/settings_factory.py,sha256=bMTjgXRfb5HsL2lPnLfem-9trqELbNWE04Ie7lSlxYM,1731
34
- flowcept/commons/task_data_preprocess.py,sha256=yxLOq3PhfJYDeOUrbBzLc-x7zDrKqB30pwk1nIqtdgo,13552
34
+ flowcept/commons/task_data_preprocess.py,sha256=bJed8Jbo4Mxk6aRVt3sCn4_KxfV5jWXwAIQWwuqHm3U,13846
35
35
  flowcept/commons/utils.py,sha256=gF6ENWlTpR2ZSw3yVNPNBTVzSpcgy-WuzYzwWSXXsug,9252
36
36
  flowcept/commons/vocabulary.py,sha256=_GzHJ1wSYJlLsu_uu1Am6N3zvc59S4FCuT5yp7lynPw,713
37
37
  flowcept/commons/daos/__init__.py,sha256=RO51svfHOg9naN676zuQwbj_RQ6IFHu-RALeefvtwwk,23
@@ -53,7 +53,7 @@ flowcept/commons/flowcept_dataclasses/telemetry.py,sha256=9_5ONCo-06r5nKHXmi5HfI
53
53
  flowcept/commons/flowcept_dataclasses/workflow_object.py,sha256=cauWtXHhBv9lHS-q6cb7yUsNiwQ6PkZPuSinR1TKcqU,6161
54
54
  flowcept/flowcept_api/__init__.py,sha256=T1ty86YlocQ5Z18l5fUqHj_CC6Unq_iBv0lFyiI7Ao8,22
55
55
  flowcept/flowcept_api/db_api.py,sha256=hKXep-n50rp9cAzV0ljk2QVEF8O64yxi3ujXv5_Ibac,9723
56
- flowcept/flowcept_api/flowcept_controller.py,sha256=NFYBvv8OeDbZs2Q8o2dnFWC5N7fofvx7iiOfvmcoraE,15246
56
+ flowcept/flowcept_api/flowcept_controller.py,sha256=JcUQXJfEjmg-KQsolIN5Ul7vbSxZUg8QTWaGAahZKTE,15251
57
57
  flowcept/flowcept_api/task_query_api.py,sha256=SrwB0OCVtbpvCPECkE2ySM10G_g8Wlk5PJ8h-0xEaNc,23821
58
58
  flowcept/flowcept_webserver/__init__.py,sha256=8411GIXGddKTKoHUvbo_Rq6svosNG7tG8VzvUEBd7WI,28
59
59
  flowcept/flowcept_webserver/app.py,sha256=VUV8_JZbIbx9u_1O7m7XtRdhZb_7uifUa-iNlPhmZws,658
@@ -89,13 +89,13 @@ flowcept/flowceptor/consumers/agent/base_agent_context_manager.py,sha256=5fBPYs-
89
89
  flowcept/instrumentation/__init__.py,sha256=M5bTmg80E4QyN91gUX3qfw_nbtJSXwGWcKxdZP3vJz0,34
90
90
  flowcept/instrumentation/flowcept_agent_task.py,sha256=XN9JU4LODca0SgojUm4F5iU_V8tuWkOt1fAKcoOAG34,10757
91
91
  flowcept/instrumentation/flowcept_decorator.py,sha256=X4Lp_FSsoL08K8ZhRM4mC0OjKupbQtbMQR8zxy3ezDY,1350
92
- flowcept/instrumentation/flowcept_loop.py,sha256=7hkcolXxbwwccNzoSbAeCCEu02i4zT317YeJ6dO1MDs,12208
92
+ flowcept/instrumentation/flowcept_loop.py,sha256=jea_hYPuXg5_nOWf-nNb4vx8A__OBM4m96_92-J51o4,15670
93
93
  flowcept/instrumentation/flowcept_task.py,sha256=EmKODpjl8usNklKSVmsKYyCa6gC_QMqKhAr3DKaw44s,8199
94
94
  flowcept/instrumentation/flowcept_torch.py,sha256=kkZQRYq6cDBpdBU6J39_4oKRVkhyF3ODlz8ydV5WGKw,23455
95
95
  flowcept/instrumentation/task_capture.py,sha256=1g9EtLdqsTB0RHsF-eRmA2Xh9l_YqTd953d4v89IC24,8287
96
- resources/sample_settings.yaml,sha256=NxiDXh_IAVBsHdxyhB2U-v212hGPLtHTqAVE6_3GyJ0,6756
97
- flowcept-0.9.2.dist-info/METADATA,sha256=H91pextdVg1DgvI_HPgt4CnRJeH4UmrC5dsZpLwOAiY,31424
98
- flowcept-0.9.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
99
- flowcept-0.9.2.dist-info/entry_points.txt,sha256=i8q67WE0201rVxYI2lyBtS52shvgl93x2Szp4q8zMlw,47
100
- flowcept-0.9.2.dist-info/licenses/LICENSE,sha256=r5-2P6tFTuRGWT5TiX32s1y0tnp4cIqBEC1QjTaXe2k,1086
101
- flowcept-0.9.2.dist-info/RECORD,,
96
+ resources/sample_settings.yaml,sha256=LWR3StVNlcmy8sDd8ntUF2oNz3awWDBwsuuC3ABmbC4,6756
97
+ flowcept-0.9.4.dist-info/METADATA,sha256=oPtI5ythQuDHQfEO-oRIgZTyGwxAHA5tocZGq9VdjjA,31581
98
+ flowcept-0.9.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
99
+ flowcept-0.9.4.dist-info/entry_points.txt,sha256=i8q67WE0201rVxYI2lyBtS52shvgl93x2Szp4q8zMlw,47
100
+ flowcept-0.9.4.dist-info/licenses/LICENSE,sha256=r5-2P6tFTuRGWT5TiX32s1y0tnp4cIqBEC1QjTaXe2k,1086
101
+ flowcept-0.9.4.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
- flowcept_version: 0.9.2 # Version of the Flowcept package. This setting file is compatible with this version.
1
+ flowcept_version: 0.9.4 # Version of the Flowcept package. This setting file is compatible with this version.
2
2
 
3
3
  project:
4
4
  debug: true # Toggle debug mode. This will add a property `debug: true` to all saved data, making it easier to retrieve/delete them later.