dasein-core 0.2.7-py3-none-any.whl → 0.2.9-py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
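A diff like this can be checked independently. Below is a minimal sketch (not the registry's own tooling) that compares two locally downloaded wheels; the filenames and the `pip download` commands are assumptions. Wheels are plain zip archives, so the standard library is enough.

# Minimal sketch for reproducing this kind of diff locally (assumed workflow,
# not the registry's actual pipeline). First fetch both wheels, e.g.:
#   pip download dasein-core==0.2.7 --no-deps
#   pip download dasein-core==0.2.9 --no-deps
import difflib
import zipfile

OLD = "dasein_core-0.2.7-py3-none-any.whl"  # assumed local filename
NEW = "dasein_core-0.2.9-py3-none-any.whl"  # assumed local filename

def wheel_texts(path: str) -> dict[str, str]:
    """Map archive member name -> decoded text (a wheel is a zip file)."""
    with zipfile.ZipFile(path) as zf:
        return {name: zf.read(name).decode("utf-8", errors="replace")
                for name in zf.namelist()}

old, new = wheel_texts(OLD), wheel_texts(NEW)
for name in sorted(set(old) | set(new)):
    a = old.get(name, "").splitlines(keepends=True)
    b = new.get(name, "").splitlines(keepends=True)
    if a != b:
        # One unified diff per changed member, similar to the view below.
        print("".join(difflib.unified_diff(a, b, f"old/{name}", f"new/{name}")))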
Files changed (46)
  1. dasein/api.py +1202 -133
  2. dasein/capture.py +2379 -1803
  3. dasein/microturn.py +475 -0
  4. dasein/models/en_core_web_sm/en_core_web_sm/__init__.py +10 -0
  5. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/LICENSE +19 -0
  6. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/LICENSES_SOURCES +66 -0
  7. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/README.md +47 -0
  8. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/accuracy.json +330 -0
  9. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/attribute_ruler/patterns +0 -0
  10. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/config.cfg +269 -0
  11. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/lemmatizer/lookups/lookups.bin +1 -0
  12. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/meta.json +521 -0
  13. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/cfg +13 -0
  14. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/model +0 -0
  15. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/moves +1 -0
  16. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/cfg +13 -0
  17. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/model +0 -0
  18. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/moves +1 -0
  19. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/senter/cfg +3 -0
  20. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/senter/model +0 -0
  21. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tagger/cfg +57 -0
  22. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tagger/model +0 -0
  23. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tok2vec/cfg +3 -0
  24. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tok2vec/model +0 -0
  25. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tokenizer +3 -0
  26. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/key2row +1 -0
  27. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/lookups.bin +0 -0
  28. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/strings.json +84782 -0
  29. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/vectors +0 -0
  30. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/vectors.cfg +3 -0
  31. dasein/models/en_core_web_sm/en_core_web_sm/meta.json +521 -0
  32. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/LICENSE +19 -0
  33. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/LICENSES_SOURCES +66 -0
  34. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/METADATA +59 -0
  35. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/RECORD +35 -0
  36. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/WHEEL +5 -0
  37. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/entry_points.txt +2 -0
  38. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/top_level.txt +1 -0
  39. dasein/pipecleaner.py +1917 -0
  40. dasein/wrappers.py +314 -0
  41. {dasein_core-0.2.7.dist-info → dasein_core-0.2.9.dist-info}/METADATA +4 -1
  42. dasein_core-0.2.9.dist-info/RECORD +59 -0
  43. dasein_core-0.2.7.dist-info/RECORD +0 -21
  44. {dasein_core-0.2.7.dist-info → dasein_core-0.2.9.dist-info}/WHEEL +0 -0
  45. {dasein_core-0.2.7.dist-info → dasein_core-0.2.9.dist-info}/licenses/LICENSE +0 -0
  46. {dasein_core-0.2.7.dist-info → dasein_core-0.2.9.dist-info}/top_level.txt +0 -0
dasein/capture.py CHANGED
@@ -1,1803 +1,2379 @@
-"""
-Trace capture functionality for Dasein.
-"""
-
-import hashlib
-from typing import Any, Dict, List, Optional, Union
-from datetime import datetime
-from langchain_core.callbacks.base import BaseCallbackHandler
-from langchain_core.callbacks.manager import CallbackManagerForToolRun
-from langchain_core.tools import BaseTool
-
-
-# ============================================================================
-# VERBOSE LOGGING HELPER
-# ============================================================================
-
-def _vprint(message: str, verbose: bool = False, force: bool = False):
-    """
-    Helper function for verbose printing.
-
-    Args:
-        message: Message to print
-        verbose: Whether verbose mode is enabled
-        force: If True, always print regardless of verbose setting
-    """
-    if force or verbose:
-        print(message)
-
-
-# DEPRECATED: Global trace store removed for thread-safety
-# Traces are now stored instance-level in DaseinCallbackHandler._trace
-# _TRACE: List[Dict[str, Any]] = []
-
-# Hook cache for agent fingerprinting
-_HOOK_CACHE: Dict[str, Any] = {}
-
-# Store for modified tool inputs
-_MODIFIED_TOOL_INPUTS: Dict[str, str] = {}
-
-
-class DaseinToolWrapper(BaseTool):
-    """Wrapper for tools that applies micro-turn modifications."""
-
-    name: str = ""
-    description: str = ""
-    original_tool: Any = None
-    callback_handler: Any = None
-
-    def __init__(self, original_tool, callback_handler=None, verbose: bool = False):
-        super().__init__(
-            name=original_tool.name,
-            description=original_tool.description
-        )
-        self.original_tool = original_tool
-        self.callback_handler = callback_handler
-        self._verbose = verbose
-
-    def _vprint(self, message: str, force: bool = False):
-        """Helper for verbose printing."""
-        _vprint(message, self._verbose, force)
-
-    def _run(self, *args, **kwargs):
-        """Run the tool with micro-turn injection at execution level."""
-        self._vprint(f"[DASEIN][TOOL_WRAPPER] _run called for {self.name} - VERSION 2.0")
-        self._vprint(f"[DASEIN][TOOL_WRAPPER] Args: {args}")
-        self._vprint(f"[DASEIN][TOOL_WRAPPER] Kwargs: {kwargs}")
-
-        try:
-            # Get the original input
-            original_input = args[0] if args else ""
-            self._vprint(f"[DASEIN][TOOL_WRAPPER] Original input: {original_input[:100]}...")
-
-            # Apply micro-turn injection if we have rules
-            modified_input = self._apply_micro_turn_injection(str(original_input))
-
-            if modified_input != original_input:
-                self._vprint(f"[DASEIN][TOOL_WRAPPER] Applied micro-turn injection for {self.name}: {original_input[:50]}... -> {modified_input[:50]}...")
-                # Use modified input
-                result = self.original_tool._run(modified_input, *args[1:], **kwargs)
-            else:
-                self._vprint(f"[DASEIN][TOOL_WRAPPER] No micro-turn injection applied for {self.name}")
-                # Use original input
-                result = self.original_tool._run(*args, **kwargs)
-
-            # Capture the tool output in the trace
-            self._vprint(f"[DASEIN][TOOL_WRAPPER] About to capture tool output for {self.name}")
-            self._capture_tool_output(self.name, args, kwargs, result)
-            self._vprint(f"[DASEIN][TOOL_WRAPPER] Finished capturing tool output for {self.name}")
-
-            return result
-
-        except Exception as e:
-            self._vprint(f"[DASEIN][TOOL_WRAPPER] Exception in _run: {e}")
-            import traceback
-            traceback.print_exc()
-            # Still try to call the original tool
-            result = self.original_tool._run(*args, **kwargs)
-            return result
-
-    def invoke(self, input_data, config=None, **kwargs):
-        """Invoke the tool with micro-turn injection."""
-        # Get the original input
-        original_input = str(input_data)
-
-        # Apply micro-turn injection if we have rules
-        modified_input = self._apply_micro_turn_injection(original_input)
-
-        if modified_input != original_input:
-            self._vprint(f"[DASEIN][TOOL_WRAPPER] Applied micro-turn injection for {self.name}: {original_input[:50]}... -> {modified_input[:50]}...")
-            # Use modified input
-            return self.original_tool.invoke(modified_input, config, **kwargs)
-        else:
-            # Use original input
-            return self.original_tool.invoke(input_data, config, **kwargs)
-
-    async def _arun(self, *args, **kwargs):
-        """Async run the tool with micro-turn injection at execution level."""
-        self._vprint(f"[DASEIN][TOOL_WRAPPER] _arun called for {self.name} - ASYNC VERSION")
-        self._vprint(f"[DASEIN][TOOL_WRAPPER] Args: {args}")
-        self._vprint(f"[DASEIN][TOOL_WRAPPER] Kwargs: {kwargs}")
-
-        try:
-            # Get the original input
-            original_input = args[0] if args else ""
-            self._vprint(f"[DASEIN][TOOL_WRAPPER] Original input: {original_input[:100]}...")
-
-            # Apply micro-turn injection if we have rules
-            modified_input = self._apply_micro_turn_injection(str(original_input))
-
-            if modified_input != original_input:
-                self._vprint(f"[DASEIN][TOOL_WRAPPER] Applied micro-turn injection for {self.name}: {original_input[:50]}... -> {modified_input[:50]}...")
-                # Use modified input
-                result = await self.original_tool._arun(modified_input, *args[1:], **kwargs)
-            else:
-                self._vprint(f"[DASEIN][TOOL_WRAPPER] No micro-turn injection applied for {self.name}")
-                # Use original input
-                result = await self.original_tool._arun(*args, **kwargs)
-
-            # Capture the tool output in the trace
-            self._vprint(f"[DASEIN][TOOL_WRAPPER] About to capture tool output for {self.name}")
-            self._capture_tool_output(self.name, args, kwargs, result)
-            self._vprint(f"[DASEIN][TOOL_WRAPPER] Finished capturing tool output for {self.name}")
-
-            return result
-
-        except Exception as e:
-            self._vprint(f"[DASEIN][TOOL_WRAPPER] Exception in _arun: {e}")
-            import traceback
-            traceback.print_exc()
-            # Still try to call the original tool
-            result = await self.original_tool._arun(*args, **kwargs)
-            return result
-
-    async def ainvoke(self, input_data, config=None, **kwargs):
-        """Async invoke the tool with micro-turn injection."""
-        self._vprint(f"[DASEIN][TOOL_WRAPPER] ainvoke called for {self.name} - ASYNC VERSION")
-
-        # Get the original input
-        original_input = str(input_data)
-
-        # Apply micro-turn injection if we have rules
-        modified_input = self._apply_micro_turn_injection(original_input)
-
-        if modified_input != original_input:
-            self._vprint(f"[DASEIN][TOOL_WRAPPER] Applied micro-turn injection for {self.name}: {original_input[:50]}... -> {modified_input[:50]}...")
-            # Use modified input
-            return await self.original_tool.ainvoke(modified_input, config, **kwargs)
-        else:
-            # Use original input
-            return await self.original_tool.ainvoke(input_data, config, **kwargs)
-
-    def _apply_micro_turn_injection(self, original_input: str) -> str:
-        """Apply micro-turn injection to the tool input."""
-        try:
-            # Check if we have a callback handler with rules and LLM
-            if not self.callback_handler:
-                return original_input
-
-            # Normalize selected rules into Rule objects (handle (rule, metadata) tuples)
-            normalized_rules = []
-            for rule_meta in getattr(self.callback_handler, "_selected_rules", []) or []:
-                if isinstance(rule_meta, tuple) and len(rule_meta) == 2:
-                    rule_obj, _metadata = rule_meta
-                else:
-                    rule_obj = rule_meta
-                normalized_rules.append(rule_obj)
-
-            # Filter tool_start rules
-            tool_rules = [r for r in normalized_rules if getattr(r, 'target_step_type', '') == "tool_start"]
-
-            if not tool_rules:
-                self._vprint(f"[DASEIN][MICROTURN] No tool rules selected - skipping micro-turn for {self.name}")
-                return original_input
-
-            # Check if any rule covers this tool
-            covered_rules = [rule for rule in tool_rules
-                             if self._rule_covers_tool(rule, self.name, original_input)]
-
-            if not covered_rules:
-                return original_input
-
-            # Fire micro-turn LLM call (use first matching rule)
-            rule = covered_rules[0]
-            self._vprint(f"[DASEIN][MICROTURN] rule_id={rule.id} tool={self.name}")
-
-            # Create micro-turn prompt
-            micro_turn_prompt = self._create_micro_turn_prompt(rule, self.name, original_input)
-
-            # Execute micro-turn LLM call
-            modified_input = self._execute_micro_turn_llm_call(micro_turn_prompt, original_input)
-
-            self._vprint(f"[DASEIN][MICROTURN] Applied rule {rule.id}: {str(original_input)[:50]}... -> {str(modified_input)[:50]}...")
-            return modified_input
-
-        except Exception as e:
-            self._vprint(f"[DASEIN][MICROTURN] Error in micro-turn injection: {e}")
-            return original_input
-
-    def _rule_covers_tool(self, rule, tool_name: str, tool_input: str) -> bool:
-        """Check if a rule covers this tool call."""
-        if not hasattr(rule, 'references') or not rule.references:
-            return False
-
-        # Check if the rule references this tool
-        tools = rule.references.get('tools', [])
-        return tool_name in tools
-
-    def _create_micro_turn_prompt(self, rule, tool_name: str, tool_input: str) -> str:
-        """Create the prompt for the micro-turn LLM call."""
-        return f"""You are applying a rule to fix a tool input.
-
-Rule: {rule.advice_text}
-
-Tool: {tool_name}
-Current Input: {tool_input}
-
-Apply the rule to fix the input. Return only the corrected input, nothing else."""
-
-    def _execute_micro_turn_llm_call(self, prompt: str, original_input: str) -> str:
-        """Execute the actual micro-turn LLM call."""
-        try:
-            if not self.callback_handler or not self.callback_handler._llm:
-                self._vprint(f"[DASEIN][MICROTURN] No LLM available for micro-turn call")
-                return original_input
-
-            self._vprint(f"[DASEIN][MICROTURN] Executing micro-turn LLM call")
-            self._vprint(f"[DASEIN][MICROTURN] Prompt: {prompt[:200]}...")
-
-            # Make the micro-turn LLM call
-            messages = [{"role": "user", "content": prompt}]
-            response = self.callback_handler._llm.invoke(messages)
-
-            # Extract the response content
-            if hasattr(response, 'content'):
-                modified_input = response.content.strip()
-            elif isinstance(response, str):
-                modified_input = response.strip()
-            else:
-                modified_input = str(response).strip()
-
-            self._vprint(f"[DASEIN][MICROTURN] LLM response: {modified_input[:100]}...")
-
-            # 🚨 CRITICAL: Parse JSON responses with markdown fences
-            if modified_input.startswith('```json') or modified_input.startswith('```'):
-                try:
-                    # Extract JSON from markdown fences
-                    import re
-                    import json
-                    json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', modified_input, re.DOTALL)
-                    if json_match:
-                        json_str = json_match.group(1)
-                        parsed_json = json.loads(json_str)
-                        # Convert back to the expected format
-                        if isinstance(parsed_json, dict) and 'name' in parsed_json and 'args' in parsed_json:
-                            modified_input = parsed_json
-                            self._vprint(f"[DASEIN][MICROTURN] Parsed JSON from markdown fences: {parsed_json}")
-                        else:
-                            self._vprint(f"[DASEIN][MICROTURN] JSON doesn't have expected structure, using as-is")
-                    else:
-                        self._vprint(f"[DASEIN][MICROTURN] Could not extract JSON from markdown fences")
-                except Exception as e:
-                    self._vprint(f"[DASEIN][MICROTURN] Error parsing JSON: {e}")
-
-            # Validate the response - only fallback if completely empty
-            if not modified_input:
-                self._vprint(f"[DASEIN][MICROTURN] LLM response empty, using original input")
-                return original_input
-
-            return modified_input
-
-        except Exception as e:
-            self._vprint(f"[DASEIN][MICROTURN] Error executing micro-turn LLM call: {e}")
-            return original_input
-
-    def _capture_tool_output(self, tool_name, args, kwargs, result):
-        """Capture tool output in the trace."""
-        try:
-            # Create args excerpt
-            args_str = str(args) if args else ""
-            if len(args_str) > 1000:
-                args_str = args_str[:1000] + "..."
-
-            # Create result excerpt (with 10k limit)
-            result_str = str(result) if result else ""
-            if len(result_str) > 10000:
-                result_str = result_str[:10000] + "..."
-
-            # Add tool_end step to trace
-            step = {
-                "step_type": "tool_end",
-                "tool_name": tool_name,
-                "args_excerpt": args_str,
-                "outcome": result_str,
-                "ts": datetime.now().isoformat(),
-                "run_id": f"tool_{id(self)}_{datetime.now().timestamp()}",
-                "parent_run_id": None,
-            }
-
-            # Add to LLM wrapper's trace if available
-            if self.callback_handler and hasattr(self.callback_handler, '_llm') and self.callback_handler._llm:
-                if hasattr(self.callback_handler._llm, '_trace'):
-                    self.callback_handler._llm._trace.append(step)
-                    self._vprint(f"[DASEIN][TOOL_WRAPPER] Added to LLM wrapper trace")
-                else:
-                    self._vprint(f"[DASEIN][TOOL_WRAPPER] LLM wrapper has no _trace attribute")
-            else:
-                self._vprint(f"[DASEIN][TOOL_WRAPPER] No LLM wrapper available")
-
-            # Also add to callback handler's trace if it has one
-            if self.callback_handler and hasattr(self.callback_handler, '_trace'):
-                self.callback_handler._trace.append(step)
-                self._vprint(f"[DASEIN][TOOL_WRAPPER] Added to callback handler trace")
-
-            self._vprint(f"[DASEIN][TOOL_WRAPPER] Captured tool output for {tool_name}")
-            self._vprint(f"[DASEIN][TOOL_WRAPPER] Output length: {len(result_str)} chars")
-            self._vprint(f"[DASEIN][TOOL_WRAPPER] First 200 chars: {result_str[:200]}")
-            if self.callback_handler and hasattr(self.callback_handler, '_trace'):
-                self._vprint(f"[DASEIN][TOOL_WRAPPER] Callback handler trace length after capture: {len(self.callback_handler._trace)}")
-
-        except Exception as e:
-            self._vprint(f"[DASEIN][TOOL_WRAPPER] Error capturing tool output: {e}")
-
-
-class DaseinCallbackHandler(BaseCallbackHandler):
-    """
-    Callback handler that captures step-by-step traces and implements rule injection.
-    """
-
-    def __init__(self, weights=None, llm=None, is_langgraph=False, coordinator_node=None, planning_nodes=None, verbose: bool = False):
-        super().__init__()
-        self._weights = weights
-        self._selected_rules = [] # Rules selected for this run
-        self._injection_guard = set() # Prevent duplicate injections
-        self._last_modified_prompts = [] # Store modified prompts for LLM wrapper
-        self._llm = llm # Store reference to LLM for micro-turn calls
-        self._tool_name_by_run_id = {} # Track tool names by run_id
-        self._discovered_tools = set() # Track tools discovered during execution
-        self._wrapped_dynamic_tools = {} # Cache of wrapped dynamic tools
-        self._is_langgraph = is_langgraph # Flag to skip planning rule injection for LangGraph
-        self._coordinator_node = coordinator_node # Coordinator node (for future targeted injection)
-        self._planning_nodes = planning_nodes if planning_nodes else set() # Planning-capable nodes (including subgraph children)
-        self._current_chain_node = None # Track current LangGraph node
-        self._agent_was_recreated = False # Track if agent was successfully recreated
-        self._function_calls_made = {} # Track function calls: {function_name: [{'step': N, 'ts': timestamp}]}
-        self._trace = [] # Instance-level trace storage (not global) for thread-safety
-        self._verbose = verbose
-        self._start_times = {} # Track start times for duration calculation: {step_index: datetime}
-        self._vprint(f"[DASEIN][CALLBACK] Initialized callback handler (LangGraph: {is_langgraph})")
-        if coordinator_node:
-            self._vprint(f"[DASEIN][CALLBACK] Coordinator: {coordinator_node}")
-        if planning_nodes:
-            self._vprint(f"[DASEIN][CALLBACK] Planning nodes: {planning_nodes}")
-        self._vprint(f"[DASEIN][CALLBACK] Dynamic tool detection enabled (tools discovered at runtime)")
-
-    def _vprint(self, message: str, force: bool = False):
-        """Helper for verbose printing."""
-        _vprint(message, self._verbose, force)
-
-    def reset_run_state(self):
-        """Reset state that should be cleared between runs."""
-        self._function_calls_made = {}
-        self._injection_guard = set()
-        self._trace = [] # Clear instance trace
-        self._start_times = {} # Clear start times
-        self._vprint(f"[DASEIN][CALLBACK] Reset run state (trace, function calls, injection guard, and start times cleared)")
-
-    def on_llm_start(
-        self,
-        serialized: Dict[str, Any],
-        prompts: List[str],
-        **kwargs: Any,
-    ) -> None:
-        """Called when an LLM starts running."""
-        model_name = serialized.get("name", "unknown") if serialized else "unknown"
-
-        # 🎯 CRITICAL: Track current node from kwargs metadata (LangGraph includes langgraph_node)
-        if self._is_langgraph and 'metadata' in kwargs and isinstance(kwargs['metadata'], dict):
-            if 'langgraph_node' in kwargs['metadata']:
-                node_name = kwargs['metadata']['langgraph_node']
-                self._current_chain_node = node_name
-
-        # Inject rules if applicable
-        modified_prompts = self._inject_rule_if_applicable("llm_start", model_name, prompts)
-
-        # Store the modified prompts for the LLM wrapper to use
-        self._last_modified_prompts = modified_prompts
-
-        # 🚨 OPTIMIZED: For LangGraph, check if kwargs contains 'invocation_params' with messages
-        # Extract the most recent message instead of full history
-        # Use from_end=True to capture the END of system prompts (where user's actual query is)
-        if 'invocation_params' in kwargs and 'messages' in kwargs['invocation_params']:
-            args_excerpt = self._extract_recent_message({'messages': kwargs['invocation_params']['messages']})
-        else:
-            args_excerpt = self._excerpt(" | ".join(modified_prompts), from_end=True)
-
-        # GNN-related fields
-        step_index = len(self._trace)
-
-        # Track which rules triggered at this step (llm_start rules)
-        rule_triggered_here = []
-        if hasattr(self, '_selected_rules') and self._selected_rules:
-            for rule_meta in self._selected_rules:
-                if isinstance(rule_meta, tuple) and len(rule_meta) == 2:
-                    rule_obj, _metadata = rule_meta
-                else:
-                    rule_obj = rule_meta
-                target_step_type = getattr(rule_obj, 'target_step_type', '')
-                if target_step_type in ['llm_start', 'chain_start']:
-                    rule_triggered_here.append(getattr(rule_obj, 'id', 'unknown'))
-
-        # Record start time for duration calculation
-        start_time = datetime.now()
-        self._start_times[step_index] = start_time
-
-        step = {
-            "step_type": "llm_start",
-            "tool_name": model_name,
-            "args_excerpt": args_excerpt,
-            "outcome": "",
-            "ts": start_time.isoformat(),
-            "run_id": None,
-            "parent_run_id": None,
-            "node": self._current_chain_node, # LangGraph node name (if available)
-            # GNN step-level fields
-            "step_index": step_index,
-            "rule_triggered_here": rule_triggered_here,
-        }
-        self._trace.append(step)
-        # self._vprint(f"[DASEIN][CALLBACK] Captured llm_start: {len(_TRACE)} total steps") # Commented out - too noisy
-
-    def on_llm_end(
-        self,
-        response: Any,
-        **kwargs: Any,
-    ) -> None:
-        """Called when an LLM ends running."""
-        outcome = ""
-        try:
-            # Debug: Print ALL available data to see what we're getting
-            # print(f"[DEBUG] on_llm_end called")
-            # print(f" response type: {type(response)}")
-            # print(f" kwargs keys: {kwargs.keys()}")
-
-            # Try multiple extraction strategies
-            # Strategy 1: Standard LangChain LLMResult structure
-            if hasattr(response, 'generations') and response.generations:
-                if len(response.generations) > 0:
-                    first_gen = response.generations[0]
-                    if isinstance(first_gen, list) and len(first_gen) > 0:
-                        generation = first_gen[0]
-                    else:
-                        generation = first_gen
-
-                    # Try multiple content fields
-                    if hasattr(generation, 'text') and generation.text:
-                        outcome = self._excerpt(generation.text)
-                    elif hasattr(generation, 'message'):
-                        if hasattr(generation.message, 'content'):
-                            outcome = self._excerpt(generation.message.content)
-                        elif hasattr(generation.message, 'text'):
-                            outcome = self._excerpt(generation.message.text)
-                    elif hasattr(generation, 'content'):
-                        outcome = self._excerpt(generation.content)
-                    else:
-                        outcome = self._excerpt(str(generation))
-
-            # Strategy 2: Check if response itself has content
-            elif hasattr(response, 'content'):
-                outcome = self._excerpt(response.content)
-
-            # Strategy 3: Check kwargs for output/response
-            elif 'output' in kwargs:
-                outcome = self._excerpt(str(kwargs['output']))
-            elif 'result' in kwargs:
-                outcome = self._excerpt(str(kwargs['result']))
-
-            # Fallback
-            if not outcome:
-                outcome = self._excerpt(str(response))
-
-            # Debug: Warn if still empty
-            if not outcome or len(outcome) == 0:
-                self._vprint(f"[DASEIN][CALLBACK] WARNING: on_llm_end got empty outcome!")
-                print(f" Response: {str(response)[:200]}")
-                print(f" kwargs keys: {list(kwargs.keys())}")
-
-        except (AttributeError, IndexError, TypeError) as e:
-            self._vprint(f"[DASEIN][CALLBACK] Error in on_llm_end: {e}")
-            outcome = self._excerpt(str(response))
-
-        # 🎯 CRITICAL: Extract function calls for state tracking (agent-agnostic)
-        try:
-            if hasattr(response, 'generations') and response.generations:
-                first_gen = response.generations[0]
-                if isinstance(first_gen, list) and len(first_gen) > 0:
-                    generation = first_gen[0]
-                else:
-                    generation = first_gen
-
-                # Check for function_call in message additional_kwargs
-                if hasattr(generation, 'message') and hasattr(generation.message, 'additional_kwargs'):
-                    func_call = generation.message.additional_kwargs.get('function_call')
-                    if func_call and isinstance(func_call, dict) and 'name' in func_call:
-                        func_name = func_call['name']
-                        step_num = len(self._trace)
-
-                        # Extract arguments and create preview
-                        args_str = func_call.get('arguments', '')
-                        preview = ''
-                        if args_str and len(args_str) > 0:
-                            # Take first 100 chars as preview
-                            preview = args_str[:100].replace('\n', ' ').replace('\r', '')
-                            if len(args_str) > 100:
-                                preview += '...'
-
-                        call_info = {
-                            'step': step_num,
-                            'ts': datetime.now().isoformat(),
-                            'preview': preview
-                        }
-
-                        if func_name not in self._function_calls_made:
-                            self._function_calls_made[func_name] = []
-                        self._function_calls_made[func_name].append(call_info)
-
-                        self._vprint(f"[DASEIN][STATE] Tracked function call: {func_name} (count: {len(self._function_calls_made[func_name])})")
-        except Exception as e:
-            pass # Silently skip if function call extraction fails
-
-        # Extract token usage from response metadata
-        input_tokens = 0
-        output_tokens = 0
-        try:
-            # DEBUG: Print response structure for first LLM call
-            # Uncomment to see token structure:
-            # import json
-            # print(f"[DEBUG] Response structure:")
-            # print(f" Has llm_output: {hasattr(response, 'llm_output')}")
-            # if hasattr(response, 'llm_output'):
-            # print(f" llm_output keys: {response.llm_output.keys() if response.llm_output else None}")
-            # print(f" Has generations: {hasattr(response, 'generations')}")
-            # if hasattr(response, 'generations') and response.generations:
-            # gen = response.generations[0][0] if isinstance(response.generations[0], list) else response.generations[0]
-            # print(f" generation_info: {gen.generation_info if hasattr(gen, 'generation_info') else None}")
-
-            # Try LangChain's standard llm_output field
-            if hasattr(response, 'llm_output') and response.llm_output:
-                llm_output = response.llm_output
-                # Different providers use different field names
-                if 'token_usage' in llm_output:
-                    usage = llm_output['token_usage']
-                    input_tokens = usage.get('prompt_tokens', 0) or usage.get('input_tokens', 0)
-                    output_tokens = usage.get('completion_tokens', 0) or usage.get('output_tokens', 0)
-                elif 'usage_metadata' in llm_output:
-                    usage = llm_output['usage_metadata']
-                    input_tokens = usage.get('input_tokens', 0) or usage.get('prompt_tokens', 0)
-                    output_tokens = usage.get('output_tokens', 0) or usage.get('completion_tokens', 0)
-
-            # Try generations metadata (Google GenAI format)
-            if (input_tokens == 0 and output_tokens == 0) and hasattr(response, 'generations') and response.generations:
-                first_gen = response.generations[0]
-                if isinstance(first_gen, list) and len(first_gen) > 0:
-                    gen = first_gen[0]
-                else:
-                    gen = first_gen
-
-                # Check message.usage_metadata (Google GenAI stores it here!)
-                if hasattr(gen, 'message') and hasattr(gen.message, 'usage_metadata'):
-                    usage = gen.message.usage_metadata
-                    input_tokens = usage.get('input_tokens', 0)
-                    output_tokens = usage.get('output_tokens', 0)
-
-                # Fallback: Check generation_info
-                elif hasattr(gen, 'generation_info') and gen.generation_info:
-                    gen_info = gen.generation_info
-                    if 'usage_metadata' in gen_info:
-                        usage = gen_info['usage_metadata']
-                        input_tokens = usage.get('prompt_token_count', 0) or usage.get('input_tokens', 0)
-                        output_tokens = usage.get('candidates_token_count', 0) or usage.get('output_tokens', 0)
-
-            # Log if we got tokens
-            # if input_tokens > 0 or output_tokens > 0:
-            # self._vprint(f"[DASEIN][TOKENS] Captured: {input_tokens} in, {output_tokens} out")
-
-        except Exception as e:
-            # Print error for debugging
-            self._vprint(f"[DASEIN][CALLBACK] Error extracting tokens: {e}")
-            import traceback
-            traceback.print_exc()
-
-        # GNN-related fields: compute tokens_delta
-        step_index = len(self._trace)
-        tokens_delta = 0
-        # Find previous step with tokens_output to compute delta
-        for prev_step in reversed(self._trace):
-            if 'tokens_output' in prev_step and prev_step['tokens_output'] > 0:
-                tokens_delta = output_tokens - prev_step['tokens_output']
-                break
-
-        # Calculate duration_ms by matching with corresponding llm_start
-        duration_ms = 0
-        for i in range(len(self._trace) - 1, -1, -1):
-            if self._trace[i].get('step_type') == 'llm_start':
-                # Found the matching llm_start
-                if i in self._start_times:
-                    start_time = self._start_times[i]
-                    end_time = datetime.now()
-                    duration_ms = int((end_time - start_time).total_seconds() * 1000)
-                    # Update the llm_start step with duration_ms
-                    self._trace[i]['duration_ms'] = duration_ms
-                break
-
-        step = {
-            "step_type": "llm_end",
-            "tool_name": "",
-            "args_excerpt": "",
-            "outcome": self._excerpt(outcome, max_len=1000), # Truncate to 1000 chars
-            "ts": datetime.now().isoformat(),
-            "run_id": None,
-            "parent_run_id": None,
-            "tokens_input": input_tokens,
-            "tokens_output": output_tokens,
-            "node": self._current_chain_node, # LangGraph node name (if available)
-            # GNN step-level fields
-            "step_index": step_index,
-            "tokens_delta": tokens_delta,
-            "duration_ms": duration_ms,
-        }
-        self._trace.append(step)
-
-    def on_agent_action(
-        self,
-        action: Any,
-        **kwargs: Any,
-    ) -> None:
-        """Called when an agent takes an action."""
-        tool_name = getattr(action, 'tool', 'unknown')
-        args_excerpt = self._excerpt(str(getattr(action, 'tool_input', '')))
-        outcome = self._excerpt(str(getattr(action, 'log', '')))
-
-        step = {
-            "step_type": "agent_action",
-            "tool_name": tool_name,
-            "args_excerpt": args_excerpt,
-            "outcome": outcome,
-            "ts": datetime.now().isoformat(),
-            "run_id": None,
-            "parent_run_id": None,
-        }
-        self._trace.append(step)
-
-    def on_agent_finish(
-        self,
-        finish: Any,
-        **kwargs: Any,
-    ) -> None:
-        """Called when an agent finishes."""
-        outcome = self._excerpt(str(getattr(finish, 'return_values', '')))
-
-        step = {
-            "step_type": "agent_finish",
-            "tool_name": None,
-            "args_excerpt": "",
-            "outcome": outcome,
-            "ts": datetime.now().isoformat(),
-            "run_id": None,
-            "parent_run_id": None,
-        }
-        self._trace.append(step)
-
-    def on_tool_start(
-        self,
-        serialized: Dict[str, Any],
-        input_str: str,
-        *,
-        run_id: str,
-        parent_run_id: Optional[str] = None,
-        tags: Optional[List[str]] = None,
-        metadata: Optional[Dict[str, Any]] = None,
-        inputs: Optional[Dict[str, Any]] = None,
-        **kwargs: Any,
-    ) -> None:
-        """Called when a tool starts running.
-
-        This is where we detect and track dynamic tools that weren't
-        statically attached to the agent at init time.
-        """
-        tool_name = serialized.get("name", "unknown") if serialized else "unknown"
-
-        # Track discovered tools for reporting
-        if tool_name != "unknown" and tool_name not in self._discovered_tools:
-            self._discovered_tools.add(tool_name)
-            # Tool discovered and tracked (silently)
-
-        # Store tool name for later use in on_tool_end
-        self._tool_name_by_run_id[run_id] = tool_name
-
-        # Apply tool-level rule injection
-        # self._vprint(f"[DASEIN][CALLBACK] on_tool_start called!") # Commented out - too noisy
-        # self._vprint(f"[DASEIN][CALLBACK] Tool: {tool_name}") # Commented out - too noisy
-        # self._vprint(f"[DASEIN][CALLBACK] Input: {input_str[:100]}...") # Commented out - too noisy
-        # self._vprint(f"[DASEIN][APPLY] on_tool_start: selected_rules={len(self._selected_rules)}") # Commented out - too noisy
-        modified_input = self._inject_tool_rule_if_applicable("tool_start", tool_name, input_str)
-
-        args_excerpt = self._excerpt(modified_input)
-
-        # GNN-related fields: capture step-level metrics
-        step_index = len(self._trace)
-        tool_input_chars = len(str(input_str))
-
-        # Track which rules triggered at this step
-        rule_triggered_here = []
-        if hasattr(self, '_selected_rules') and self._selected_rules:
-            for rule_meta in self._selected_rules:
-                if isinstance(rule_meta, tuple) and len(rule_meta) == 2:
-                    rule_obj, _metadata = rule_meta
-                else:
-                    rule_obj = rule_meta
-                if getattr(rule_obj, 'target_step_type', '') == "tool_start":
-                    rule_triggered_here.append(getattr(rule_obj, 'id', 'unknown'))
-
-        # Record start time for duration calculation (keyed by run_id for tools)
-        start_time = datetime.now()
-        self._start_times[run_id] = start_time
-
-        step = {
-            "step_type": "tool_start",
-            "tool_name": tool_name,
-            "args_excerpt": args_excerpt,
-            "outcome": "",
-            "ts": start_time.isoformat(),
-            "run_id": run_id,
-            "parent_run_id": parent_run_id,
-            "node": self._current_chain_node, # LangGraph node name (if available)
-            # GNN step-level fields
-            "step_index": step_index,
-            "tool_input_chars": tool_input_chars,
-            "rule_triggered_here": rule_triggered_here,
-        }
-        self._trace.append(step)
-
-    def on_tool_end(
-        self,
-        output: str,
-        *,
-        run_id: str,
-        parent_run_id: Optional[str] = None,
-        tags: Optional[List[str]] = None,
-        **kwargs: Any,
-    ) -> None:
-        """Called when a tool ends running."""
-        # Get the tool name from the corresponding tool_start
-        tool_name = self._tool_name_by_run_id.get(run_id, "unknown")
-
-        # Handle different output types (LangGraph may pass ToolMessage objects)
-        output_str = str(output)
-        outcome = self._excerpt(output_str)
-
-        # self._vprint(f"[DASEIN][CALLBACK] on_tool_end called!") # Commented out - too noisy
-        # self._vprint(f"[DASEIN][CALLBACK] Tool: {tool_name}") # Commented out - too noisy
-        # self._vprint(f"[DASEIN][CALLBACK] Output length: {len(output_str)} chars") # Commented out - too noisy
-        # self._vprint(f"[DASEIN][CALLBACK] Outcome length: {len(outcome)} chars") # Commented out - too noisy
-
-        # GNN-related fields: capture tool output metrics
-        step_index = len(self._trace)
-        tool_output_chars = len(output_str)
-
-        # Estimate tool_output_items (heuristic: count lines, or rows if SQL-like)
-        tool_output_items = 0
-        try:
-            # Try to count lines as a proxy for items
-            if output_str:
-                tool_output_items = output_str.count('\n') + 1
-        except:
-            tool_output_items = 0
-
-        # Calculate duration_ms using run_id to match with tool_start
-        duration_ms = 0
-        if run_id in self._start_times:
-            start_time = self._start_times[run_id]
-            end_time = datetime.now()
-            duration_ms = int((end_time - start_time).total_seconds() * 1000)
-            # Update the corresponding tool_start step with duration_ms
-            for i in range(len(self._trace) - 1, -1, -1):
-                if self._trace[i].get('step_type') == 'tool_start' and self._trace[i].get('run_id') == run_id:
-                    self._trace[i]['duration_ms'] = duration_ms
-                    break
-            # Clean up start time
-            del self._start_times[run_id]
-
-        # Extract available selectors from DOM-like output (web browse agents)
-        available_selectors = None
-        if tool_name in ['extract_text', 'get_elements', 'extract_hyperlinks', 'extract_content']:
-            available_selectors = self._extract_semantic_selectors(output_str)
-
-        step = {
-            "step_type": "tool_end",
-            "tool_name": tool_name,
-            "args_excerpt": "",
-            "outcome": self._excerpt(outcome, max_len=1000), # Truncate to 1000 chars
-            "ts": datetime.now().isoformat(),
-            "run_id": run_id,
-            "parent_run_id": parent_run_id,
-            "node": self._current_chain_node, # LangGraph node name (if available)
-            # GNN step-level fields
-            "step_index": step_index,
-            "tool_output_chars": tool_output_chars,
-            "tool_output_items": tool_output_items,
-            "duration_ms": duration_ms,
-        }
-
-        # Add available_selectors only if found (keep trace light)
-        if available_selectors:
-            step["available_selectors"] = available_selectors
-        self._trace.append(step)
-
-        # Clean up the stored tool name
-        if run_id in self._tool_name_by_run_id:
-            del self._tool_name_by_run_id[run_id]
-
-    def on_tool_error(
-        self,
-        error: BaseException,
-        *,
-        run_id: str,
-        parent_run_id: Optional[str] = None,
-        tags: Optional[List[str]] = None,
-        **kwargs: Any,
-    ) -> None:
-        """Called when a tool encounters an error."""
-        error_msg = self._excerpt(str(error))
-
-        step = {
-            "step_type": "tool_error",
-            "tool_name": "",
-            "args_excerpt": "",
-            "outcome": f"ERROR: {error_msg}",
-            "ts": datetime.now().isoformat(),
-            "run_id": run_id,
-            "parent_run_id": parent_run_id,
-        }
-        self._trace.append(step)
-
-    def on_chain_start(
-        self,
-        serialized: Dict[str, Any],
-        inputs: Dict[str, Any],
-        **kwargs: Any,
-    ) -> None:
-        """Called when a chain starts running."""
-        chain_name = serialized.get("name", "unknown") if serialized else "unknown"
-        # self._vprint(f"[DASEIN][CALLBACK] on_chain_start called!") # Commented out - too noisy
-        # self._vprint(f"[DASEIN][CALLBACK] Chain: {chain_name}") # Commented out - too noisy
-
-        # 🚨 OPTIMIZED: For LangGraph agents, suppress redundant chain_start events
-        # LangGraph fires on_chain_start for every internal node, creating noise
-        # We already capture llm_start, llm_end, tool_start, tool_end which are more meaningful
-        if self._is_langgraph:
-            # Track current chain node for future targeted injection
-            # 🎯 CRITICAL: Extract actual node name from metadata (same as on_llm_start)
-            if 'metadata' in kwargs and isinstance(kwargs['metadata'], dict):
-                if 'langgraph_node' in kwargs['metadata']:
-                    self._current_chain_node = kwargs['metadata']['langgraph_node']
-                else:
-                    self._current_chain_node = chain_name
-            else:
-                self._current_chain_node = chain_name
-
-            # self._vprint(f"[DASEIN][CALLBACK] Suppressing redundant chain_start for LangGraph agent") # Commented out - too noisy
-            # Still handle tool executors
-            if chain_name in {"tools", "ToolNode", "ToolExecutor"}:
-                # self._vprint(f"[DASEIN][CALLBACK] Bridging chain_start to tool_start for {chain_name}") # Commented out - too noisy
-                pass
-                self._handle_tool_executor_start(serialized, inputs, **kwargs)
-            return
-
-        # For standard LangChain agents, keep chain_start events
-        # Bridge to tool_start for tool executors
-        if chain_name in {"tools", "ToolNode", "ToolExecutor"}:
-            # self._vprint(f"[DASEIN][CALLBACK] Bridging chain_start to tool_start for {chain_name}") # Commented out - too noisy
-            self._handle_tool_executor_start(serialized, inputs, **kwargs)
-
-        args_excerpt = self._excerpt(str(inputs))
-
-        # Record start time for duration calculation
-        step_index = len(self._trace)
-        start_time = datetime.now()
-        self._start_times[f"chain_{step_index}"] = start_time
-
-        step = {
-            "step_type": "chain_start",
-            "tool_name": chain_name,
-            "args_excerpt": args_excerpt,
-            "outcome": "",
-            "ts": start_time.isoformat(),
-            "run_id": None,
-            "parent_run_id": None,
-            "step_index": step_index,
-        }
-        self._trace.append(step)
-
-    def on_chain_end(
-        self,
-        outputs: Dict[str, Any],
-        **kwargs: Any,
-    ) -> None:
-        """Called when a chain ends running."""
-        # 🚨 OPTIMIZED: Suppress redundant chain_end for LangGraph agents
-        if self._is_langgraph:
-            return
-
-        outcome = self._excerpt(str(outputs))
-
-        # Calculate duration_ms by matching with corresponding chain_start
-        duration_ms = 0
-        for i in range(len(self._trace) - 1, -1, -1):
-            if self._trace[i].get('step_type') == 'chain_start':
-                # Found the matching chain_start
-                chain_key = f"chain_{i}"
-                if chain_key in self._start_times:
-                    start_time = self._start_times[chain_key]
-                    end_time = datetime.now()
-                    duration_ms = int((end_time - start_time).total_seconds() * 1000)
-                    # Update the chain_start step with duration_ms
-                    self._trace[i]['duration_ms'] = duration_ms
-                    # Clean up start time
-                    del self._start_times[chain_key]
-                break
-
-        step = {
-            "step_type": "chain_end",
-            "tool_name": "",
-            "args_excerpt": "",
-            "outcome": outcome,
-            "ts": datetime.now().isoformat(),
-            "run_id": None,
-            "parent_run_id": None,
-            "duration_ms": duration_ms,
-        }
-        self._trace.append(step)
-
-    def on_chain_error(
-        self,
-        error: BaseException,
-        **kwargs: Any,
-    ) -> None:
-        """Called when a chain encounters an error."""
-        error_msg = self._excerpt(str(error))
-
-        step = {
-            "step_type": "chain_error",
-            "tool_name": "",
-            "args_excerpt": "",
-            "outcome": f"ERROR: {error_msg}",
-            "ts": datetime.now().isoformat(),
-            "run_id": None,
-            "parent_run_id": None,
-        }
-        self._trace.append(step)
-
-    def _extract_recent_message(self, inputs: Dict[str, Any]) -> str:
-        """
-        Extract the most recent message from LangGraph inputs to show thought progression.
-
-        For LangGraph agents, inputs contain {'messages': [msg1, msg2, ...]}.
-        Instead of showing the entire history, we extract just the last message.
-        """
-        try:
-            # Check if this is a LangGraph message format
-            if isinstance(inputs, dict) and 'messages' in inputs:
-                messages = inputs['messages']
-                if isinstance(messages, list) and len(messages) > 0:
-                    # Get the most recent message
-                    last_msg = messages[-1]
-
-                    # Extract content based on message type
-                    if hasattr(last_msg, 'content'):
-                        # LangChain message object
-                        content = last_msg.content
-                        msg_type = getattr(last_msg, 'type', 'unknown')
-                        return self._excerpt(f"[{msg_type}] {content}")
-                    elif isinstance(last_msg, tuple) and len(last_msg) >= 2:
-                        # Tuple format: (role, content)
-                        return self._excerpt(f"[{last_msg[0]}] {last_msg[1]}")
-                    else:
-                        # Unknown format, convert to string
-                        return self._excerpt(str(last_msg))
-
-            # For non-message inputs, check if it's a list of actions/tool calls
-            if isinstance(inputs, list) and len(inputs) > 0:
-                # This might be tool call info
-                return self._excerpt(str(inputs[0]))
-
-            # Fall back to original behavior for non-LangGraph agents
-            return self._excerpt(str(inputs))
-
-        except Exception as e:
-            # On any error, fall back to original behavior
-            return self._excerpt(str(inputs))
-
-    def _excerpt(self, obj: Any, max_len: int = 250, from_end: bool = False) -> str:
-        """
-        Truncate text to max_length with ellipsis.
-
-        Args:
-            obj: Object to convert to string and truncate
-            max_len: Maximum length of excerpt
-            from_end: If True, take LAST max_len chars (better for system prompts).
-                If False, take FIRST max_len chars (better for tool args).
-        """
-        text = str(obj)
-        if len(text) <= max_len:
-            return text
-
-        if from_end:
-            # Take last X chars - better for system prompts where the end contains user's actual query
-            return "..." + text[-(max_len-3):]
-        else:
-            # Take first X chars - better for tool inputs
-            return text[:max_len-3] + "..."
-
-    def _extract_semantic_selectors(self, html_text: str) -> List[Dict[str, int]]:
-        """
-        Extract semantic HTML tags from output for grounding web browse rules.
-        Only extracts semantic tags (nav, header, h1, etc.) to keep trace lightweight.
-
-        Args:
-            html_text: Output text that may contain HTML
-
-        Returns:
-            List of {"tag": str, "count": int} sorted by count descending, or None if no HTML
-        """
-        import re
-
-        # Quick check: does this look like HTML?
-        if '<' not in html_text or '>' not in html_text:
-            return None
-
-        # Semantic tags we care about (prioritized for web browse agents)
-        semantic_tags = [
-            # Navigation/Structure (highest priority)
-            'nav', 'header', 'footer', 'main', 'article', 'section', 'aside',
-
-            # Headers (critical for "find headers" queries!)
-            'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
-
-            # Interactive
-            'a', 'button', 'form', 'input', 'textarea', 'select', 'label',
-
-            # Lists (often used for navigation)
-            'ul', 'ol', 'li',
-
-            # Tables (data extraction)
-            'table', 'thead', 'tbody', 'tr', 'th', 'td',
-
-            # Media
-            'img', 'video', 'audio'
-        ]
-
-        # Count occurrences of each semantic tag
-        found_tags = {}
-        for tag in semantic_tags:
-            # Pattern: <tag ...> or <tag> (opening tags only)
-            pattern = f'<{tag}[\\s>]'
-            matches = re.findall(pattern, html_text, re.IGNORECASE)
-            if matches:
-                found_tags[tag] = len(matches)
-
-        # Return None if no semantic tags found
-        if not found_tags:
-            return None
-
-        # Convert to list format, sorted by count descending
-        # Limit to top 15 to keep trace light
-        result = [{"tag": tag, "count": count}
-                  for tag, count in sorted(found_tags.items(), key=lambda x: -x[1])]
-        return result[:15] # Top 15 most common tags
-
-    def set_selected_rules(self, rules: List[Dict[str, Any]]):
-        """Set the rules selected for this run.
-        Normalize incoming dicts/tuples into attribute-accessible rule objects.
-        """
-        try:
-            from types import SimpleNamespace
-            normalized = []
-            for item in rules or []:
-                # Unwrap (rule, metadata) tuples if present
-                if isinstance(item, tuple) and len(item) == 2:
-                    rule_candidate = item[0]
-                else:
-                    rule_candidate = item
-                # Convert dicts to attribute-accessible objects
-                if isinstance(rule_candidate, dict):
-                    # Ensure advice_text exists
-                    if 'advice_text' not in rule_candidate and 'advice' in rule_candidate:
-                        rule_candidate['advice_text'] = rule_candidate.get('advice')
-                    normalized.append(SimpleNamespace(**rule_candidate))
-                else:
-                    normalized.append(rule_candidate)
-            self._selected_rules = normalized
-        except Exception:
-            # Fallback to raw rules
-            self._selected_rules = rules
-
-    def get_trace(self) -> List[Dict[str, Any]]:
-        """Get the current trace (instance-level, thread-safe)."""
-        return self._trace.copy()
-
-    def _inject_first_turn_override(self, prompts: List[str]) -> List[str]:
-        """Inject a generic first-turn override to own turn 1."""
-        if not prompts:
-            return prompts
-
-        # Create a generic first-turn override
-        first_turn_override = """🚨🚨🚨 CRITICAL SYSTEM DIRECTIVE 🚨🚨🚨
-⚠️ MANDATORY: You MUST follow this exact sequence or the task will FAIL
-
-TURN 1 REQUIREMENT:
-- Output ONLY: Action: sql_db_list_tables
-Action Input: ACK_RULES:[r1]
-- Do NOT use any other tools
-- Do NOT perform any planning
-- Do NOT output anything else
-
-TURN 2+ (After ACK):
-- If ACK was correct, proceed with normal tools and schema
-- Skip table discovery and schema introspection
-- Use known tables directly
-
-🚨 FAILURE TO ACK IN TURN 1 = IMMEDIATE TASK TERMINATION 🚨
-
-"""
-
-        # Put the injection at the VERY BEGINNING of the system prompt
-        modified_prompts = prompts.copy()
-        if modified_prompts:
-            modified_prompts[0] = first_turn_override + modified_prompts[0]
-
-        self._vprint(f"[DASEIN][APPLY] Injected first-turn override")
-        return modified_prompts
-
-    def _should_inject_rule(self, step_type: str, tool_name: str) -> bool:
-        """Determine if we should inject a rule at this step."""
-        # Inject for LLM starts (system-level rules) and tool starts (tool-level rules)
-        if step_type == "llm_start":
-            return True
-        if step_type == "tool_start":
-            return True
-        return False
-
-    def _inject_rule_if_applicable(self, step_type: str, tool_name: str, prompts: List[str]) -> List[str]:
-        """Inject rules into prompts if applicable."""
-        if not self._should_inject_rule(step_type, tool_name):
-            return prompts
-
-        # If no rules selected yet, return prompts unchanged
-        if not self._selected_rules:
-            return prompts
-
-        # Check guard to prevent duplicate injection
-        # 🎯 CRITICAL: For LangGraph planning nodes, SKIP the guard - we need to inject on EVERY call
-        # because the same node (e.g., supervisor) can be called multiple times dynamically
-        use_guard = True
-        if hasattr(self, '_is_langgraph') and self._is_langgraph:
-            if step_type == 'llm_start' and hasattr(self, '_current_chain_node'):
-                # For planning nodes, skip guard to allow re-injection on subsequent calls
-                if hasattr(self, '_planning_nodes') and self._current_chain_node in self._planning_nodes:
-                    use_guard = False
-
-        if use_guard:
-            guard_key = (step_type, tool_name)
-            if guard_key in self._injection_guard:
-                return prompts
-
-        try:
-            # Inject rules that target llm_start and tool_start (both go to system prompt)
-            system_rules = []
-            for rule_meta in self._selected_rules:
-                # Handle tuple format from select_rules: (rule, metadata)
-                if isinstance(rule_meta, tuple) and len(rule_meta) == 2:
-                    rule, metadata = rule_meta
-                elif isinstance(rule_meta, dict):
-                    if 'rule' in rule_meta:
-                        rule = rule_meta.get('rule', {})
-                    else:
-                        rule = rule_meta
-                else:
-                    rule = rule_meta
-
-                # Check if this rule targets system-level injection (llm_start only)
-                target_step_type = getattr(rule, 'target_step_type', '')
-
-                # 🚨 CRITICAL: For LangGraph agents, only skip planning rules if agent was successfully recreated
-                # If recreation failed, we need to inject via callback as fallback
-                if step_type == 'llm_start' and hasattr(self, '_is_langgraph') and self._is_langgraph:
-                    # Only skip if agent was actually recreated with planning rules embedded
-                    if hasattr(self, '_agent_was_recreated') and self._agent_was_recreated:
-                        if target_step_type in ['llm_start', 'chain_start']:
-                            self._vprint(f"[DASEIN][CALLBACK] Skipping planning rule {getattr(rule, 'id', 'unknown')} for LangGraph agent (already injected at creation)")
-                            continue
-
-                # 🎯 COORDINATOR-GATED INJECTION: Only apply planning rules when executing planning-capable nodes
-                if target_step_type in ['llm_start', 'chain_start']:
-                    # If we have planning nodes, only inject planning rules when we're in one of them
-                    if hasattr(self, '_planning_nodes') and self._planning_nodes:
-                        current_node = getattr(self, '_current_chain_node', None)
-                        # Check if current node is in the planning nodes set
-                        if current_node not in self._planning_nodes:
-                            # Silently skip non-planning nodes
-                            continue
-                        # Injecting into planning node (logged in detailed injection log below)
-
-                advice = getattr(rule, 'advice_text', getattr(rule, 'advice', ''))
-                if advice:
-                    system_rules.append(advice)
-
-            # Apply system-level rules if any
-            if system_rules and prompts:
-                modified_prompts = prompts.copy()
-                system_prompt = modified_prompts[0]
-
-                # Combine all system rules with much stronger language
-                rule_injections = []
-                for advice in system_rules:
-                    if "TOOL RULE:" in advice:
-                        # Make tool rules even more explicit
-                        rule_injections.append(f"🚨 CRITICAL TOOL OVERRIDE: {advice}")
-                    else:
-                        rule_injections.append(f"🚨 CRITICAL SYSTEM OVERRIDE: {advice}")
-
-                # Build execution state context (agent-agnostic, with argument previews)
-                # Strategy: Show all if ≤5 calls, else show most recent 3
-                # Rationale: Small counts get full context; larger counts show recent to prevent duplicates
-                state_context = ""
-                if hasattr(self, '_function_calls_made') and self._function_calls_made:
-                    state_lines = []
-                    for func_name in sorted(self._function_calls_made.keys()):
-                        calls = self._function_calls_made[func_name]
-                        count = len(calls)
-
-                        # Hybrid window: show all if ≤5 calls, else show recent 3
-                        if count <= 5:
-                            # Show all calls with previews
-                            state_lines.append(f" {func_name}: called {count}x:")
-                            for call in calls:
-                                preview = call.get('preview', '')
-                                if preview:
-                                    state_lines.append(f" [step {call['step']}] {preview}")
-                                else:
-                                    state_lines.append(f" [step {call['step']}] (no args)")
-                        else:
-                            # Show summary + recent 3 with previews
-                            state_lines.append(f" • {func_name}: called {count}x (most recent 3):")
-                            for call in calls[-3:]:
-                                preview = call.get('preview', '')
-                                if preview:
-                                    state_lines.append(f" [step {call['step']}] {preview}")
-                                else:
-                                    state_lines.append(f" [step {call['step']}] (no args)")
-
-                    if state_lines:
-                        state_context = f"""
-EXECUTION STATE (functions called so far in this run):
-{chr(10).join(state_lines)}
-
-"""
-
-                combined_injection = f""" SYSTEM OVERRIDE — PLANNING TURN ONLY
-These rules OVERRIDE all defaults. You MUST enforce them exactly or the task FAILS.
-
-Tags: AVOID (absolute ban), SKIP (force bypass), FIX (mandatory params), PREFER (ranked choice), HINT (optional).
-Precedence: AVOID/SKIP > FIX > PREFER > HINT. On conflict, the higher rule ALWAYS wins.
-
-{state_context}Checklist (non-negotiable):
-- AVOID: no banned targets under ANY condition.
-- SKIP: bypass skipped steps/tools; NEVER retry them.
-- FIX: all required params/settings MUST be included.
-- PREFER: when multiple compliant options exist, choose the preferred—NO exceptions.
-- Recovery: if a banned/skipped item already failed, IMMEDIATELY switch to a compliant alternative.
-
1301
- Output Contract: Produce ONE compliant tool/function call (or direct answer if none is needed).
1302
- NO reasoning, NO justification, NO markdown.
1303
-
1304
- Rules to Enforce:
1305
-
1306
-
1307
- {chr(10).join(rule_injections)}
1308
-
1309
-
1310
- """
1311
- # Put the injection at the VERY BEGINNING of the system prompt
1312
- modified_prompts[0] = combined_injection + system_prompt
1313
-
1314
- # Add to guard (only if we're using the guard)
1315
- if use_guard:
1316
- self._injection_guard.add(guard_key)
1317
-
1318
- # Log the complete injection for debugging
1319
- # Compact injection summary
1320
- if hasattr(self, '_is_langgraph') and self._is_langgraph:
1321
- # LangGraph: show node name
1322
- func_count = len(self._function_calls_made) if hasattr(self, '_function_calls_made') and state_context else 0
1323
- node_name = getattr(self, '_current_chain_node', 'unknown')
1324
- print(f"[DASEIN] 🎯 Injecting {len(system_rules)} rule(s) into {node_name} | State: {func_count} functions tracked")
1325
- else:
1326
- # LangChain: simpler logging without node name
1327
- print(f"[DASEIN] 🎯 Injecting {len(system_rules)} rule(s) into agent")
1328
-
1329
- return modified_prompts
1330
-
1331
- except Exception as e:
1332
- self._vprint(f"[DASEIN][APPLY] Injection failed: {e}")
1333
-
1334
- return prompts
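The method above bundles rule advice into a banner, prepends it to the first (system) prompt, and uses a guard set to avoid double injection. A minimal, self-contained sketch of that pattern follows; the names are illustrative, not part of the dasein API:

```python
# Minimal sketch of the prompt-injection pattern above (illustrative names).

def inject_rules(prompts, advice_list, guard, step_type="llm_start", target="model"):
    """Return a copy of prompts with rule advice prepended to prompts[0], once."""
    key = (step_type, target)
    if not advice_list or not prompts or key in guard:
        return prompts
    banner = "\n".join(f"CRITICAL OVERRIDE: {a}" for a in advice_list)
    modified = prompts.copy()
    modified[0] = f"{banner}\n\n{modified[0]}"  # injection goes at the very beginning
    guard.add(key)  # mirrors self._injection_guard.add(guard_key)
    return modified

guard = set()
out = inject_rules(["You are a SQL agent."], ["Never call sql_db_query_checker."], guard)
print(out[0].splitlines()[0])
# CRITICAL OVERRIDE: Never call sql_db_query_checker.
```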
1335
-
1336
- def _inject_tool_rule_if_applicable(self, step_type: str, tool_name: str, input_str: str) -> str:
1337
- """Inject rules into tool input if applicable."""
1338
- if not self._should_inject_rule(step_type, tool_name):
1339
- return input_str
1340
-
1341
- # If no rules selected yet, return input unchanged
1342
- if not self._selected_rules:
1343
- return input_str
1344
-
1345
- # Check guard to prevent duplicate injection
1346
- guard_key = (step_type, tool_name)
1347
- if guard_key in self._injection_guard:
1348
- return input_str
1349
-
1350
- try:
1351
- # Inject rules that target tool_start
1352
- tool_rules = []
1353
- for rule_meta in self._selected_rules:
1354
- # Handle tuple format from select_rules: (rule, metadata)
1355
- if isinstance(rule_meta, tuple) and len(rule_meta) == 2:
1356
- rule, metadata = rule_meta
1357
- else:
1358
- rule = rule_meta
1359
- metadata = {}
1360
-
1361
- # Only apply rules that target tool_start
1362
- if rule.target_step_type == "tool_start":
1363
- tool_rules.append(rule)
1364
- self._vprint(f"[DASEIN][APPLY] Tool rule: {rule.advice_text[:100]}...")
1365
-
1366
- if tool_rules:
1367
- # Apply tool-level rule injection
1368
- modified_input = self._apply_tool_rules(input_str, tool_rules)
1369
- self._injection_guard.add(guard_key)
1370
- return modified_input
1371
- else:
1372
- return input_str
1373
-
1374
- except Exception as e:
1375
- self._vprint(f"[DASEIN][APPLY] Error injecting tool rules: {e}")
1376
- return input_str
1377
-
1378
- def _apply_tool_rules(self, input_str: str, rules: List) -> str:
1379
- """Apply tool-level rules to modify the input string."""
1380
- modified_input = input_str
1381
-
1382
- for rule in rules:
1383
- try:
1384
- # Apply the rule's advice to modify the tool input
1385
- if "strip" in rule.advice_text.lower() and "fence" in rule.advice_text.lower():
1386
- # Strip markdown code fences
1387
- import re
1388
- # Remove ```sql...``` or ```...``` patterns
1389
- modified_input = re.sub(r'```(?:sql)?\s*(.*?)\s*```', r'\1', modified_input, flags=re.DOTALL)
1390
- self._vprint(f"[DASEIN][APPLY] Stripped code fences from tool input")
1391
- elif "strip" in rule.advice_text.lower() and "whitespace" in rule.advice_text.lower():
1392
- # Strip leading/trailing whitespace
1393
- modified_input = modified_input.strip()
1394
- self._vprint(f"[DASEIN][APPLY] Stripped whitespace from tool input")
1395
- # Add more rule types as needed
1396
-
1397
- except Exception as e:
1398
- self._vprint(f"[DASEIN][APPLY] Error applying tool rule: {e}")
1399
- continue
1400
-
1401
- return modified_input
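For reference, the fence-stripping branch of `_apply_tool_rules` in isolation; the sample input is fabricated:

```python
import re

# Same substitution as above: unwrap ```sql ... ``` (or bare ```) fences.
raw = "```sql\nSELECT name FROM artists LIMIT 5;\n```"
clean = re.sub(r'```(?:sql)?\s*(.*?)\s*```', r'\1', raw, flags=re.DOTALL)
print(clean)  # SELECT name FROM artists LIMIT 5;
```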
1402
-
1403
- def _handle_tool_executor_start(
1404
- self,
1405
- serialized: Dict[str, Any],
1406
- inputs: Dict[str, Any],
1407
- **kwargs: Any,
1408
- ) -> None:
1409
- """Handle tool executor start - bridge from chain_start to tool_start."""
1410
- self._vprint(f"[DASEIN][CALLBACK] tool_start (from chain_start)")
1411
-
1412
- # Extract tool information from inputs
1413
- tool_name = "unknown"
1414
- tool_input = ""
1415
-
1416
- if isinstance(inputs, dict):
1417
- if "tool" in inputs:
1418
- tool_name = inputs["tool"]
1419
- elif "tool_name" in inputs:
1420
- tool_name = inputs["tool_name"]
1421
-
1422
- if "tool_input" in inputs:
1423
- tool_input = str(inputs["tool_input"])
1424
- elif "input" in inputs:
1425
- tool_input = str(inputs["input"])
1426
- else:
1427
- tool_input = str(inputs)
1428
- else:
1429
- tool_input = str(inputs)
1430
-
1431
- self._vprint(f"[DASEIN][CALLBACK] Tool: {tool_name}")
1432
- self._vprint(f"[DASEIN][CALLBACK] Input: {tool_input[:100]}...")
1433
-
1434
- # Check if we have tool_start rules that cover this tool
1435
- tool_rules = [rule for rule in self._selected_rules if rule.target_step_type == "tool_start"]
1436
- covered_rules = [rule for rule in tool_rules if self._rule_covers_tool(rule, tool_name, tool_input)]
1437
-
1438
- if covered_rules:
1439
- self._vprint(f"[DASEIN][APPLY] tool_start: {len(covered_rules)} rules cover this tool call")
1440
- # Fire micro-turn for rule application
1441
- modified_input = self._fire_micro_turn_for_tool_rules(covered_rules, tool_name, tool_input)
1442
- else:
1443
- self._vprint(f"[DASEIN][APPLY] tool_start: no rules cover this tool call")
1444
- modified_input = tool_input
1445
-
1446
- args_excerpt = self._excerpt(modified_input)
1447
-
1448
- step = {
1449
- "step_type": "tool_start",
1450
- "tool_name": tool_name,
1451
- "args_excerpt": args_excerpt,
1452
- "outcome": "",
1453
- "ts": datetime.now().isoformat(),
1454
- "run_id": kwargs.get("run_id"),
1455
- "parent_run_id": kwargs.get("parent_run_id"),
1456
- }
1457
- self._trace.append(step)
1458
-
1459
- def _rule_covers_tool(self, rule, tool_name: str, tool_input: str) -> bool:
1460
- """Check if a rule covers the given tool call."""
1461
- try:
1462
- # Check if rule references this tool
1463
- if hasattr(rule, 'references') and rule.references:
1464
- if hasattr(rule.references, 'tools') and rule.references.tools:
1465
- if tool_name not in rule.references.tools:
1466
- return False
1467
-
1468
- # Check trigger patterns if they exist
1469
- if hasattr(rule, 'trigger_pattern') and rule.trigger_pattern:
1470
- # For now, assume all tool_start rules cover their referenced tools
1471
- # This can be made more sophisticated later
1472
- pass
1473
-
1474
- return True
1475
- except Exception as e:
1476
- self._vprint(f"[DASEIN][COVERAGE] Error checking rule coverage: {e}")
1477
- return False
1478
-
1479
- def _fire_micro_turn_for_tool_rules(self, rules, tool_name: str, tool_input: str) -> str:
1480
- """Fire a micro-turn LLM call to apply tool rules."""
1481
- try:
1482
- # Use the first rule for now (can be extended to handle multiple rules)
1483
- rule = rules[0]
1484
- rule_id = getattr(rule, 'id', 'unknown')
1485
-
1486
- self._vprint(f"[DASEIN][MICROTURN] rule_id={rule_id} tool={tool_name}")
1487
-
1488
- # Create micro-turn prompt
1489
- micro_turn_prompt = self._create_micro_turn_prompt(rule, tool_name, tool_input)
1490
-
1491
- # Fire actual micro-turn LLM call
1492
- modified_input = self._execute_micro_turn_llm_call(micro_turn_prompt, tool_input)
1493
-
1494
- # Store the modified input for retrieval during tool execution
1495
- input_key = f"{tool_name}:{hash(tool_input)}"
1496
- _MODIFIED_TOOL_INPUTS[input_key] = modified_input
1497
-
1498
- self._vprint(f"[DASEIN][MICROTURN] Applied rule {rule_id}: {str(tool_input)[:50]}... -> {str(modified_input)[:50]}...")
1499
-
1500
- return modified_input
1501
-
1502
- except Exception as e:
1503
- self._vprint(f"[DASEIN][MICROTURN] Error in micro-turn: {e}")
1504
- return tool_input
1505
-
1506
- def _create_micro_turn_prompt(self, rule, tool_name: str, tool_input: str) -> str:
1507
- """Create the micro-turn prompt for rule application."""
1508
- advice = getattr(rule, 'advice', '')
1509
- return f"""Apply this rule to the tool input:
1510
-
1511
- Rule: {advice}
1512
- Tool: {tool_name}
1513
- Current Input: {tool_input}
1514
-
1515
- Output only the corrected tool input:"""
1516
-
1517
- def _execute_micro_turn_llm_call(self, prompt: str, original_input: str) -> str:
1518
- """Execute the actual micro-turn LLM call."""
1519
- try:
1520
- if not self._llm:
1521
- self._vprint(f"[DASEIN][MICROTURN] No LLM available for micro-turn call")
1522
- return original_input
1523
-
1524
- self._vprint(f"[DASEIN][MICROTURN] Executing micro-turn LLM call")
1525
- self._vprint(f"[DASEIN][MICROTURN] Prompt: {prompt[:200]}...")
1526
-
1527
- # Make the micro-turn LLM call
1528
- # Create a simple message list for the LLM
1529
- messages = [{"role": "user", "content": prompt}]
1530
-
1531
- # Call the LLM
1532
- response = self._llm.invoke(messages)
1533
-
1534
- # Extract the response content
1535
- if hasattr(response, 'content'):
1536
- modified_input = response.content.strip()
1537
- elif isinstance(response, str):
1538
- modified_input = response.strip()
1539
- else:
1540
- modified_input = str(response).strip()
1541
-
1542
- self._vprint(f"[DASEIN][MICROTURN] LLM response: {modified_input[:100]}...")
1543
-
1544
- # 🚨 CRITICAL: Parse JSON responses with markdown fences
1545
- if modified_input.startswith('```json') or modified_input.startswith('```'):
1546
- try:
1547
- # Extract JSON from markdown fences
1548
- import re
1549
- import json
1550
- json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', modified_input, re.DOTALL)
1551
- if json_match:
1552
- json_str = json_match.group(1)
1553
- parsed_json = json.loads(json_str)
1554
- # Convert back to the expected format
1555
- if isinstance(parsed_json, dict) and 'name' in parsed_json and 'args' in parsed_json:
1556
- modified_input = parsed_json
1557
- self._vprint(f"[DASEIN][MICROTURN] Parsed JSON from markdown fences: {parsed_json}")
1558
- else:
1559
- self._vprint(f"[DASEIN][MICROTURN] JSON doesn't have expected structure, using as-is")
1560
- else:
1561
- self._vprint(f"[DASEIN][MICROTURN] Could not extract JSON from markdown fences")
1562
- except Exception as e:
1563
- self._vprint(f"[DASEIN][MICROTURN] Error parsing JSON: {e}")
1564
-
1565
- # Validate the response - only fallback if completely empty
1566
- if not modified_input:
1567
- self._vprint(f"[DASEIN][MICROTURN] LLM response empty, using original input")
1568
- return original_input
1569
-
1570
- return modified_input
1571
-
1572
- except Exception as e:
1573
- self._vprint(f"[DASEIN][MICROTURN] Error executing micro-turn LLM call: {e}")
1574
- return original_input
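A standalone sketch of the markdown-fence JSON recovery performed above, run against a fabricated micro-turn response:

```python
import json
import re

# Fabricated LLM response; the {"name": ..., "args": ...} payload shape is
# exactly what the parser above looks for before swapping it in.
response = '```json\n{"name": "sql_db_query", "args": {"query": "SELECT 1"}}\n```'
match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', response, re.DOTALL)
if match:
    parsed = json.loads(match.group(1))
    if isinstance(parsed, dict) and 'name' in parsed and 'args' in parsed:
        print(parsed['name'], parsed['args'])  # sql_db_query {'query': 'SELECT 1'}
```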
1575
-
1576
-
1577
- def get_trace() -> List[Dict[str, Any]]:
1578
- """
1579
- DEPRECATED: Legacy function for backward compatibility.
1580
- Get the current trace from active CognateProxy instances.
1581
-
1582
- Returns:
1583
- List of trace step dictionaries (empty if no active traces)
1584
- """
1585
- # Try to get trace from active CognateProxy instances
1586
- try:
1587
- import gc
1588
- for obj in gc.get_objects():
1589
- if hasattr(obj, '_last_run_trace') and obj._last_run_trace:
1590
- return obj._last_run_trace.copy()
1591
- if hasattr(obj, '_callback_handler') and hasattr(obj._callback_handler, '_trace'):
1592
- return obj._callback_handler._trace.copy()
1593
- except Exception:
1594
- pass
1595
-
1596
- return [] # Return empty list if no trace found
1597
-
1598
-
1599
- def get_modified_tool_input(tool_name: str, original_input: str) -> str:
1600
- """
1601
- Get the modified tool input if it exists.
1602
-
1603
- Args:
1604
- tool_name: Name of the tool
1605
- original_input: Original tool input
1606
-
1607
- Returns:
1608
- Modified tool input if available, otherwise original input
1609
- """
1610
- input_key = f"{tool_name}:{hash(original_input)}"
1611
- return _MODIFIED_TOOL_INPUTS.get(input_key, original_input)
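A quick roundtrip through the store, mirroring how the micro-turn stashes a modified input and how `get_modified_tool_input` looks it up:

```python
# Stand-in for the module-level store. Note hash() of a str is salted per
# process (PYTHONHASHSEED), so keys only match within a single run, which is
# all this store needs.
_MODIFIED_TOOL_INPUTS = {}

original = "```sql\nSELECT 1\n```"
_MODIFIED_TOOL_INPUTS[f"sql_db_query:{hash(original)}"] = "SELECT 1"

# The exact lookup get_modified_tool_input performs:
print(_MODIFIED_TOOL_INPUTS.get(f"sql_db_query:{hash(original)}", original))  # SELECT 1
```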
1612
-
1613
-
1614
- def clear_modified_tool_inputs():
1615
- """Clear all modified tool inputs."""
1616
- global _MODIFIED_TOOL_INPUTS
1617
- _MODIFIED_TOOL_INPUTS.clear()
1618
-
1619
-
1620
- def clear_trace() -> None:
1621
- """
1622
- DEPRECATED: Legacy function for backward compatibility.
1623
- Clear traces in active CognateProxy instances.
1624
- """
1625
- # Try to clear traces in active CognateProxy instances
1626
- try:
1627
- import gc
1628
- for obj in gc.get_objects():
1629
- if hasattr(obj, '_callback_handler') and hasattr(obj._callback_handler, 'reset_run_state'):
1630
- obj._callback_handler.reset_run_state()
1631
- except Exception:
1632
- pass # Ignore if not available
1633
-
1634
-
1635
- def print_trace(max_chars: int = 240, only: tuple[str, ...] | None = None, suppress: tuple[str, ...] = ("chain_end",), show_tree: bool = True, show_summary: bool = True) -> None:
1636
- """
1637
- Print a compact fixed-width table of the trace with tree-like view and filtering.
1638
-
1639
- Args:
1640
- max_chars: Maximum characters per line (default 240)
1641
- only: Filter by step_type if provided (e.g., ("llm_start", "llm_end"))
1642
- suppress: Suppress any step_type in this tuple (default: ("chain_end",))
1643
- show_tree: If True, left-pad args_excerpt by 2*depth spaces for tree-like view
1644
- show_summary: If True, show step_type counts and deduped rows summary
1645
- """
1646
- # Try to get trace from active CognateProxy instances
1647
- trace = None
1648
- try:
1649
- # Import here to avoid circular imports
1650
- from dasein.api import _global_cognate_proxy
1651
- if _global_cognate_proxy and hasattr(_global_cognate_proxy, '_wrapped_llm') and _global_cognate_proxy._wrapped_llm:
1652
- trace = _global_cognate_proxy._wrapped_llm.get_trace()
1653
- except Exception:
1654
- pass
1655
-
1656
- if not trace:
1657
- trace = get_trace() # Use the updated get_trace() function
1658
-
1659
- # If global trace is empty, try to get it from the last completed run
1660
- if not trace:
1661
- # Try to get trace from any active CognateProxy instances
1662
- try:
1663
- import gc
1664
- for obj in gc.get_objects():
1665
- # Look for CognateProxy instances with captured traces
1666
- if hasattr(obj, '_last_run_trace') and obj._last_run_trace:
1667
- trace = obj._last_run_trace
1668
- print(f"[DASEIN][TRACE] Retrieved trace from CognateProxy: {len(trace)} steps")
1669
- break
1670
- # Fallback: try callback handler
1671
- elif hasattr(obj, '_callback_handler') and hasattr(obj._callback_handler, 'get_trace'):
1672
- potential_trace = obj._callback_handler.get_trace()
1673
- if potential_trace:
1674
- trace = potential_trace
1675
- print(f"[DASEIN][TRACE] Retrieved trace from callback handler: {len(trace)} steps")
1676
- break
1677
- except Exception as e:
1678
- pass
1679
-
1680
- if not trace:
1681
- print("No trace data available.")
1682
- return
1683
-
1684
- # Print execution state if available
1685
- try:
1686
- from dasein.api import _global_cognate_proxy
1687
- if _global_cognate_proxy and hasattr(_global_cognate_proxy, '_callback_handler'):
1688
- handler = _global_cognate_proxy._callback_handler
1689
- if hasattr(handler, '_function_calls_made') and handler._function_calls_made:
1690
- print("\n" + "=" * 80)
1691
- print("EXECUTION STATE (Functions Called During Run):")
1692
- print("=" * 80)
1693
- for func_name in sorted(handler._function_calls_made.keys()):
1694
- calls = handler._function_calls_made[func_name]
1695
- count = len(calls)
1696
- print(f" • {func_name}: called {count}x")
1697
- # Hybrid window: show all if ≤5, else show most recent 3 (matches injection logic)
1698
- if count <= 5:
1699
- # Show all calls
1700
- for call in calls:
1701
- preview = call.get('preview', '(no preview)')
1702
- if len(preview) > 80:
1703
- preview = preview[:80] + '...'
1704
- print(f" [step {call['step']}] {preview}")
1705
- else:
1706
- # Show recent 3
1707
- print(f" ... (showing most recent 3 of {count}):")
1708
- for call in calls[-3:]:
1709
- preview = call.get('preview', '(no preview)')
1710
- if len(preview) > 80:
1711
- preview = preview[:80] + '...'
1712
- print(f" [step {call['step']}] {preview}")
1713
- print("=" * 80 + "\n")
1714
- except Exception as e:
1715
- pass # Silently skip if state not available
1716
-
1717
- # Filter by step_type if only is provided
1718
- filtered_trace = trace
1719
- if only:
1720
- filtered_trace = [step for step in trace if step.get("step_type") in only]
1721
-
1722
- # Suppress any step_type in suppress tuple
1723
- if suppress:
1724
- filtered_trace = [step for step in filtered_trace if step.get("step_type") not in suppress]
1725
-
1726
- if not filtered_trace:
1727
- print("No trace data matching filter criteria.")
1728
- return
1729
-
1730
- # Build depth map from parent_run_id
1731
- depth_map = {}
1732
- for step in filtered_trace:
1733
- run_id = step.get("run_id")
1734
- parent_run_id = step.get("parent_run_id")
1735
-
1736
- if run_id is None or parent_run_id is None or parent_run_id not in depth_map:
1737
- depth_map[run_id] = 0
1738
- else:
1739
- depth_map[run_id] = depth_map[parent_run_id] + 1
1740
-
1741
- # Calculate column widths based on max_chars
1742
- # Reserve space for: # (3), step_type (15), tool_name (25), separators (6)
1743
- available_width = max_chars - 3 - 15 - 25 - 6
1744
- excerpt_width = available_width // 2
1745
- outcome_width = available_width - excerpt_width
1746
-
1747
- # Print header
1748
- print(f"{'#':<3} {'step_type':<15} {'tool_name':<25} {'args_excerpt':<{excerpt_width}} {'outcome':<{outcome_width}}")
1749
- print("-" * max_chars)
1750
-
1751
- # Print each step
1752
- for i, step in enumerate(filtered_trace, 1):
1753
- step_type = step.get("step_type", "")[:15]
1754
- tool_name = str(step.get("tool_name", ""))[:25]
1755
- args_excerpt = step.get("args_excerpt", "")
1756
- outcome = step.get("outcome", "")
1757
-
1758
- # Apply tree indentation if show_tree is True
1759
- if show_tree:
1760
- run_id = step.get("run_id")
1761
- depth = depth_map.get(run_id, 0)
1762
- args_excerpt = " " * depth + args_excerpt
1763
-
1764
- # Truncate to fit column widths
1765
- args_excerpt = args_excerpt[:excerpt_width]
1766
- outcome = outcome[:outcome_width]
1767
-
1768
- print(f"{i:<3} {step_type:<15} {tool_name:<25} {args_excerpt:<{excerpt_width}} {outcome:<{outcome_width}}")
1769
-
1770
- # Show summary if requested
1771
- if show_summary:
1772
- print("\n" + "=" * max_chars)
1773
-
1774
- # Count steps by step_type
1775
- step_counts = {}
1776
- for step in filtered_trace:
1777
- step_type = step.get("step_type", "unknown")
1778
- step_counts[step_type] = step_counts.get(step_type, 0) + 1
1779
-
1780
- print("Step counts:")
1781
- for step_type, count in sorted(step_counts.items()):
1782
- print(f" {step_type}: {count}")
1783
-
1784
- # Add compact function call summary
1785
- try:
1786
- from dasein.api import _global_cognate_proxy
1787
- if _global_cognate_proxy and hasattr(_global_cognate_proxy, '_callback_handler'):
1788
- handler = _global_cognate_proxy._callback_handler
1789
- if hasattr(handler, '_function_calls_made') and handler._function_calls_made:
1790
- print("\nFunction calls:")
1791
- for func_name in sorted(handler._function_calls_made.keys()):
1792
- count = len(handler._function_calls_made[func_name])
1793
- print(f" {func_name}: {count}")
1794
- except Exception:
1795
- pass
1796
-
1797
- # Count deduped rows skipped (steps that were filtered out)
1798
- total_steps = len(trace)
1799
- shown_steps = len(filtered_trace)
1800
- skipped_steps = total_steps - shown_steps
1801
-
1802
- if skipped_steps > 0:
1803
- print(f"Deduped rows skipped: {skipped_steps}")
1
+ """
2
+ Trace capture functionality for Dasein.
3
+ """
4
+
5
+ # Suppress third-party warnings triggered by pipecleaner dependencies
6
+ import warnings
7
+ warnings.filterwarnings('ignore', category=FutureWarning, message='.*torch.distributed.reduce_op.*')
8
+ warnings.filterwarnings('ignore', category=DeprecationWarning, message='.*Importing chat models from langchain.*')
9
+
10
+ import hashlib
11
+ from typing import Any, Dict, List, Optional, Union
12
+ from datetime import datetime
13
+ from langchain_core.callbacks.base import BaseCallbackHandler
14
+ from langchain_core.callbacks.manager import CallbackManagerForToolRun
15
+ from langchain_core.tools import BaseTool
16
+
17
+
18
+ # ============================================================================
19
+ # VERBOSE LOGGING HELPER
20
+ # ============================================================================
21
+
22
+ def _vprint(message: str, verbose: bool = False, force: bool = False):
23
+ """
24
+ Helper function for verbose printing.
25
+
26
+ Args:
27
+ message: Message to print
28
+ verbose: Whether verbose mode is enabled
29
+ force: If True, always print regardless of verbose setting
30
+ """
31
+ if force or verbose:
32
+ print(message)
33
+
34
+
35
+ # DEPRECATED: Global trace store removed for thread-safety
36
+ # Traces are now stored instance-level in DaseinCallbackHandler._trace
37
+ # _TRACE: List[Dict[str, Any]] = []
38
+
39
+ # Hook cache for agent fingerprinting
40
+ _HOOK_CACHE: Dict[str, Any] = {}
41
+
42
+ # Store for modified tool inputs
43
+ _MODIFIED_TOOL_INPUTS: Dict[str, str] = {}
44
+
45
+
46
+ class DaseinToolWrapper(BaseTool):
47
+ """Wrapper for tools that applies micro-turn modifications."""
48
+
49
+ name: str = ""
50
+ description: str = ""
51
+ original_tool: Any = None
52
+ callback_handler: Any = None
53
+
54
+ def __init__(self, original_tool, callback_handler=None, verbose: bool = False):
55
+ super().__init__(
56
+ name=original_tool.name,
57
+ description=original_tool.description
58
+ )
59
+ self.original_tool = original_tool
60
+ self.callback_handler = callback_handler
61
+ self._verbose = verbose
62
+
63
+ def _vprint(self, message: str, force: bool = False):
64
+ """Helper for verbose printing."""
65
+ _vprint(message, self._verbose, force)
66
+
67
+ def _run(self, *args, **kwargs):
68
+ """Run the tool with micro-turn injection at execution level."""
69
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] _run called for {self.name} - VERSION 2.0")
70
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] Args: {args}")
71
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] Kwargs: {kwargs}")
72
+
73
+ try:
74
+ # Get the original input
75
+ original_input = args[0] if args else ""
76
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] Original input: {original_input[:100]}...")
77
+
78
+ # Apply micro-turn injection if we have rules
79
+ modified_input = self._apply_micro_turn_injection(str(original_input))
80
+
81
+ if modified_input != original_input:
82
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] Applied micro-turn injection for {self.name}: {original_input[:50]}... -> {modified_input[:50]}...")
83
+ # Use modified input
84
+ result = self.original_tool._run(modified_input, *args[1:], **kwargs)
85
+ else:
86
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] No micro-turn injection applied for {self.name}")
87
+ # Use original input
88
+ result = self.original_tool._run(*args, **kwargs)
89
+
90
+ # 🧹 PIPECLEANER: Apply deduplication to tool result (microturn-style interception)
91
+ result = self._apply_pipecleaner_to_result(result)
92
+
93
+ # Capture the tool output in the trace
94
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] About to capture tool output for {self.name}")
95
+ self._capture_tool_output(self.name, args, kwargs, result)
96
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] Finished capturing tool output for {self.name}")
97
+
98
+ return result
99
+
100
+ except Exception as e:
101
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] Exception in _run: {e}")
102
+ import traceback
103
+ traceback.print_exc()
104
+ # Still try to call the original tool
105
+ result = self.original_tool._run(*args, **kwargs)
106
+ return result
107
+
108
+ def invoke(self, input_data, config=None, **kwargs):
109
+ """Invoke the tool with micro-turn injection."""
110
+ # Get the original input
111
+ original_input = str(input_data)
112
+
113
+ # Apply micro-turn injection if we have rules
114
+ modified_input = self._apply_micro_turn_injection(original_input)
115
+
116
+ if modified_input != original_input:
117
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] Applied micro-turn injection for {self.name}: {original_input[:50]}... -> {modified_input[:50]}...")
118
+ # Use modified input
119
+ return self.original_tool.invoke(modified_input, config, **kwargs)
120
+ else:
121
+ # Use original input
122
+ return self.original_tool.invoke(input_data, config, **kwargs)
123
+
124
+ async def _arun(self, *args, **kwargs):
125
+ """Async run the tool with micro-turn injection at execution level."""
126
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] _arun called for {self.name} - ASYNC VERSION")
127
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] Args: {args}")
128
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] Kwargs: {kwargs}")
129
+
130
+ try:
131
+ # Get the original input
132
+ original_input = args[0] if args else ""
133
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] Original input: {original_input[:100]}...")
134
+
135
+ # Apply micro-turn injection if we have rules
136
+ modified_input = self._apply_micro_turn_injection(str(original_input))
137
+
138
+ if modified_input != original_input:
139
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] Applied micro-turn injection for {self.name}: {original_input[:50]}... -> {modified_input[:50]}...")
140
+ # Use modified input
141
+ result = await self.original_tool._arun(modified_input, *args[1:], **kwargs)
142
+ else:
143
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] No micro-turn injection applied for {self.name}")
144
+ # Use original input
145
+ result = await self.original_tool._arun(*args, **kwargs)
146
+
147
+ # 🧹 PIPECLEANER: Apply deduplication to tool result (microturn-style interception)
148
+ result = self._apply_pipecleaner_to_result(result)
149
+
150
+ # Capture the tool output in the trace
151
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] About to capture tool output for {self.name}")
152
+ self._capture_tool_output(self.name, args, kwargs, result)
153
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] Finished capturing tool output for {self.name}")
154
+
155
+ return result
156
+
157
+ except Exception as e:
158
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] Exception in _arun: {e}")
159
+ import traceback
160
+ traceback.print_exc()
161
+ # Still try to call the original tool
162
+ result = await self.original_tool._arun(*args, **kwargs)
163
+ return result
164
+
165
+ async def ainvoke(self, input_data, config=None, **kwargs):
166
+ """Async invoke the tool with micro-turn injection."""
167
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] ainvoke called for {self.name} - ASYNC VERSION")
168
+
169
+ # Get the original input
170
+ original_input = str(input_data)
171
+
172
+ # Apply micro-turn injection if we have rules
173
+ modified_input = self._apply_micro_turn_injection(original_input)
174
+
175
+ if modified_input != original_input:
176
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] Applied micro-turn injection for {self.name}: {original_input[:50]}... -> {modified_input[:50]}...")
177
+ # Use modified input
178
+ result = await self.original_tool.ainvoke(modified_input, config, **kwargs)
179
+ else:
180
+ # Use original input
181
+ result = await self.original_tool.ainvoke(input_data, config, **kwargs)
182
+
183
+ # 🧹 PIPECLEANER: Apply deduplication to tool result (microturn-style interception)
184
+ result = self._apply_pipecleaner_to_result(result)
185
+
186
+ return result
187
+
188
+ def _apply_micro_turn_injection(self, original_input: str) -> str:
189
+ """Apply micro-turn injection to the tool input."""
190
+ try:
191
+ # Check if we have a callback handler with rules and LLM
192
+ if not self.callback_handler:
193
+ return original_input
194
+
195
+ # Normalize selected rules into Rule objects (handle (rule, metadata) tuples)
196
+ normalized_rules = []
197
+ for rule_meta in getattr(self.callback_handler, "_selected_rules", []) or []:
198
+ if isinstance(rule_meta, tuple) and len(rule_meta) == 2:
199
+ rule_obj, _metadata = rule_meta
200
+ else:
201
+ rule_obj = rule_meta
202
+ normalized_rules.append(rule_obj)
203
+
204
+ # Filter tool_start rules
205
+ tool_rules = [r for r in normalized_rules if getattr(r, 'target_step_type', '') == "tool_start"]
206
+
207
+ if not tool_rules:
208
+ self._vprint(f"[DASEIN][MICROTURN] No tool rules selected - skipping micro-turn for {self.name}")
209
+ return original_input
210
+
211
+ # Check if any rule covers this tool
212
+ covered_rules = [rule for rule in tool_rules
213
+ if self._rule_covers_tool(rule, self.name, original_input)]
214
+
215
+ if not covered_rules:
216
+ return original_input
217
+
218
+ # Fire micro-turn LLM call (use first matching rule)
219
+ rule = covered_rules[0]
220
+ self._vprint(f"[DASEIN][MICROTURN] rule_id={rule.id} tool={self.name}")
221
+
222
+ # Create micro-turn prompt
223
+ micro_turn_prompt = self._create_micro_turn_prompt(rule, self.name, original_input)
224
+
225
+ # Execute micro-turn LLM call
226
+ modified_input = self._execute_micro_turn_llm_call(micro_turn_prompt, original_input)
227
+
228
+ self._vprint(f"[DASEIN][MICROTURN] Applied rule {rule.id}: {str(original_input)[:50]}... -> {str(modified_input)[:50]}...")
229
+ return modified_input
230
+
231
+ except Exception as e:
232
+ self._vprint(f"[DASEIN][MICROTURN] Error in micro-turn injection: {e}")
233
+ return original_input
234
+
235
+ def _rule_covers_tool(self, rule, tool_name: str, tool_input: str) -> bool:
236
+ """Check if a rule covers this tool call."""
237
+ if not hasattr(rule, 'references') or not rule.references:
238
+ return False
239
+
240
+ # Check if the rule references this tool
241
+ tools = rule.references.get('tools', [])
242
+ return tool_name in tools
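A minimal stand-in rule that this coverage check would accept; real rule objects come from dasein's rule selection, and this shape reflects only what `_rule_covers_tool` inspects:

```python
# Stand-in rule with just a .references dict carrying a 'tools' list.
class FakeRule:
    references = {"tools": ["sql_db_query"]}

rule = FakeRule()
tools = rule.references.get("tools", [])
print("sql_db_query" in tools)  # True -> the rule covers this tool call
```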
243
+
244
+ def _create_micro_turn_prompt(self, rule, tool_name: str, tool_input: str) -> str:
245
+ """Create the prompt for the micro-turn LLM call."""
246
+ return f"""You are applying a rule to fix a tool input.
247
+
248
+ Rule: {rule.advice_text}
249
+
250
+ Tool: {tool_name}
251
+ Current Input: {tool_input}
252
+
253
+ Apply the rule to fix the input. Return only the corrected input, nothing else."""
254
+
255
+ def _execute_micro_turn_llm_call(self, prompt: str, original_input: str) -> str:
256
+ """Execute the actual micro-turn LLM call."""
257
+ try:
258
+ if not self.callback_handler or not self.callback_handler._llm:
259
+ self._vprint(f"[DASEIN][MICROTURN] No LLM available for micro-turn call")
260
+ return original_input
261
+
262
+ self._vprint(f"[DASEIN][MICROTURN] Executing micro-turn LLM call")
263
+ self._vprint(f"[DASEIN][MICROTURN] Prompt: {prompt[:200]}...")
264
+
265
+ # Make the micro-turn LLM call
266
+ messages = [{"role": "user", "content": prompt}]
267
+ response = self.callback_handler._llm.invoke(messages)
268
+
269
+ # Extract the response content
270
+ if hasattr(response, 'content'):
271
+ modified_input = response.content.strip()
272
+ elif isinstance(response, str):
273
+ modified_input = response.strip()
274
+ else:
275
+ modified_input = str(response).strip()
276
+
277
+ self._vprint(f"[DASEIN][MICROTURN] LLM response: {modified_input[:100]}...")
278
+
279
+ # 🚨 CRITICAL: Parse JSON responses with markdown fences
280
+ if modified_input.startswith('```json') or modified_input.startswith('```'):
281
+ try:
282
+ # Extract JSON from markdown fences
283
+ import re
284
+ import json
285
+ json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', modified_input, re.DOTALL)
286
+ if json_match:
287
+ json_str = json_match.group(1)
288
+ parsed_json = json.loads(json_str)
289
+ # Convert back to the expected format
290
+ if isinstance(parsed_json, dict) and 'name' in parsed_json and 'args' in parsed_json:
291
+ modified_input = parsed_json
292
+ self._vprint(f"[DASEIN][MICROTURN] Parsed JSON from markdown fences: {parsed_json}")
293
+ else:
294
+ self._vprint(f"[DASEIN][MICROTURN] JSON doesn't have expected structure, using as-is")
295
+ else:
296
+ self._vprint(f"[DASEIN][MICROTURN] Could not extract JSON from markdown fences")
297
+ except Exception as e:
298
+ self._vprint(f"[DASEIN][MICROTURN] Error parsing JSON: {e}")
299
+
300
+ # Validate the response - only fallback if completely empty
301
+ if not modified_input:
302
+ self._vprint(f"[DASEIN][MICROTURN] LLM response empty, using original input")
303
+ return original_input
304
+
305
+ return modified_input
306
+
307
+ except Exception as e:
308
+ self._vprint(f"[DASEIN][MICROTURN] Error executing micro-turn LLM call: {e}")
309
+ return original_input
310
+
311
+ def _apply_pipecleaner_to_result(self, result):
312
+ """
313
+ Apply pipecleaner deduplication to tool result (microturn-style interception).
314
+
315
+ This is called right after tool execution, before returning the result to the agent.
316
+ Similar to how microturn intercepts LLM responses.
317
+ """
318
+ try:
319
+ # Get callback handler's rules
320
+ if not self.callback_handler or not hasattr(self.callback_handler, '_selected_rules'):
321
+ return result
322
+
323
+ # Convert result to string
324
+ result_str = str(result)
325
+
326
+ print(f"[PIPECLEANER DEBUG] Tool wrapper intercepted: {self.name}")
327
+ print(f"[PIPECLEANER DEBUG] Result length: {len(result_str)} chars")
328
+ print(f"[PIPECLEANER DEBUG] Rules count: {len(self.callback_handler._selected_rules)}")
329
+
330
+ # Apply pipecleaner if filter search rule exists
331
+ from .pipecleaner import apply_pipecleaner_if_applicable
332
+
333
+ # Get or initialize cached model from callback handler
334
+ cached_model = getattr(self.callback_handler, '_pipecleaner_embedding_model', None)
335
+
336
+ deduplicated_str, model = apply_pipecleaner_if_applicable(
337
+ self.name,
338
+ result_str,
339
+ self.callback_handler._selected_rules,
340
+ cached_model=cached_model
341
+ )
342
+
343
+ # Cache model for next search
344
+ if model is not None:
345
+ self.callback_handler._pipecleaner_embedding_model = model
346
+
347
+ # Return deduplicated result (or original if no filter applied)
348
+ return deduplicated_str
349
+
350
+ except Exception as e:
351
+ print(f"[PIPECLEANER] Error in result interception: {e}")
352
+ import traceback
353
+ traceback.print_exc()
354
+ return result
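The generic shape of this interception point, sketched with a trivial line-level deduplicator standing in for the pipecleaner; `run_with_filter` and `dedupe` are illustrative names:

```python
# Run the tool, pass its result through a filter, fail open on error.

def dedupe(text: str) -> str:
    """Drop exact duplicate lines (toy stand-in for corpus deduplication)."""
    seen, kept = set(), []
    for line in text.splitlines():
        if line not in seen:
            seen.add(line)
            kept.append(line)
    return "\n".join(kept)

def run_with_filter(tool_fn, *args, **kwargs):
    result = tool_fn(*args, **kwargs)
    try:
        return dedupe(str(result))
    except Exception:
        return result  # fail open, as the wrapper above does

print(run_with_filter(lambda q: "a\nb\na", "query"))  # prints "a" then "b"
```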
355
+
356
+ def _capture_tool_output(self, tool_name, args, kwargs, result):
357
+ """Capture tool output in the trace."""
358
+ try:
359
+ # Create args excerpt
360
+ args_str = str(args) if args else ""
361
+ if len(args_str) > 1000:
362
+ args_str = args_str[:1000] + "..."
363
+
364
+ # Create result excerpt (with 10k limit)
365
+ result_str = str(result) if result else ""
366
+ if len(result_str) > 10000:
367
+ result_str = result_str[:10000] + "..."
368
+
369
+ # Add tool_end step to trace
370
+ step = {
371
+ "step_type": "tool_end",
372
+ "tool_name": tool_name,
373
+ "args_excerpt": args_str,
374
+ "outcome": result_str,
375
+ "ts": datetime.now().isoformat(),
376
+ "run_id": f"tool_{id(self)}_{datetime.now().timestamp()}",
377
+ "parent_run_id": None,
378
+ }
379
+
380
+ # Add to LLM wrapper's trace if available
381
+ if self.callback_handler and hasattr(self.callback_handler, '_llm') and self.callback_handler._llm:
382
+ if hasattr(self.callback_handler._llm, '_trace'):
383
+ self.callback_handler._llm._trace.append(step)
384
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] Added to LLM wrapper trace")
385
+ else:
386
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] LLM wrapper has no _trace attribute")
387
+ else:
388
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] No LLM wrapper available")
389
+
390
+ # Also add to callback handler's trace if it has one
391
+ if self.callback_handler and hasattr(self.callback_handler, '_trace'):
392
+ self.callback_handler._trace.append(step)
393
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] Added to callback handler trace")
394
+
395
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] Captured tool output for {tool_name}")
396
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] Output length: {len(result_str)} chars")
397
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] First 200 chars: {result_str[:200]}")
398
+ if self.callback_handler and hasattr(self.callback_handler, '_trace'):
399
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] Callback handler trace length after capture: {len(self.callback_handler._trace)}")
400
+
401
+ except Exception as e:
402
+ self._vprint(f"[DASEIN][TOOL_WRAPPER] Error capturing tool output: {e}")
403
+
404
+
405
+ class DaseinCallbackHandler(BaseCallbackHandler):
406
+ """
407
+ Callback handler that captures step-by-step traces and implements rule injection.
408
+ """
409
+
410
+ def __init__(self, weights=None, llm=None, is_langgraph=False, coordinator_node=None, planning_nodes=None, verbose: bool = False, agent=None, extract_tools_fn=None):
411
+ super().__init__()
412
+ self._weights = weights
413
+ self._selected_rules = [] # Rules selected for this run
414
+ self._injection_guard = set() # Prevent duplicate injections
415
+ self._last_modified_prompts = [] # Store modified prompts for LLM wrapper
416
+ self._llm = llm # Store reference to LLM for micro-turn calls
417
+ self._tool_name_by_run_id = {} # Track tool names by run_id
418
+ self._discovered_tools = set() # Track tools discovered during execution
419
+ self._wrapped_dynamic_tools = {} # Cache of wrapped dynamic tools
420
+ self._is_langgraph = is_langgraph # Flag to skip planning rule injection for LangGraph
421
+ self._run_number = 1 # Track which run this is (for microturn testing)
422
+ self._coordinator_node = coordinator_node # Coordinator node (for future targeted injection)
423
+ self._planning_nodes = planning_nodes if planning_nodes else set() # Planning-capable nodes (including subgraph children)
424
+ self._current_chain_node = None # Track current LangGraph node
425
+ self._agent_was_recreated = False # Track if agent was successfully recreated
426
+ self._function_calls_made = {} # Track function calls: {function_name: [{'step': N, 'ts': timestamp}]}
427
+ self._trace = [] # Instance-level trace storage (not global) for thread-safety
428
+ self._verbose = verbose
429
+ self._start_times = {} # Track start times for duration calculation: {step_index: datetime}
430
+ self._agent = agent # CRITICAL: Reference to agent for runtime tool extraction
431
+ self._extract_tools_fn = extract_tools_fn # Function to extract tools
432
+ self._runtime_tools_extracted = False # Flag to extract tools only once during execution
433
+ self._compiled_tools_metadata = [] # Store extracted tools
434
+ self._pipecleaner_embedding_model = None # Cache embedding model for this run
435
+ self._current_tool_name = None # Track currently executing tool for hotpath deduplication
436
+
437
+ # Generate stable run_id for corpus deduplication
438
+ import uuid
439
+ self.run_id = str(uuid.uuid4())
440
+
441
+ self._vprint(f"[DASEIN][CALLBACK] Initialized callback handler (LangGraph: {is_langgraph}, run_id: {self.run_id[:8]})")
442
+ if coordinator_node:
443
+ self._vprint(f"[DASEIN][CALLBACK] Coordinator: {coordinator_node}")
444
+ if planning_nodes:
445
+ self._vprint(f"[DASEIN][CALLBACK] Planning nodes: {planning_nodes}")
446
+ self._vprint(f"[DASEIN][CALLBACK] Dynamic tool detection enabled (tools discovered at runtime)")
447
+
448
+ def _vprint(self, message: str, force: bool = False):
449
+ """Helper for verbose printing."""
450
+ _vprint(message, self._verbose, force)
451
+
452
+ def reset_run_state(self):
453
+ """Reset state that should be cleared between runs."""
454
+ self._function_calls_made = {}
455
+ self._injection_guard = set()
456
+ self._trace = [] # Clear instance trace
457
+ self._start_times = {} # Clear start times
458
+ self._run_number = getattr(self, '_run_number', 1) + 1 # Increment run number
459
+ self._vprint(f"[DASEIN][CALLBACK] Reset run state (trace, function calls, injection guard, and start times cleared) - now on RUN {self._run_number}")
460
+
461
+ def get_compiled_tools_summary(self):
462
+ """Return 1-line summary of extracted tools."""
463
+ if not self._compiled_tools_metadata:
464
+ return None
465
+ # Group by node
466
+ by_node = {}
467
+ for tool in self._compiled_tools_metadata:
468
+ node = tool.get('node', 'unknown')
469
+ if node not in by_node:
470
+ by_node[node] = []
471
+ by_node[node].append(tool['name'])
472
+ # Format as: node1:[tool1,tool2] node2:[tool3]
473
+ parts = [f"{node}:[{','.join(tools)}]" for node, tools in sorted(by_node.items())]
474
+ return f"{len(self._compiled_tools_metadata)} tools extracted: {' '.join(parts)}"
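What the summary line looks like for a small, fabricated metadata set, using the same grouping logic:

```python
meta = [
    {"name": "sql_db_query", "node": "agent"},
    {"name": "sql_db_schema", "node": "agent"},
    {"name": "Summary", "node": "researcher"},
]
by_node = {}
for t in meta:
    by_node.setdefault(t.get("node", "unknown"), []).append(t["name"])
parts = [f"{node}:[{','.join(tools)}]" for node, tools in sorted(by_node.items())]
print(f"{len(meta)} tools extracted: {' '.join(parts)}")
# 3 tools extracted: agent:[sql_db_query,sql_db_schema] researcher:[Summary]
```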
475
+
476
+ def _patch_tools_for_node(self, node_name: str):
477
+ """
478
+ Patch tool objects for a specific node when they're discovered at runtime.
479
+
480
+ Called from on_llm_start when tools are detected for a node.
481
+ """
482
+ try:
483
+ print(f"\n{'='*70}")
484
+ print(f"[DASEIN][TOOL_PATCH] 🔧 Patching tools for node: {node_name}")
485
+ print(f"{'='*70}")
486
+
487
+ from .wrappers import patch_tool_instance
488
+
489
+ # Track patched tools to avoid double-patching
490
+ if not hasattr(self, '_patched_tools'):
491
+ self._patched_tools = set()
492
+ print(f"[DASEIN][TOOL_PATCH] Initialized patched tools tracker")
493
+
494
+ # Find the actual tool objects for this node in the agent graph
495
+ print(f"[DASEIN][TOOL_PATCH] Searching for tool objects in node '{node_name}'...")
496
+ tool_objects = self._find_tool_objects_for_node(node_name)
497
+
498
+ if not tool_objects:
499
+ print(f"[DASEIN][TOOL_PATCH] ⚠️ No tool objects found for node '{node_name}'")
500
+ print(f"{'='*70}\n")
501
+ return
502
+
503
+ print(f"[DASEIN][TOOL_PATCH] Found {len(tool_objects)} tool object(s)")
504
+
505
+ # Patch each tool
506
+ patched_count = 0
507
+ for i, tool_obj in enumerate(tool_objects, 1):
508
+ tool_name = getattr(tool_obj, 'name', 'unknown')
509
+ tool_type = type(tool_obj).__name__
510
+ tool_id = f"{node_name}:{tool_name}"
511
+
512
+ print(f"[DASEIN][TOOL_PATCH] [{i}/{len(tool_objects)}] Tool: '{tool_name}' (type: {tool_type})")
513
+
514
+ if tool_id in self._patched_tools:
515
+ print(f"[DASEIN][TOOL_PATCH] ⏭️ Already patched, skipping")
516
+ else:
517
+ print(f"[DASEIN][TOOL_PATCH] 🔨 Patching...")
518
+ if patch_tool_instance(tool_obj, self):
519
+ self._patched_tools.add(tool_id)
520
+ patched_count += 1
521
+ print(f"[DASEIN][TOOL_PATCH] ✅ Successfully patched '{tool_name}'")
522
+ else:
523
+ print(f"[DASEIN][TOOL_PATCH] ❌ Failed to patch '{tool_name}'")
524
+
525
+ print(f"[DASEIN][TOOL_PATCH] Summary: Patched {patched_count}/{len(tool_objects)} tools")
526
+ print(f"[DASEIN][TOOL_PATCH] Total tools patched so far: {len(self._patched_tools)}")
527
+ print(f"{'='*70}\n")
528
+
529
+ except Exception as e:
530
+ print(f"[DASEIN][TOOL_PATCH] ERROR patching tools for node {node_name}: {e}")
531
+ import traceback
532
+ traceback.print_exc()
533
+ print(f"{'='*70}\n")
534
+
535
+ def _search_node_recursively(self, node_name: str, nodes: dict, depth: int = 0) -> list:
536
+ """Recursively search for a node by name in graphs and subgraphs."""
537
+ indent = " " * depth
538
+ tool_objects = []
539
+
540
+ for parent_name, parent_node in nodes.items():
541
+ if parent_name.startswith('__'):
542
+ continue
543
+
544
+ print(f"[DASEIN][TOOL_PATCH]{indent} Checking node: {parent_name}")
545
+ print(f"[DASEIN][TOOL_PATCH]{indent} Node type: {type(parent_node).__name__}")
546
+ print(f"[DASEIN][TOOL_PATCH]{indent} Has .data: {hasattr(parent_node, 'data')}")
547
+ if hasattr(parent_node, 'data'):
548
+ print(f"[DASEIN][TOOL_PATCH]{indent} .data type: {type(parent_node.data).__name__}")
549
+ print(f"[DASEIN][TOOL_PATCH]{indent} .data has .nodes: {hasattr(parent_node.data, 'nodes')}")
550
+
551
+ # Check if this parent has a subgraph
552
+ if hasattr(parent_node, 'data') and hasattr(parent_node.data, 'nodes'):
553
+ print(f"[DASEIN][TOOL_PATCH]{indent} Has subgraph!")
554
+ try:
555
+ subgraph = parent_node.data.get_graph()
556
+ sub_nodes = subgraph.nodes
557
+ print(f"[DASEIN][TOOL_PATCH]{indent} Subgraph nodes: {list(sub_nodes.keys())}")
558
+
559
+ # Check if target node is in this subgraph
560
+ if node_name in sub_nodes:
561
+ print(f"[DASEIN][TOOL_PATCH]{indent} ✓ Found '{node_name}' in subgraph!")
562
+ target_node = sub_nodes[node_name]
563
+ if hasattr(target_node, 'node'):
564
+ actual_node = target_node.node
565
+ tool_objects = self._extract_tools_from_node_object(actual_node)
566
+ if tool_objects:
567
+ return tool_objects
568
+
569
+ # Not found here, recurse deeper into this subgraph
570
+ print(f"[DASEIN][TOOL_PATCH]{indent} Recursing into subgraph nodes...")
571
+ tool_objects = self._search_node_recursively(node_name, sub_nodes, depth + 1)
572
+ if tool_objects:
573
+ return tool_objects
574
+
575
+ except Exception as e:
576
+ print(f"[DASEIN][TOOL_PATCH]{indent} Error: {e}")
577
+ import traceback
578
+ traceback.print_exc()
579
+ else:
580
+ print(f"[DASEIN][TOOL_PATCH]{indent} No subgraph")
581
+
582
+ return tool_objects
583
+
584
+ def _find_tool_objects_for_node(self, node_name: str):
585
+ """Find actual Python tool objects for a given node."""
586
+ tool_objects = []
587
+
588
+ try:
589
+ if not hasattr(self._agent, 'get_graph'):
590
+ print(f"[DASEIN][TOOL_PATCH] Agent has no get_graph method")
591
+ return tool_objects
592
+
593
+ graph = self._agent.get_graph()
594
+ nodes = graph.nodes
595
+ node_names = list(nodes.keys())
596
+ print(f"[DASEIN][TOOL_PATCH] Graph has {len(nodes)} nodes: {node_names}")
597
+
598
+ # Check if node_name contains a dot (subgraph notation like "research_supervisor.ConductResearch")
599
+ if '.' in node_name:
600
+ print(f"[DASEIN][TOOL_PATCH] Node is subgraph: {node_name}")
601
+ parent_name, sub_name = node_name.split('.', 1)
602
+ parent_node = nodes.get(parent_name)
603
+
604
+ if parent_node and hasattr(parent_node, 'data'):
605
+ print(f"[DASEIN][TOOL_PATCH] Found parent node, getting subgraph...")
606
+ subgraph = parent_node.data.get_graph()
607
+ sub_nodes = subgraph.nodes
608
+ print(f"[DASEIN][TOOL_PATCH] Subgraph has {len(sub_nodes)} nodes")
609
+ target_node = sub_nodes.get(sub_name)
610
+
611
+ if target_node and hasattr(target_node, 'node'):
612
+ print(f"[DASEIN][TOOL_PATCH] Found target subnode, extracting tools...")
613
+ actual_node = target_node.node
614
+ tool_objects = self._extract_tools_from_node_object(actual_node)
615
+ else:
616
+ print(f"[DASEIN][TOOL_PATCH] ⚠️ Subnode not found or has no .node attribute")
617
+ else:
618
+ print(f"[DASEIN][TOOL_PATCH] ⚠️ Parent node not found or has no .data attribute")
619
+ else:
620
+ # Top-level node
621
+ print(f"[DASEIN][TOOL_PATCH] Node is top-level: {node_name}")
622
+ target_node = nodes.get(node_name)
623
+
624
+ if target_node:
625
+ print(f"[DASEIN][TOOL_PATCH] Found node, checking for .node attribute...")
626
+ if hasattr(target_node, 'node'):
627
+ print(f"[DASEIN][TOOL_PATCH] Has .node attribute, extracting tools...")
628
+ actual_node = target_node.node
629
+ tool_objects = self._extract_tools_from_node_object(actual_node)
630
+ else:
631
+ print(f"[DASEIN][TOOL_PATCH] ⚠️ Node has no .node attribute")
632
+ else:
633
+ # Not found as top-level, search in subgraphs
634
+ print(f"[DASEIN][TOOL_PATCH] ⚠️ Node '{node_name}' not found in top-level graph")
635
+ print(f"[DASEIN][TOOL_PATCH] Searching in subgraphs...")
636
+
637
+ # Recursively search all subgraphs
638
+ tool_objects = self._search_node_recursively(node_name, nodes)
639
+
640
+ if not tool_objects:
641
+ print(f"[DASEIN][TOOL_PATCH] ⚠️ Node '{node_name}' not found in any subgraph")
642
+
643
+ except Exception as e:
644
+ print(f"[DASEIN][TOOL_PATCH] ❌ Exception while finding tools: {e}")
645
+ import traceback
646
+ traceback.print_exc()
647
+
648
+ return tool_objects
649
+
650
+ def _extract_tools_from_node_object(self, node_obj):
651
+ """Extract tool objects from a node object."""
652
+ tools = []
653
+
654
+ print(f"[DASEIN][TOOL_PATCH] Checking node_obj type: {type(node_obj).__name__}")
655
+
656
+ # Check tools_by_name
657
+ if hasattr(node_obj, 'tools_by_name'):
658
+ print(f"[DASEIN][TOOL_PATCH] ✓ Has tools_by_name with {len(node_obj.tools_by_name)} tools")
659
+ tools.extend(node_obj.tools_by_name.values())
660
+ else:
661
+ print(f"[DASEIN][TOOL_PATCH] ✗ No tools_by_name")
662
+
663
+ # Check runnable.tools
664
+ if hasattr(node_obj, 'runnable'):
665
+ print(f"[DASEIN][TOOL_PATCH] ✓ Has runnable")
666
+ if hasattr(node_obj.runnable, 'tools'):
667
+ print(f"[DASEIN][TOOL_PATCH] ✓ runnable.tools exists")
668
+ runnable_tools = node_obj.runnable.tools
669
+ if callable(runnable_tools):
670
+ print(f"[DASEIN][TOOL_PATCH] runnable.tools is callable, calling...")
671
+ try:
672
+ runnable_tools = runnable_tools()
673
+ print(f"[DASEIN][TOOL_PATCH] Got {len(runnable_tools) if isinstance(runnable_tools, list) else 1} tool(s)")
674
+ except Exception as e:
675
+ print(f"[DASEIN][TOOL_PATCH] ❌ Failed to call: {e}")
676
+ if isinstance(runnable_tools, list):
677
+ tools.extend(runnable_tools)
678
+ elif runnable_tools:
679
+ tools.append(runnable_tools)
680
+ else:
681
+ print(f"[DASEIN][TOOL_PATCH] ✗ No runnable.tools")
682
+ else:
683
+ print(f"[DASEIN][TOOL_PATCH] ✗ No runnable")
684
+
685
+ # Check bound.tools
686
+ if hasattr(node_obj, 'bound'):
687
+ print(f"[DASEIN][TOOL_PATCH] ✓ Has bound")
688
+ if hasattr(node_obj.bound, 'tools'):
689
+ print(f"[DASEIN][TOOL_PATCH] ✓ bound.tools exists")
690
+ bound_tools = node_obj.bound.tools
691
+ if isinstance(bound_tools, list):
692
+ print(f"[DASEIN][TOOL_PATCH] Got {len(bound_tools)} tool(s)")
693
+ tools.extend(bound_tools)
694
+ elif bound_tools:
695
+ print(f"[DASEIN][TOOL_PATCH] Got 1 tool")
696
+ tools.append(bound_tools)
697
+ else:
698
+ print(f"[DASEIN][TOOL_PATCH] No bound.tools")
699
+ else:
700
+ print(f"[DASEIN][TOOL_PATCH] No bound")
701
+
702
+ # Check steps
703
+ if hasattr(node_obj, 'steps'):
704
+ print(f"[DASEIN][TOOL_PATCH] ✓ Has steps ({len(node_obj.steps)})")
705
+ for i, step in enumerate(node_obj.steps):
706
+ if hasattr(step, 'tools_by_name'):
707
+ print(f"[DASEIN][TOOL_PATCH] ✓ Step {i} has tools_by_name with {len(step.tools_by_name)} tools")
708
+ tools.extend(step.tools_by_name.values())
709
+ break
710
+ else:
711
+ print(f"[DASEIN][TOOL_PATCH] ✗ No steps")
712
+
713
+ print(f"[DASEIN][TOOL_PATCH] Total tools extracted: {len(tools)}")
714
+
715
+ return tools
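A condensed sketch of the probing order above (tools_by_name, then runnable.tools, then bound.tools, then steps); the real method additionally wraps the callable case in try/except and prints its progress:

```python
def extract_tools(node_obj):
    """Collect tool objects from the known node attachment points."""
    tools = []
    if hasattr(node_obj, 'tools_by_name'):                    # ToolNode style
        tools.extend(node_obj.tools_by_name.values())
    runnable = getattr(node_obj, 'runnable', None)
    if runnable is not None and hasattr(runnable, 'tools'):   # agent runnables
        t = runnable.tools() if callable(runnable.tools) else runnable.tools
        if isinstance(t, list):
            tools.extend(t)
        elif t:
            tools.append(t)
    bound = getattr(node_obj, 'bound', None)
    if bound is not None and hasattr(bound, 'tools'):         # bound models
        t = bound.tools
        if isinstance(t, list):
            tools.extend(t)
        elif t:
            tools.append(t)
    for step in getattr(node_obj, 'steps', []) or []:         # sequences
        if hasattr(step, 'tools_by_name'):
            tools.extend(step.tools_by_name.values())
            break
    return tools
```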
716
+
717
+ def on_llm_start(
718
+ self,
719
+ serialized: Dict[str, Any],
720
+ prompts: List[str],
721
+ *,
722
+ run_id: str = None,
723
+ parent_run_id: Optional[str] = None,
724
+ tags: Optional[List[str]] = None,
725
+ metadata: Optional[Dict[str, Any]] = None,
726
+ **kwargs: Any,
727
+ ) -> None:
728
+ """Called when an LLM starts running."""
729
+ model_name = serialized.get("name", "unknown") if serialized else "unknown"
730
+
731
+ # PIPECLEANER: Intercept Summary LLM calls
732
+ tools_in_call = None
733
+ if 'invocation_params' in kwargs:
734
+ tools_in_call = kwargs['invocation_params'].get('tools') or kwargs['invocation_params'].get('functions')
735
+
736
+ if tools_in_call:
737
+ tool_names = [t.get('name') or t.get('function', {}).get('name', 'unknown') for t in tools_in_call]
738
+
739
+ if 'Summary' in tool_names:
740
+ # NOTE: Deduplication now happens in the HOT PATH (monkey-patched LLM methods)
741
+ # This callback is just for tracking, not deduplication
742
+ pass
743
+
744
+ if False and 'Summary' in tool_names: # DISABLED: Deduplication moved to hotpath
745
+ # Check if run-scoped corpus is enabled (has filter search rules)
746
+ has_filter_rules = False
747
+ if hasattr(self, '_selected_rules'):
748
+ from .pipecleaner import _find_filter_search_rules
749
+ filter_rules = _find_filter_search_rules('summary', self._selected_rules)
750
+ has_filter_rules = len(filter_rules) > 0
751
+
752
+ if not has_filter_rules:
753
+ # Silent fail - no corpus deduplication if no rules
754
+ pass
755
+ else:
756
+ # Only print when we actually have rules and will deduplicate
757
+ print(f"[CORPUS] 📥 Summary LLM detected with {len(prompts)} prompts")
758
+ # Re-entrancy guard: prevent nested calls from corrupting state
759
+ from contextvars import ContextVar
760
+ if not hasattr(DaseinCallbackHandler, '_in_corpus_processing'):
761
+ DaseinCallbackHandler._in_corpus_processing = ContextVar('in_corpus', default=False)
762
+ DaseinCallbackHandler._reentrancy_count = 0
763
+
764
+ if DaseinCallbackHandler._in_corpus_processing.get():
765
+ # Already processing corpus in this call stack, fail-open
766
+ DaseinCallbackHandler._reentrancy_count += 1
767
+ print(f"[CORPUS] ⚠️ Re-entrancy detected #{DaseinCallbackHandler._reentrancy_count}, skipping nested call")
768
+ return
769
+
770
+ # Set re-entrancy guard
771
+ token = DaseinCallbackHandler._in_corpus_processing.set(True)
772
+
773
+ try:
774
+ # Get or create run-scoped corpus
775
+ from .pipecleaner import get_or_create_corpus
776
+ import threading
777
+ corpus = get_or_create_corpus(self.run_id, verbose=self._verbose)
778
+
779
+ # Module-level lock for atomic snapshot/swap (shared across all instances)
780
+ if not hasattr(DaseinCallbackHandler, '_prompts_lock'):
781
+ DaseinCallbackHandler._prompts_lock = threading.Lock()
782
+
783
+ # STEP 1: Snapshot under lock (atomic read, NEVER iterate live dict)
784
+ with DaseinCallbackHandler._prompts_lock:
785
+ try:
786
+ snapshot = tuple(prompts) # Immutable snapshot, safe to iterate
787
+ except RuntimeError:
788
+ print(f"[CORPUS] ⚠️ Skipping (prompts being iterated)")
789
+ return
790
+
791
+ # STEP 2: Process outside lock (no contention)
792
+ cleaned_prompts = []
793
+ total_original_chars = 0
794
+ total_cleaned_chars = 0
795
+ total_original_tokens_est = 0
796
+ total_cleaned_tokens_est = 0
797
+
798
+ for i, prompt in enumerate(snapshot):
799
+ prompt_str = str(prompt)
800
+
801
+ # Skip if too short
802
+ if len(prompt_str) < 2500:
803
+ cleaned_prompts.append(prompt_str)
804
+ continue
805
+
806
+ # Track original
807
+ original_chars = len(prompt_str)
808
+ original_tokens_est = original_chars // 4 # Rough estimate: 4 chars/token
809
+ total_original_chars += original_chars
810
+ total_original_tokens_est += original_tokens_est
811
+
812
+ # Split: first 2000 chars (system prompt) + rest (content to dedupe)
813
+ system_part = prompt_str[:2000]
814
+ content_part = prompt_str[2000:]
815
+
816
+ # Generate unique prompt_id
817
+ import hashlib
818
+ prompt_id = f"p{i}_{hashlib.md5(content_part[:100].encode()).hexdigest()[:8]}"
819
+
820
+ # Enqueue into corpus (barrier will handle batching, blocks until ready)
821
+ # Call synchronous enqueue (will block until batch is processed, then released sequentially)
822
+ deduplicated_content = corpus.enqueue_prompt(prompt_id, content_part)
823
+
824
+ # Reassemble
825
+ cleaned_prompt = system_part + deduplicated_content
826
+
827
+ # Track cleaned
828
+ cleaned_chars = len(cleaned_prompt)
829
+ cleaned_tokens_est = cleaned_chars // 4
830
+ total_cleaned_chars += cleaned_chars
831
+ total_cleaned_tokens_est += cleaned_tokens_est
832
+
833
+ reduction_pct = 100*(original_chars-cleaned_chars)//original_chars if original_chars > 0 else 0
834
+ # Always show reduction results (key metric)
835
+ print(f"[🧹 CORPUS] Prompt {prompt_id}: {original_chars} → {cleaned_chars} chars ({reduction_pct}% saved)")
836
+ cleaned_prompts.append(cleaned_prompt)
837
+
838
+ # Store token delta for later adjustment in on_llm_end
839
+ if total_original_tokens_est > 0:
840
+ tokens_saved = total_original_tokens_est - total_cleaned_tokens_est
841
+ if not hasattr(self, '_corpus_token_savings'):
842
+ self._corpus_token_savings = {}
843
+ self._corpus_token_savings[run_id] = tokens_saved
844
+ print(f"[🔬 TOKEN TRACKING] Pre-prune: {total_original_chars} chars (~{total_original_tokens_est} tokens)")
845
+ print(f"[🔬 TOKEN TRACKING] Post-prune: {total_cleaned_chars} chars (~{total_cleaned_tokens_est} tokens)")
846
+ print(f"[🔬 TOKEN TRACKING] Estimated savings: ~{tokens_saved} tokens ({100*tokens_saved//total_original_tokens_est if total_original_tokens_est > 0 else 0}%)")
847
+ print(f"[🔬 TOKEN TRACKING] Stored savings for run_id={str(run_id)[:8]} to adjust on_llm_end")
848
+
849
+ # STEP 3: Atomic swap under lock (copy-on-write, no in-place mutation)
850
+ print(f"[🔬 CORPUS DEBUG] About to swap prompts - have {len(cleaned_prompts)} cleaned prompts")
851
+ with DaseinCallbackHandler._prompts_lock:
852
+ try:
853
+ print(f"[🔬 CORPUS DEBUG] Inside lock, swapping...")
854
+ # Atomic slice assignment (replaces entire contents in one operation)
855
+ prompts[:] = cleaned_prompts
856
+ # CRITICAL: Update _last_modified_prompts so DaseinLLMWrapper sees deduplicated prompts
857
+ self._last_modified_prompts = cleaned_prompts
858
+ print(f"[🔬 CORPUS] ✅ Updated _last_modified_prompts with {len(cleaned_prompts)} deduplicated prompts")
859
+ except RuntimeError as e:
860
+ print(f"[CORPUS] ⚠️ Could not swap prompts (framework collision): {e}")
861
+ except Exception as e:
862
+ print(f"[CORPUS] ⚠️ Unexpected error swapping: {e}")
863
+ import traceback
864
+ traceback.print_exc()
865
+ finally:
866
+ # Always reset re-entrancy guard
867
+ DaseinCallbackHandler._in_corpus_processing.reset(token)
868
+
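The three-step pattern above (snapshot under lock, process outside the lock, swap back via atomic slice assignment) can be exercised in isolation. A minimal sketch; `dedupe_in_place` and the `clean` callable are hypothetical stand-ins for the corpus pipeline:

import threading

_lock = threading.Lock()

def dedupe_in_place(prompts, clean):
    # STEP 1: snapshot under lock - iterate an immutable copy, never the live list
    with _lock:
        snapshot = tuple(prompts)
    # STEP 2: heavy processing outside the lock, so other callers are not blocked
    cleaned = [clean(p) for p in snapshot]
    # STEP 3: swap under lock - slice assignment replaces the contents in place,
    # so every holder of the same list object sees the new values
    with _lock:
        prompts[:] = cleaned

prompts = ["aaa", "bbb"]
dedupe_in_place(prompts, str.upper)
assert prompts == ["AAA", "BBB"]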
869
+ # DEBUG: Print run context
870
+ # print(f"🔧 [LLM_START DEBUG] run_id: {run_id}, parent: {parent_run_id}")
871
+
872
+ # 🎯 CRITICAL: Track current node from kwargs metadata FIRST (needed for tool extraction)
873
+ if self._is_langgraph and 'metadata' in kwargs and isinstance(kwargs['metadata'], dict):
874
+ if 'langgraph_node' in kwargs['metadata']:
875
+ node_name = kwargs['metadata']['langgraph_node']
876
+ self._current_chain_node = node_name
877
+
878
+ # CRITICAL: Extract tools incrementally from each tool-bearing call
879
+ # Tools are bound node-by-node as they're invoked
880
+ if self._is_langgraph and self._agent:
881
+ # Check if THIS call has tools (signal that THIS node's tools are now bound)
882
+ tools_in_call = None
883
+ if 'invocation_params' in kwargs:
884
+ tools_in_call = kwargs['invocation_params'].get('tools') or kwargs['invocation_params'].get('functions')
885
+ elif 'tools' in kwargs:
886
+ tools_in_call = kwargs['tools']
887
+ elif 'functions' in kwargs:
888
+ tools_in_call = kwargs['functions']
889
+
890
+ if tools_in_call:
891
+ node_name = self._current_chain_node or 'unknown'
892
+
893
+ # Extract tool names from the schemas
894
+ tool_names = []
895
+ for tool in tools_in_call:
896
+ name = tool.get('name') or tool.get('function', {}).get('name', 'unknown')
897
+ tool_names.append(name)
898
+
899
+ # print(f"🔧 [TOOLS DETECTED] Node '{node_name}' has {len(tool_names)} tools: {tool_names}") # Commented out - too noisy
900
+
901
+ # Check if we've already extracted tools for this node
902
+ existing_nodes = {t.get('node') for t in self._compiled_tools_metadata}
903
+ if node_name not in existing_nodes:
904
+ try:
905
+ # Extract tools from this specific call (provider-resolved schemas)
906
+ for tool in tools_in_call:
907
+ tool_meta = {
908
+ 'name': tool.get('name') or tool.get('function', {}).get('name', 'unknown'),
909
+ 'description': tool.get('description') or tool.get('function', {}).get('description', ''),
910
+ 'node': node_name
911
+ }
912
+
913
+ # Get args schema
914
+ if 'parameters' in tool:
915
+ tool_meta['args_schema'] = tool['parameters']
916
+ elif 'function' in tool and 'parameters' in tool['function']:
917
+ tool_meta['args_schema'] = tool['function']['parameters']
918
+ else:
919
+ tool_meta['args_schema'] = {}
920
+
921
+ self._compiled_tools_metadata.append(tool_meta)
922
+
923
+ # print(f"🔧 [TOOLS METADATA] Extracted metadata for {len(tool_names)} tools from node '{node_name}'") # Commented out - too noisy
924
+ except Exception as e:
925
+ print(f"🔧 [TOOLS ERROR] Failed to extract metadata: {e}")
926
+ pass # Silently fail
927
+ # else:
928
+ # print(f"🔧 [TOOLS SKIP] Already extracted tools for node '{node_name}'") # Commented out - too noisy
929
+
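The extraction above tolerates both flat tool dicts and OpenAI-style {"function": {...}} wrappers. A condensed sketch of that normalization; the example tool and schema are made up:

def tool_metadata(tool: dict, node: str) -> dict:
    # Prefer top-level fields, fall back to the nested "function" wrapper
    fn = tool.get('function', {})
    return {
        'name': tool.get('name') or fn.get('name', 'unknown'),
        'description': tool.get('description') or fn.get('description', ''),
        'args_schema': tool.get('parameters') or fn.get('parameters', {}),
        'node': node,
    }

flat = {'name': 'search', 'description': 'web search', 'parameters': {'type': 'object'}}
wrapped = {'function': {'name': 'search', 'description': 'web search',
                        'parameters': {'type': 'object'}}}
assert tool_metadata(flat, 'agent') == tool_metadata(wrapped, 'agent')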
930
+ # Inject rules if applicable
931
+ modified_prompts = self._inject_rule_if_applicable("llm_start", model_name, prompts)
932
+
933
+ # Store the modified prompts for the LLM wrapper to use
934
+ self._last_modified_prompts = modified_prompts
935
+
936
+ # Note: Pipecleaner deduplication now happens at ToolExecutor level (see wrappers.py)
937
+
938
+ # 🚨 OPTIMIZED: For LangGraph, check if kwargs contains 'invocation_params' with messages
939
+ # Extract the most recent message instead of full history
940
+ # Use from_end=True to capture the END of system prompts (where user's actual query is)
941
+ if 'invocation_params' in kwargs and 'messages' in kwargs['invocation_params']:
942
+ args_excerpt = self._extract_recent_message({'messages': kwargs['invocation_params']['messages']})
943
+ else:
944
+ args_excerpt = self._excerpt(" | ".join(modified_prompts), from_end=True)
945
+
946
+ # GNN-related fields
947
+ step_index = len(self._trace)
948
+
949
+ # Track which rules triggered at this step (llm_start rules)
950
+ rule_triggered_here = []
951
+ if hasattr(self, '_selected_rules') and self._selected_rules:
952
+ for rule_meta in self._selected_rules:
953
+ if isinstance(rule_meta, tuple) and len(rule_meta) == 2:
954
+ rule_obj, _metadata = rule_meta
955
+ else:
956
+ rule_obj = rule_meta
957
+ target_step_type = getattr(rule_obj, 'target_step_type', '')
958
+ if target_step_type in ['llm_start', 'chain_start']:
959
+ rule_triggered_here.append(getattr(rule_obj, 'id', 'unknown'))
960
+
961
+ # Record start time for duration calculation
962
+ start_time = datetime.now()
963
+ self._start_times[step_index] = start_time
964
+
965
+ step = {
966
+ "step_type": "llm_start",
967
+ "tool_name": model_name,
968
+ "args_excerpt": args_excerpt,
969
+ "outcome": "",
970
+ "ts": start_time.isoformat(),
971
+ "run_id": None,
972
+ "parent_run_id": None,
973
+ "node": self._current_chain_node, # LangGraph node name (if available)
974
+ # GNN step-level fields
975
+ "step_index": step_index,
976
+ "rule_triggered_here": rule_triggered_here,
977
+ }
978
+ self._trace.append(step)
979
+ # self._vprint(f"[DASEIN][CALLBACK] Captured llm_start: {len(_TRACE)} total steps") # Commented out - too noisy
980
+
981
+ def on_llm_end(
982
+ self,
983
+ response: Any,
984
+ **kwargs: Any,
985
+ ) -> None:
986
+ """Called when an LLM ends running."""
987
+ outcome = ""
988
+ try:
989
+ # Debug: Print ALL available data to see what we're getting
990
+ # print(f"[DEBUG] on_llm_end called")
991
+ # print(f" response type: {type(response)}")
992
+ # print(f" kwargs keys: {kwargs.keys()}")
993
+
994
+ # Try multiple extraction strategies
995
+ # Strategy 1: Standard LangChain LLMResult structure
996
+ if hasattr(response, 'generations') and response.generations:
997
+ if len(response.generations) > 0:
998
+ first_gen = response.generations[0]
999
+ if isinstance(first_gen, list) and len(first_gen) > 0:
1000
+ generation = first_gen[0]
1001
+ else:
1002
+ generation = first_gen
1003
+
1004
+ # Try multiple content fields
1005
+ if hasattr(generation, 'text') and generation.text:
1006
+ outcome = self._excerpt(generation.text)
1007
+ elif hasattr(generation, 'message'):
1008
+ if hasattr(generation.message, 'content'):
1009
+ outcome = self._excerpt(generation.message.content)
1010
+ elif hasattr(generation.message, 'text'):
1011
+ outcome = self._excerpt(generation.message.text)
1012
+ elif hasattr(generation, 'content'):
1013
+ outcome = self._excerpt(generation.content)
1014
+ else:
1015
+ outcome = self._excerpt(str(generation))
1016
+
1017
+ # Strategy 2: Check if response itself has content
1018
+ elif hasattr(response, 'content'):
1019
+ outcome = self._excerpt(response.content)
1020
+
1021
+ # Strategy 3: Check kwargs for output/response
1022
+ elif 'output' in kwargs:
1023
+ outcome = self._excerpt(str(kwargs['output']))
1024
+ elif 'result' in kwargs:
1025
+ outcome = self._excerpt(str(kwargs['result']))
1026
+
1027
+ # Fallback
1028
+ if not outcome:
1029
+ outcome = self._excerpt(str(response))
1030
+
1031
+ # Debug: Warn if still empty
1032
+ if not outcome or len(outcome) == 0:
1033
+ self._vprint(f"[DASEIN][CALLBACK] WARNING: on_llm_end got empty outcome!")
1034
+ print(f" Response: {str(response)[:1000]}")
1035
+ print(f" kwargs keys: {list(kwargs.keys())}")
1036
+
1037
+ except (AttributeError, IndexError, TypeError) as e:
1038
+ self._vprint(f"[DASEIN][CALLBACK] Error in on_llm_end: {e}")
1039
+ outcome = self._excerpt(str(response))
1040
+
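The strategy cascade above amounts to "first non-empty field wins" over a few known response shapes. A condensed, standalone sketch; the SimpleNamespace objects stand in for LangChain's LLMResult/Generation:

from types import SimpleNamespace

def extract_text(response) -> str:
    gens = getattr(response, 'generations', None)
    if gens:
        # Generations may be a list of lists (one list per prompt)
        gen = gens[0][0] if isinstance(gens[0], list) else gens[0]
        if getattr(gen, 'text', None):
            return gen.text
        msg = getattr(gen, 'message', None)
        if msg is not None and getattr(msg, 'content', None):
            return msg.content
        return getattr(gen, 'content', None) or str(gen)
    return getattr(response, 'content', None) or str(response)

msg = SimpleNamespace(content="hello")
resp = SimpleNamespace(generations=[[SimpleNamespace(text="", message=msg)]])
assert extract_text(resp) == "hello"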
1041
+ # # 🎯 PRINT FULL LLM OUTPUT (RAW, UNTRUNCATED) - COMMENTED OUT FOR TESTING
1042
+ # node_name = getattr(self, '_current_chain_node', 'agent')
1043
+ # run_number = getattr(self, '_run_number', 1)
1044
+ # print(f"\n{'='*80}")
1045
+ # print(f"[DASEIN][LLM_END] RUN {run_number} | Node: {node_name}")
1046
+ # print(f"{'='*80}")
1047
+ # print(f"FULL OUTPUT:\n{str(response)}")
1048
+ # print(f"{'='*80}\n")
1049
+
1050
+ # 🎯 CRITICAL: Extract function calls for state tracking (agent-agnostic)
1051
+ try:
1052
+ if hasattr(response, 'generations') and response.generations:
1053
+ first_gen = response.generations[0]
1054
+ if isinstance(first_gen, list) and len(first_gen) > 0:
1055
+ generation = first_gen[0]
1056
+ else:
1057
+ generation = first_gen
1058
+
1059
+ # Check for function_call in message additional_kwargs
1060
+ if hasattr(generation, 'message') and hasattr(generation.message, 'additional_kwargs'):
1061
+ func_call = generation.message.additional_kwargs.get('function_call')
1062
+ if func_call and isinstance(func_call, dict) and 'name' in func_call:
1063
+ func_name = func_call['name']
1064
+ step_num = len(self._trace)
1065
+
1066
+ # Extract arguments and create preview
1067
+ args_str = func_call.get('arguments', '')
1068
+ preview = ''
1069
+ if args_str and len(args_str) > 0:
1070
+ # Take first 100 chars as preview
1071
+ preview = args_str[:100].replace('\n', ' ').replace('\r', '')
1072
+ if len(args_str) > 100:
1073
+ preview += '...'
1074
+
1075
+ call_info = {
1076
+ 'step': step_num,
1077
+ 'ts': datetime.now().isoformat(),
1078
+ 'preview': preview
1079
+ }
1080
+
1081
+ if func_name not in self._function_calls_made:
1082
+ self._function_calls_made[func_name] = []
1083
+ self._function_calls_made[func_name].append(call_info)
1084
+
1085
+ # 🔥 HOTPATH: Set current tool name for next LLM call (which will be inside the tool)
1086
+ self._current_tool_name = func_name
1087
+
1088
+ self._vprint(f"[DASEIN][STATE] Tracked function call: {func_name} (count: {len(self._function_calls_made[func_name])})")
1089
+ except Exception as e:
1090
+ pass # Silently skip if function call extraction fails
1091
+
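The preview logic above is small enough to check directly: first 100 characters of the raw arguments, newlines flattened, ellipsis only when truncated.

def args_preview(args_str: str, limit: int = 100) -> str:
    preview = args_str[:limit].replace('\n', ' ').replace('\r', '')
    return preview + '...' if len(args_str) > limit else preview

assert args_preview('{"query": "x"}') == '{"query": "x"}'
assert args_preview('a' * 150) == 'a' * 100 + '...'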
1092
+ # Extract token usage from response metadata
1093
+ input_tokens = 0
1094
+ output_tokens = 0
1095
+ try:
1096
+ # Try LangChain's standard llm_output field
1097
+ if hasattr(response, 'llm_output') and response.llm_output:
1098
+ llm_output = response.llm_output
1099
+ # Different providers use different field names
1100
+ if 'token_usage' in llm_output:
1101
+ usage = llm_output['token_usage']
1102
+ input_tokens = usage.get('prompt_tokens', 0) or usage.get('input_tokens', 0)
1103
+ output_tokens = usage.get('completion_tokens', 0) or usage.get('output_tokens', 0)
1104
+ elif 'usage_metadata' in llm_output:
1105
+ usage = llm_output['usage_metadata']
1106
+ input_tokens = usage.get('input_tokens', 0) or usage.get('prompt_tokens', 0)
1107
+ output_tokens = usage.get('output_tokens', 0) or usage.get('completion_tokens', 0)
1108
+
1109
+ if (input_tokens == 0 and output_tokens == 0) and hasattr(response, 'generations') and response.generations:
1110
+ first_gen = response.generations[0]
1111
+ if isinstance(first_gen, list) and len(first_gen) > 0:
1112
+ gen = first_gen[0]
1113
+ else:
1114
+ gen = first_gen
1115
+
1116
+ # Check message.usage_metadata (Google GenAI stores it here!)
1117
+ if hasattr(gen, 'message') and hasattr(gen.message, 'usage_metadata'):
1118
+ usage = gen.message.usage_metadata
1119
+ input_tokens = usage.get('input_tokens', 0)
1120
+ output_tokens = usage.get('output_tokens', 0)
1121
+
1122
+ # Fallback: Check generation_info
1123
+ elif hasattr(gen, 'generation_info') and gen.generation_info:
1124
+ gen_info = gen.generation_info
1125
+ if 'usage_metadata' in gen_info:
1126
+ usage = gen_info['usage_metadata']
1127
+ input_tokens = usage.get('prompt_token_count', 0) or usage.get('input_tokens', 0)
1128
+ output_tokens = usage.get('candidates_token_count', 0) or usage.get('output_tokens', 0)
1129
+
1130
+ # Check if we have stored savings from corpus deduplication and adjust tokens
1131
+ current_run_id = kwargs.get('run_id', None)
1132
+ if current_run_id and hasattr(self, '_corpus_token_savings') and current_run_id in self._corpus_token_savings:
1133
+ tokens_saved = self._corpus_token_savings[current_run_id]
1134
+ # Adjust input tokens to reflect deduplication savings
1135
+ if input_tokens > 0:
1136
+                     # Heuristic: adjust only when the provider count and the savings estimate differ by ≥30% of the count (the LLM likely saw the original, pre-dedup prompts)
1137
+ if abs(input_tokens - tokens_saved) >= input_tokens * 0.3:
1138
+ input_tokens = max(0, input_tokens - tokens_saved)
1139
+ # Clean up
1140
+ del self._corpus_token_savings[current_run_id]
1141
+
1142
+ # Log if we got tokens
1143
+ # if input_tokens > 0 or output_tokens > 0:
1144
+ # self._vprint(f"[DASEIN][TOKENS] Captured: {input_tokens} in, {output_tokens} out")
1145
+
1146
+ except Exception as e:
1147
+ # Print error for debugging
1148
+ self._vprint(f"[DASEIN][CALLBACK] Error extracting tokens: {e}")
1149
+ import traceback
1150
+ traceback.print_exc()
1151
+
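Providers disagree on usage field names (prompt_tokens vs input_tokens, completion_tokens vs output_tokens, Google's *_token_count variants), which is why the lookups above chain across aliases. A condensed sketch of that normalization:

def first_nonzero(usage: dict, *keys: str) -> int:
    # Return the first alias that is present and non-zero
    for key in keys:
        if usage.get(key, 0):
            return usage[key]
    return 0

def normalize_usage(usage: dict):
    tokens_in = first_nonzero(usage, 'prompt_tokens', 'input_tokens', 'prompt_token_count')
    tokens_out = first_nonzero(usage, 'completion_tokens', 'output_tokens', 'candidates_token_count')
    return tokens_in, tokens_out

assert normalize_usage({'prompt_tokens': 12, 'completion_tokens': 3}) == (12, 3)
assert normalize_usage({'input_tokens': 12, 'output_tokens': 3}) == (12, 3)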
1152
+ # GNN-related fields: compute tokens_delta
1153
+ step_index = len(self._trace)
1154
+ tokens_delta = 0
1155
+ # Find previous step with tokens_output to compute delta
1156
+ for prev_step in reversed(self._trace):
1157
+ if 'tokens_output' in prev_step and prev_step['tokens_output'] > 0:
1158
+ tokens_delta = output_tokens - prev_step['tokens_output']
1159
+ break
1160
+
1161
+ # Calculate duration_ms by matching with corresponding llm_start
1162
+ duration_ms = 0
1163
+ for i in range(len(self._trace) - 1, -1, -1):
1164
+ if self._trace[i].get('step_type') == 'llm_start':
1165
+ # Found the matching llm_start
1166
+ if i in self._start_times:
1167
+ start_time = self._start_times[i]
1168
+ end_time = datetime.now()
1169
+ duration_ms = int((end_time - start_time).total_seconds() * 1000)
1170
+ # Update the llm_start step with duration_ms
1171
+ self._trace[i]['duration_ms'] = duration_ms
1172
+ break
1173
+
1174
+ step = {
1175
+ "step_type": "llm_end",
1176
+ "tool_name": "",
1177
+ "args_excerpt": "",
1178
+ "outcome": self._excerpt(outcome, max_len=1000), # Truncate to 1000 chars
1179
+ "ts": datetime.now().isoformat(),
1180
+ "run_id": None,
1181
+ "parent_run_id": None,
1182
+ "tokens_input": input_tokens,
1183
+ "tokens_output": output_tokens,
1184
+ "node": self._current_chain_node, # LangGraph node name (if available)
1185
+ # GNN step-level fields
1186
+ "step_index": step_index,
1187
+ "tokens_delta": tokens_delta,
1188
+ "duration_ms": duration_ms,
1189
+ }
1190
+ self._trace.append(step)
1191
+
1192
+ def on_agent_action(
1193
+ self,
1194
+ action: Any,
1195
+ **kwargs: Any,
1196
+ ) -> None:
1197
+ """Called when an agent takes an action."""
1198
+ tool_name = getattr(action, 'tool', 'unknown')
1199
+ args_excerpt = self._excerpt(str(getattr(action, 'tool_input', '')))
1200
+ outcome = self._excerpt(str(getattr(action, 'log', '')))
1201
+
1202
+ step = {
1203
+ "step_type": "agent_action",
1204
+ "tool_name": tool_name,
1205
+ "args_excerpt": args_excerpt,
1206
+ "outcome": outcome,
1207
+ "ts": datetime.now().isoformat(),
1208
+ "run_id": None,
1209
+ "parent_run_id": None,
1210
+ }
1211
+ self._trace.append(step)
1212
+
1213
+ def on_agent_finish(
1214
+ self,
1215
+ finish: Any,
1216
+ **kwargs: Any,
1217
+ ) -> None:
1218
+ """Called when an agent finishes."""
1219
+ outcome = self._excerpt(str(getattr(finish, 'return_values', '')))
1220
+
1221
+ step = {
1222
+ "step_type": "agent_finish",
1223
+ "tool_name": None,
1224
+ "args_excerpt": "",
1225
+ "outcome": outcome,
1226
+ "ts": datetime.now().isoformat(),
1227
+ "run_id": None,
1228
+ "parent_run_id": None,
1229
+ }
1230
+ self._trace.append(step)
1231
+
1232
+ def on_tool_start(
1233
+ self,
1234
+ serialized: Dict[str, Any],
1235
+ input_str: str,
1236
+ *,
1237
+ run_id: str,
1238
+ parent_run_id: Optional[str] = None,
1239
+ tags: Optional[List[str]] = None,
1240
+ metadata: Optional[Dict[str, Any]] = None,
1241
+ inputs: Optional[Dict[str, Any]] = None,
1242
+ **kwargs: Any,
1243
+ ) -> None:
1244
+ """Called when a tool starts running.
1245
+
1246
+ This is where we detect and track dynamic tools that weren't
1247
+ statically attached to the agent at init time.
1248
+ """
1249
+ import time
1250
+ tool_name = serialized.get("name", "unknown") if serialized else "unknown"
1251
+
1252
+ # Track discovered tools for reporting
1253
+ if tool_name != "unknown" and tool_name not in self._discovered_tools:
1254
+ self._discovered_tools.add(tool_name)
1255
+ # Tool discovered and tracked (silently)
1256
+
1257
+ # Store tool name for later use in on_tool_end
1258
+ self._tool_name_by_run_id[run_id] = tool_name
1259
+
1260
+ # 🔥 HOTPATH: Track current tool for pipecleaner deduplication
1261
+ self._current_tool_name = tool_name
1262
+
1263
+ # Apply tool-level rule injection
1264
+ # self._vprint(f"[DASEIN][CALLBACK] on_tool_start called!") # Commented out - too noisy
1265
+ # self._vprint(f"[DASEIN][CALLBACK] Tool: {tool_name}") # Commented out - too noisy
1266
+ # self._vprint(f"[DASEIN][CALLBACK] Input: {input_str[:100]}...") # Commented out - too noisy
1267
+ # self._vprint(f"[DASEIN][APPLY] on_tool_start: selected_rules={len(self._selected_rules)}") # Commented out - too noisy
1268
+ modified_input = self._inject_tool_rule_if_applicable("tool_start", tool_name, input_str)
1269
+
1270
+ args_excerpt = self._excerpt(modified_input)
1271
+
1272
+ # GNN-related fields: capture step-level metrics
1273
+ step_index = len(self._trace)
1274
+ tool_input_chars = len(str(input_str))
1275
+
1276
+ # Track which rules triggered at this step
1277
+ rule_triggered_here = []
1278
+ if hasattr(self, '_selected_rules') and self._selected_rules:
1279
+ for rule_meta in self._selected_rules:
1280
+ if isinstance(rule_meta, tuple) and len(rule_meta) == 2:
1281
+ rule_obj, _metadata = rule_meta
1282
+ else:
1283
+ rule_obj = rule_meta
1284
+ if getattr(rule_obj, 'target_step_type', '') == "tool_start":
1285
+ rule_triggered_here.append(getattr(rule_obj, 'id', 'unknown'))
1286
+
1287
+ # Record start time for duration calculation (keyed by run_id for tools)
1288
+ start_time = datetime.now()
1289
+ self._start_times[run_id] = start_time
1290
+
1291
+ step = {
1292
+ "step_type": "tool_start",
1293
+ "tool_name": tool_name,
1294
+ "args_excerpt": args_excerpt,
1295
+ "outcome": "",
1296
+ "ts": start_time.isoformat(),
1297
+ "run_id": run_id,
1298
+ "parent_run_id": parent_run_id,
1299
+ "node": self._current_chain_node, # LangGraph node name (if available)
1300
+ # GNN step-level fields
1301
+ "step_index": step_index,
1302
+ "tool_input_chars": tool_input_chars,
1303
+ "rule_triggered_here": rule_triggered_here,
1304
+ }
1305
+ self._trace.append(step)
1306
+
1307
+ def on_tool_end(
1308
+ self,
1309
+ output: str,
1310
+ *,
1311
+ run_id: str,
1312
+ parent_run_id: Optional[str] = None,
1313
+ tags: Optional[List[str]] = None,
1314
+ **kwargs: Any,
1315
+ ) -> Any:
1316
+ """Called when a tool ends running."""
1317
+ import time
1318
+ # Get the tool name from the corresponding tool_start
1319
+ tool_name = self._tool_name_by_run_id.get(run_id, "unknown")
1320
+
1321
+ # Handle different output types (LangGraph may pass ToolMessage objects)
1322
+ output_str = str(output)
1323
+
1324
+ # Note: Pipecleaner deduplication happens at ToolExecutor level (see wrappers.py)
1325
+
1326
+ outcome = self._excerpt(output_str)
1327
+
1328
+ # self._vprint(f"[DASEIN][CALLBACK] on_tool_end called!") # Commented out - too noisy
1329
+ # self._vprint(f"[DASEIN][CALLBACK] Tool: {tool_name}") # Commented out - too noisy
1330
+ # self._vprint(f"[DASEIN][CALLBACK] Output length: {len(output_str)} chars") # Commented out - too noisy
1331
+ # self._vprint(f"[DASEIN][CALLBACK] Outcome length: {len(outcome)} chars") # Commented out - too noisy
1332
+
1333
+ # GNN-related fields: capture tool output metrics
1334
+ step_index = len(self._trace)
1335
+ tool_output_chars = len(output_str)
1336
+
1337
+ # Estimate tool_output_items (heuristic: count lines, or rows if SQL-like)
1338
+ tool_output_items = 0
1339
+ try:
1340
+ # Try to count lines as a proxy for items
1341
+ if output_str:
1342
+ tool_output_items = output_str.count('\n') + 1
1343
+         except Exception:
1344
+ tool_output_items = 0
1345
+
1346
+ # Calculate duration_ms using run_id to match with tool_start
1347
+ duration_ms = 0
1348
+ if run_id in self._start_times:
1349
+ start_time = self._start_times[run_id]
1350
+ end_time = datetime.now()
1351
+ duration_ms = int((end_time - start_time).total_seconds() * 1000)
1352
+ # Update the corresponding tool_start step with duration_ms
1353
+ for i in range(len(self._trace) - 1, -1, -1):
1354
+ if self._trace[i].get('step_type') == 'tool_start' and self._trace[i].get('run_id') == run_id:
1355
+ self._trace[i]['duration_ms'] = duration_ms
1356
+ break
1357
+ # Clean up start time
1358
+ del self._start_times[run_id]
1359
+
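Tool timing is exact because both callbacks share a run_id: the start stores a timestamp keyed by run_id, the end pops it and takes the difference. A minimal sketch of that bookkeeping:

from datetime import datetime

_start_times = {}

def on_start(run_id: str) -> None:
    _start_times[run_id] = datetime.now()

def on_end(run_id: str) -> int:
    # pop() doubles as the cleanup step; a missing start yields 0
    start = _start_times.pop(run_id, None)
    if start is None:
        return 0
    return int((datetime.now() - start).total_seconds() * 1000)

on_start("run-1")
assert on_end("run-1") >= 0
assert on_end("run-1") == 0  # start already consumed by the first end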
1360
+ # Extract available selectors from DOM-like output (web browse agents)
1361
+ available_selectors = None
1362
+ if tool_name in ['extract_text', 'get_elements', 'extract_hyperlinks', 'extract_content']:
1363
+ available_selectors = self._extract_semantic_selectors(output_str)
1364
+
1365
+ step = {
1366
+ "step_type": "tool_end",
1367
+ "tool_name": tool_name,
1368
+ "args_excerpt": "",
1369
+ "outcome": self._excerpt(outcome, max_len=1000), # Truncate to 1000 chars
1370
+ "ts": datetime.now().isoformat(),
1371
+ "run_id": run_id,
1372
+ "parent_run_id": parent_run_id,
1373
+ "node": self._current_chain_node, # LangGraph node name (if available)
1374
+ # GNN step-level fields
1375
+ "step_index": step_index,
1376
+ "tool_output_chars": tool_output_chars,
1377
+ "tool_output_items": tool_output_items,
1378
+ "duration_ms": duration_ms,
1379
+ }
1380
+
1381
+ # Add available_selectors only if found (keep trace light)
1382
+ if available_selectors:
1383
+ step["available_selectors"] = available_selectors
1384
+ self._trace.append(step)
1385
+
1386
+ # Clean up the stored tool name
1387
+ if run_id in self._tool_name_by_run_id:
1388
+ del self._tool_name_by_run_id[run_id]
1389
+
1390
+ # 🔥 HOTPATH: Clear current tool
1391
+ self._current_tool_name = None
1392
+
1393
+ def on_tool_error(
1394
+ self,
1395
+ error: BaseException,
1396
+ *,
1397
+ run_id: str,
1398
+ parent_run_id: Optional[str] = None,
1399
+ tags: Optional[List[str]] = None,
1400
+ **kwargs: Any,
1401
+ ) -> None:
1402
+ """Called when a tool encounters an error."""
1403
+ error_msg = self._excerpt(str(error))
1404
+
1405
+ step = {
1406
+ "step_type": "tool_error",
1407
+ "tool_name": "",
1408
+ "args_excerpt": "",
1409
+ "outcome": f"ERROR: {error_msg}",
1410
+ "ts": datetime.now().isoformat(),
1411
+ "run_id": run_id,
1412
+ "parent_run_id": parent_run_id,
1413
+ }
1414
+ self._trace.append(step)
1415
+
1416
+ def on_chain_start(
1417
+ self,
1418
+ serialized: Dict[str, Any],
1419
+ inputs: Dict[str, Any],
1420
+ **kwargs: Any,
1421
+ ) -> None:
1422
+ """Called when a chain starts running."""
1423
+ chain_name = serialized.get("name", "unknown") if serialized else "unknown"
1424
+ # self._vprint(f"[DASEIN][CALLBACK] on_chain_start called!") # Commented out - too noisy
1425
+ # self._vprint(f"[DASEIN][CALLBACK] Chain: {chain_name}") # Commented out - too noisy
1426
+
1427
+ # 🚨 OPTIMIZED: For LangGraph agents, suppress redundant chain_start events
1428
+ # LangGraph fires on_chain_start for every internal node, creating noise
1429
+ # We already capture llm_start, llm_end, tool_start, tool_end which are more meaningful
1430
+ if self._is_langgraph:
1431
+ # Track current chain node for future targeted injection
1432
+ # 🎯 CRITICAL: Extract actual node name from metadata (same as on_llm_start)
1433
+ if 'metadata' in kwargs and isinstance(kwargs['metadata'], dict):
1434
+ if 'langgraph_node' in kwargs['metadata']:
1435
+ self._current_chain_node = kwargs['metadata']['langgraph_node']
1436
+ # print(f"🔵 [NODE EXEC] {self._current_chain_node}") # Commented out - too noisy
1437
+ else:
1438
+ self._current_chain_node = chain_name
1439
+ # print(f"🔵 [NODE EXEC] {chain_name}") # Commented out - too noisy
1440
+ else:
1441
+ self._current_chain_node = chain_name
1442
+ # print(f"🔵 [NODE EXEC] {chain_name}") # Commented out - too noisy
1443
+
1444
+ # self._vprint(f"[DASEIN][CALLBACK] Suppressing redundant chain_start for LangGraph agent") # Commented out - too noisy
1445
+ # Still handle tool executors
1446
+ if chain_name in {"tools", "ToolNode", "ToolExecutor"}:
1447
+ # self._vprint(f"[DASEIN][CALLBACK] Bridging chain_start to tool_start for {chain_name}") # Commented out - too noisy
1448
+                 self._handle_tool_executor_start(serialized, inputs, **kwargs)
1450
+ return
1451
+
1452
+ # For standard LangChain agents, keep chain_start events
1453
+ # Bridge to tool_start for tool executors
1454
+ if chain_name in {"tools", "ToolNode", "ToolExecutor"}:
1455
+ # self._vprint(f"[DASEIN][CALLBACK] Bridging chain_start to tool_start for {chain_name}") # Commented out - too noisy
1456
+ self._handle_tool_executor_start(serialized, inputs, **kwargs)
1457
+
1458
+ args_excerpt = self._excerpt(str(inputs))
1459
+
1460
+ # Record start time for duration calculation
1461
+ step_index = len(self._trace)
1462
+ start_time = datetime.now()
1463
+ self._start_times[f"chain_{step_index}"] = start_time
1464
+
1465
+ step = {
1466
+ "step_type": "chain_start",
1467
+ "tool_name": chain_name,
1468
+ "args_excerpt": args_excerpt,
1469
+ "outcome": "",
1470
+ "ts": start_time.isoformat(),
1471
+ "run_id": None,
1472
+ "parent_run_id": None,
1473
+ "step_index": step_index,
1474
+ }
1475
+ self._trace.append(step)
1476
+
1477
+ def on_chain_end(
1478
+ self,
1479
+ outputs: Dict[str, Any],
1480
+ **kwargs: Any,
1481
+ ) -> None:
1482
+ """Called when a chain ends running."""
1483
+ # 🚨 OPTIMIZED: Suppress redundant chain_end for LangGraph agents
1484
+ if self._is_langgraph:
1485
+ return
1486
+
1487
+ outcome = self._excerpt(str(outputs))
1488
+
1489
+ # Calculate duration_ms by matching with corresponding chain_start
1490
+ duration_ms = 0
1491
+ for i in range(len(self._trace) - 1, -1, -1):
1492
+ if self._trace[i].get('step_type') == 'chain_start':
1493
+ # Found the matching chain_start
1494
+ chain_key = f"chain_{i}"
1495
+ if chain_key in self._start_times:
1496
+ start_time = self._start_times[chain_key]
1497
+ end_time = datetime.now()
1498
+ duration_ms = int((end_time - start_time).total_seconds() * 1000)
1499
+ # Update the chain_start step with duration_ms
1500
+ self._trace[i]['duration_ms'] = duration_ms
1501
+ # Clean up start time
1502
+ del self._start_times[chain_key]
1503
+ break
1504
+
1505
+ step = {
1506
+ "step_type": "chain_end",
1507
+ "tool_name": "",
1508
+ "args_excerpt": "",
1509
+ "outcome": outcome,
1510
+ "ts": datetime.now().isoformat(),
1511
+ "run_id": None,
1512
+ "parent_run_id": None,
1513
+ "duration_ms": duration_ms,
1514
+ }
1515
+ self._trace.append(step)
1516
+
1517
+ def on_chain_error(
1518
+ self,
1519
+ error: BaseException,
1520
+ **kwargs: Any,
1521
+ ) -> None:
1522
+ """Called when a chain encounters an error."""
1523
+ error_msg = self._excerpt(str(error))
1524
+
1525
+ step = {
1526
+ "step_type": "chain_error",
1527
+ "tool_name": "",
1528
+ "args_excerpt": "",
1529
+ "outcome": f"ERROR: {error_msg}",
1530
+ "ts": datetime.now().isoformat(),
1531
+ "run_id": None,
1532
+ "parent_run_id": None,
1533
+ }
1534
+ self._trace.append(step)
1535
+
1536
+ def _extract_recent_message(self, inputs: Dict[str, Any]) -> str:
1537
+ """
1538
+ Extract the most recent message from LangGraph inputs to show thought progression.
1539
+
1540
+ For LangGraph agents, inputs contain {'messages': [msg1, msg2, ...]}.
1541
+ Instead of showing the entire history, we extract just the last message.
1542
+ """
1543
+ try:
1544
+ # Check if this is a LangGraph message format
1545
+ if isinstance(inputs, dict) and 'messages' in inputs:
1546
+ messages = inputs['messages']
1547
+ if isinstance(messages, list) and len(messages) > 0:
1548
+ # Get the most recent message
1549
+ last_msg = messages[-1]
1550
+
1551
+ # Extract content based on message type
1552
+ if hasattr(last_msg, 'content'):
1553
+ # LangChain message object
1554
+ content = last_msg.content
1555
+ msg_type = getattr(last_msg, 'type', 'unknown')
1556
+ return self._excerpt(f"[{msg_type}] {content}")
1557
+ elif isinstance(last_msg, tuple) and len(last_msg) >= 2:
1558
+ # Tuple format: (role, content)
1559
+ return self._excerpt(f"[{last_msg[0]}] {last_msg[1]}")
1560
+ else:
1561
+ # Unknown format, convert to string
1562
+ return self._excerpt(str(last_msg))
1563
+
1564
+ # For non-message inputs, check if it's a list of actions/tool calls
1565
+ if isinstance(inputs, list) and len(inputs) > 0:
1566
+ # This might be tool call info
1567
+ return self._excerpt(str(inputs[0]))
1568
+
1569
+ # Fall back to original behavior for non-LangGraph agents
1570
+ return self._excerpt(str(inputs))
1571
+
1572
+ except Exception as e:
1573
+ # On any error, fall back to original behavior
1574
+ return self._excerpt(str(inputs))
1575
+
1576
+ def _excerpt(self, obj: Any, max_len: int = 250, from_end: bool = False) -> str:
1577
+ """
1578
+         Truncate text to max_len with ellipsis.
1579
+
1580
+ Args:
1581
+ obj: Object to convert to string and truncate
1582
+ max_len: Maximum length of excerpt
1583
+ from_end: If True, take LAST max_len chars (better for system prompts).
1584
+ If False, take FIRST max_len chars (better for tool args).
1585
+ """
1586
+ text = str(obj)
1587
+ if len(text) <= max_len:
1588
+ return text
1589
+
1590
+ if from_end:
1591
+ # Take last X chars - better for system prompts where the end contains user's actual query
1592
+ return "..." + text[-(max_len-3):]
1593
+ else:
1594
+ # Take first X chars - better for tool inputs
1595
+ return text[:max_len-3] + "..."
1596
+
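Condensed from the method above, with the two truncation modes checked on an arbitrary string:

def excerpt(text: str, max_len: int = 250, from_end: bool = False) -> str:
    if len(text) <= max_len:
        return text
    return "..." + text[-(max_len - 3):] if from_end else text[:max_len - 3] + "..."

assert excerpt("abcdefghij", max_len=8) == "abcde..."
assert excerpt("abcdefghij", max_len=8, from_end=True) == "...fghij"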
1597
+ def _extract_semantic_selectors(self, html_text: str) -> List[Dict[str, int]]:
1598
+ """
1599
+ Extract semantic HTML tags from output for grounding web browse rules.
1600
+ Only extracts semantic tags (nav, header, h1, etc.) to keep trace lightweight.
1601
+
1602
+ Args:
1603
+ html_text: Output text that may contain HTML
1604
+
1605
+ Returns:
1606
+ List of {"tag": str, "count": int} sorted by count descending, or None if no HTML
1607
+ """
1608
+ import re
1609
+
1610
+ # Quick check: does this look like HTML?
1611
+ if '<' not in html_text or '>' not in html_text:
1612
+ return None
1613
+
1614
+ # Semantic tags we care about (prioritized for web browse agents)
1615
+ semantic_tags = [
1616
+ # Navigation/Structure (highest priority)
1617
+ 'nav', 'header', 'footer', 'main', 'article', 'section', 'aside',
1618
+
1619
+ # Headers (critical for "find headers" queries!)
1620
+ 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
1621
+
1622
+ # Interactive
1623
+ 'a', 'button', 'form', 'input', 'textarea', 'select', 'label',
1624
+
1625
+ # Lists (often used for navigation)
1626
+ 'ul', 'ol', 'li',
1627
+
1628
+ # Tables (data extraction)
1629
+ 'table', 'thead', 'tbody', 'tr', 'th', 'td',
1630
+
1631
+ # Media
1632
+ 'img', 'video', 'audio'
1633
+ ]
1634
+
1635
+ # Count occurrences of each semantic tag
1636
+ found_tags = {}
1637
+ for tag in semantic_tags:
1638
+ # Pattern: <tag ...> or <tag> (opening tags only)
1639
+ pattern = f'<{tag}[\\s>]'
1640
+ matches = re.findall(pattern, html_text, re.IGNORECASE)
1641
+ if matches:
1642
+ found_tags[tag] = len(matches)
1643
+
1644
+ # Return None if no semantic tags found
1645
+ if not found_tags:
1646
+ return None
1647
+
1648
+ # Convert to list format, sorted by count descending
1649
+ # Limit to top 15 to keep trace light
1650
+ result = [{"tag": tag, "count": count}
1651
+ for tag, count in sorted(found_tags.items(), key=lambda x: -x[1])]
1652
+ return result[:15] # Top 15 most common tags
1653
+
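The pattern `<tag[\s>]` matches opening tags with or without attributes while ignoring closing tags like </nav>. A standalone check of the counting logic:

import re

def count_tags(html: str, tags) -> dict:
    counts = {}
    for tag in tags:
        # Opening tags only: "<nav " or "<nav>", never "</nav>"
        found = re.findall(rf'<{tag}[\s>]', html, re.IGNORECASE)
        if found:
            counts[tag] = len(found)
    return counts

html = '<nav class="top"><h1>Title</h1></nav><h1>Again</h1>'
assert count_tags(html, ['nav', 'h1', 'table']) == {'nav': 1, 'h1': 2}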
1654
+ def set_selected_rules(self, rules: List[Dict[str, Any]]):
1655
+ """Set the rules selected for this run.
1656
+ Normalize incoming dicts/tuples into attribute-accessible rule objects.
1657
+ """
1658
+ try:
1659
+ from types import SimpleNamespace
1660
+ normalized = []
1661
+ for item in rules or []:
1662
+ # Unwrap (rule, metadata) tuples if present
1663
+ if isinstance(item, tuple) and len(item) == 2:
1664
+ rule_candidate = item[0]
1665
+ else:
1666
+ rule_candidate = item
1667
+ # Convert dicts to attribute-accessible objects
1668
+ if isinstance(rule_candidate, dict):
1669
+ # Ensure advice_text exists
1670
+ if 'advice_text' not in rule_candidate and 'advice' in rule_candidate:
1671
+ rule_candidate['advice_text'] = rule_candidate.get('advice')
1672
+ normalized.append(SimpleNamespace(**rule_candidate))
1673
+ else:
1674
+ normalized.append(rule_candidate)
1675
+ self._selected_rules = normalized
1676
+ except Exception:
1677
+ # Fallback to raw rules
1678
+ self._selected_rules = rules
1679
+
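Normalization lets the rest of the handler use uniform attribute access whether a rule arrived as a dict, an object, or a (rule, metadata) tuple. A condensed sketch:

from types import SimpleNamespace

def normalize_rule(item):
    # Unwrap (rule, metadata) tuples, then promote dicts to attribute access
    rule = item[0] if isinstance(item, tuple) and len(item) == 2 else item
    if isinstance(rule, dict):
        rule.setdefault('advice_text', rule.get('advice'))
        return SimpleNamespace(**rule)
    return rule

rule = normalize_rule(({'id': 'r1', 'advice': 'strip fences'}, {'score': 0.9}))
assert rule.advice_text == 'strip fences'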
1680
+ def get_trace(self) -> List[Dict[str, Any]]:
1681
+ """Get the current trace (instance-level, thread-safe)."""
1682
+ return self._trace.copy()
1683
+
1684
+ def _inject_first_turn_override(self, prompts: List[str]) -> List[str]:
1685
+ """Inject a generic first-turn override to own turn 1."""
1686
+ if not prompts:
1687
+ return prompts
1688
+
1689
+ # Create a generic first-turn override
1690
+ first_turn_override = """🚨🚨🚨 CRITICAL SYSTEM DIRECTIVE 🚨🚨🚨
1691
+ ⚠️ MANDATORY: You MUST follow this exact sequence or the task will FAIL
1692
+
1693
+ TURN 1 REQUIREMENT:
1694
+ - Output ONLY: Action: sql_db_list_tables
1695
+ Action Input: ACK_RULES:[r1]
1696
+ - Do NOT use any other tools
1697
+ - Do NOT perform any planning
1698
+ - Do NOT output anything else
1699
+
1700
+ TURN 2+ (After ACK):
1701
+ - If ACK was correct, proceed with normal tools and schema
1702
+ - Skip table discovery and schema introspection
1703
+ - Use known tables directly
1704
+
1705
+ 🚨 FAILURE TO ACK IN TURN 1 = IMMEDIATE TASK TERMINATION 🚨
1706
+
1707
+ """
1708
+
1709
+ # Put the injection at the VERY BEGINNING of the system prompt
1710
+ modified_prompts = prompts.copy()
1711
+ if modified_prompts:
1712
+ modified_prompts[0] = first_turn_override + modified_prompts[0]
1713
+
1714
+ self._vprint(f"[DASEIN][APPLY] Injected first-turn override")
1715
+ return modified_prompts
1716
+
1717
+ def _should_inject_rule(self, step_type: str, tool_name: str) -> bool:
1718
+ """Determine if we should inject a rule at this step."""
1719
+ # Inject for LLM starts (system-level rules) and tool starts (tool-level rules)
1720
+ if step_type == "llm_start":
1721
+ return True
1722
+ if step_type == "tool_start":
1723
+ return True
1724
+ return False
1725
+
1726
+ def _inject_rule_if_applicable(self, step_type: str, tool_name: str, prompts: List[str]) -> List[str]:
1727
+ """Inject rules into prompts if applicable."""
1728
+
1729
+ if not self._should_inject_rule(step_type, tool_name):
1730
+ return prompts
1731
+
1732
+ # If no rules selected yet, return prompts unchanged
1733
+ if not self._selected_rules:
1734
+ return prompts
1735
+
1736
+ # Check guard to prevent duplicate injection
1737
+ # 🎯 CRITICAL: For LangGraph planning nodes, SKIP the guard - we need to inject on EVERY call
1738
+ # because the same node (e.g., supervisor) can be called multiple times dynamically
1739
+ use_guard = True
1740
+ if hasattr(self, '_is_langgraph') and self._is_langgraph:
1741
+ if step_type == 'llm_start' and hasattr(self, '_current_chain_node'):
1742
+ # For planning nodes, skip guard to allow re-injection on subsequent calls
1743
+ if hasattr(self, '_planning_nodes') and self._current_chain_node in self._planning_nodes:
1744
+ use_guard = False
1745
+
1746
+ if use_guard:
1747
+ guard_key = (step_type, tool_name)
1748
+ if guard_key in self._injection_guard:
1749
+ return prompts
1750
+
1751
+ try:
1752
+ # Inject rules that target llm_start and tool_start (both go to system prompt)
1753
+ system_rules = []
1754
+ for rule_meta in self._selected_rules:
1755
+ # Handle tuple format from select_rules: (rule, metadata)
1756
+ if isinstance(rule_meta, tuple) and len(rule_meta) == 2:
1757
+ rule, metadata = rule_meta
1758
+ elif isinstance(rule_meta, dict):
1759
+ if 'rule' in rule_meta:
1760
+ rule = rule_meta.get('rule', {})
1761
+ else:
1762
+ rule = rule_meta
1763
+ else:
1764
+ rule = rule_meta
1765
+
1766
+ # Check if this rule targets system-level injection (llm_start only)
1767
+ target_step_type = getattr(rule, 'target_step_type', '')
1768
+
1769
+ # 🚨 CRITICAL: For LangGraph agents, only skip planning rules if agent was successfully recreated
1770
+ # If recreation failed, we need to inject via callback as fallback
1771
+ if step_type == 'llm_start' and hasattr(self, '_is_langgraph') and self._is_langgraph:
1772
+ # Only skip if agent was actually recreated with planning rules embedded
1773
+ if hasattr(self, '_agent_was_recreated') and self._agent_was_recreated:
1774
+ if target_step_type in ['llm_start', 'chain_start']:
1775
+ self._vprint(f"[DASEIN][CALLBACK] Skipping planning rule {getattr(rule, 'id', 'unknown')} for LangGraph agent (already injected at creation)")
1776
+ continue
1777
+
1778
+ # 🎯 NODE-SCOPED INJECTION: Check target_node if specified (for node-specific rules)
1779
+ if target_step_type in ['llm_start', 'chain_start']:
1780
+ current_node = getattr(self, '_current_chain_node', None)
1781
+
1782
+ # Check if this rule targets a specific node
1783
+ target_node = getattr(rule, 'target_node', None)
1784
+ if target_node:
1785
+ # Rule has explicit target_node - ONLY inject if we're in that node
1786
+ if current_node != target_node:
1787
+ # Silently skip - not the target node
1788
+ continue
1789
+ else:
1790
+ # No target_node specified - use existing planning_nodes logic (backward compatibility)
1791
+ if hasattr(self, '_planning_nodes') and self._planning_nodes:
1792
+ # Check if current node is in the planning nodes set
1793
+ if current_node not in self._planning_nodes:
1794
+ # Silently skip non-planning nodes
1795
+ continue
1796
+ # Injecting into planning node (logged in detailed injection log below)
1797
+
1798
+ advice = getattr(rule, 'advice_text', getattr(rule, 'advice', ''))
1799
+ if advice:
1800
+ system_rules.append(advice)
1801
+
1802
+ # Apply system-level rules if any
1803
+ if system_rules and prompts:
1804
+ modified_prompts = prompts.copy()
1805
+ system_prompt = modified_prompts[0]
1806
+
1807
+ # Combine all system rules with much stronger language
1808
+ rule_injections = []
1809
+ for advice in system_rules:
1810
+ if "TOOL RULE:" in advice:
1811
+ # Make tool rules even more explicit
1812
+ rule_injections.append(f"🚨 CRITICAL TOOL OVERRIDE: {advice}")
1813
+ else:
1814
+ rule_injections.append(f"🚨 CRITICAL SYSTEM OVERRIDE: {advice}")
1815
+
1816
+ # Build execution state context (agent-agnostic, with argument previews)
1817
+ # Strategy: Show all if ≤5 calls, else show most recent 3
1818
+ # Rationale: Small counts get full context; larger counts show recent to prevent duplicates
1819
+ state_context = ""
1820
+ if hasattr(self, '_function_calls_made') and self._function_calls_made:
1821
+ state_lines = []
1822
+ for func_name in sorted(self._function_calls_made.keys()):
1823
+ calls = self._function_calls_made[func_name]
1824
+ count = len(calls)
1825
+
1826
+ # Hybrid window: show all if ≤5 calls, else show recent 3
1827
+ if count <= 5:
1828
+ # Show all calls with previews
1829
+ state_lines.append(f" • {func_name}: called {count}x:")
1830
+ for call in calls:
1831
+ preview = call.get('preview', '')
1832
+ if preview:
1833
+ state_lines.append(f" [step {call['step']}] {preview}")
1834
+ else:
1835
+ state_lines.append(f" [step {call['step']}] (no args)")
1836
+ else:
1837
+ # Show summary + recent 3 with previews
1838
+ state_lines.append(f" • {func_name}: called {count}x (most recent 3):")
1839
+ for call in calls[-3:]:
1840
+ preview = call.get('preview', '')
1841
+ if preview:
1842
+ state_lines.append(f" [step {call['step']}] {preview}")
1843
+ else:
1844
+ state_lines.append(f" [step {call['step']}] (no args)")
1845
+
1846
+ if state_lines:
1847
+ state_context = f"""
1848
+ EXECUTION STATE (functions called so far in this run):
1849
+ {chr(10).join(state_lines)}
1850
+
1851
+ """
1852
+
1853
+ combined_injection = f""" SYSTEM OVERRIDE — PLANNING TURN ONLY
1854
+ These rules OVERRIDE all defaults. You MUST enforce them exactly or the task FAILS.
1855
+
1856
+ Tags: AVOID (absolute ban), SKIP (force bypass), FIX (mandatory params), PREFER (ranked choice), HINT (optional).
1857
+ Precedence: AVOID/SKIP > FIX > PREFER > HINT. On conflict, the higher rule ALWAYS wins.
1858
+
1859
+ {state_context}Checklist (non-negotiable):
1860
+ - AVOID: no banned targets under ANY condition.
1861
+ - SKIP: bypass skipped steps/tools; NEVER retry them.
1862
+ - FIX: all required params/settings MUST be included.
1863
+ - PREFER: when multiple compliant options exist, choose the preferred—NO exceptions.
1864
+ - Recovery: if a banned/skipped item already failed, IMMEDIATELY switch to a compliant alternative.
1865
+
1866
+ Output Contract: Produce ONE compliant tool/function call (or direct answer if none is needed).
1867
+ NO reasoning, NO justification, NO markdown.
1868
+
1869
+ Rules to Enforce:
1870
+
1871
+
1872
+ {chr(10).join(rule_injections)}
1873
+
1874
+
1875
+ """
1876
+ # Put the injection at the VERY BEGINNING of the system prompt
1877
+ modified_prompts[0] = combined_injection + system_prompt
1878
+
1879
+ # Add to guard (only if we're using the guard)
1880
+ if use_guard:
1881
+ self._injection_guard.add(guard_key)
1882
+
1883
+ # Log the complete injection for debugging
1884
+ # Compact injection summary
1885
+ if hasattr(self, '_is_langgraph') and self._is_langgraph:
1886
+ # LangGraph: show node name
1887
+ func_count = len(self._function_calls_made) if hasattr(self, '_function_calls_made') and state_context else 0
1888
+ node_name = getattr(self, '_current_chain_node', 'unknown')
1889
+ print(f"[DASEIN] 🎯 Injecting {len(system_rules)} rule(s) into {node_name} | State: {func_count} functions tracked")
1890
+ else:
1891
+ # LangChain: simpler logging without node name
1892
+ print(f"[DASEIN] 🎯 Injecting {len(system_rules)} rule(s) into agent")
1893
+
1894
+ return modified_prompts
1895
+
1896
+ except Exception as e:
1897
+ self._vprint(f"[DASEIN][APPLY] Injection failed: {e}")
1898
+
1899
+ return prompts
1900
+
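The hybrid window from the state-context builder, in isolation: small call histories are shown in full, longer ones keep only the recent tail that matters for avoiding duplicate calls.

def windowed(calls: list, full_limit: int = 5, recent: int = 3) -> list:
    return calls if len(calls) <= full_limit else calls[-recent:]

calls = [{'step': i} for i in range(8)]
assert windowed(calls) == calls[-3:]     # 8 calls -> most recent 3
assert windowed(calls[:4]) == calls[:4]  # <=5 calls -> all shown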
1901
+ def _inject_tool_rule_if_applicable(self, step_type: str, tool_name: str, input_str: str) -> str:
1902
+ """Inject rules into tool input if applicable."""
1903
+ if not self._should_inject_rule(step_type, tool_name):
1904
+ return input_str
1905
+
1906
+ # If no rules selected yet, return input unchanged
1907
+ if not self._selected_rules:
1908
+ return input_str
1909
+
1910
+ # Check guard to prevent duplicate injection
1911
+ guard_key = (step_type, tool_name)
1912
+ if guard_key in self._injection_guard:
1913
+ return input_str
1914
+
1915
+ try:
1916
+ # Inject rules that target tool_start
1917
+ tool_rules = []
1918
+ current_node = getattr(self, '_current_chain_node', None)
1919
+
1920
+ for rule_meta in self._selected_rules:
1921
+ # Handle tuple format from select_rules: (rule, metadata)
1922
+ if isinstance(rule_meta, tuple) and len(rule_meta) == 2:
1923
+ rule, metadata = rule_meta
1924
+ else:
1925
+ rule = rule_meta
1926
+ metadata = {}
1927
+
1928
+ # Only apply rules that target tool_start
1929
+ if rule.target_step_type == "tool_start":
1930
+ # 🎯 NODE-SCOPED INJECTION: Check target_node if specified
1931
+ target_node = getattr(rule, 'target_node', None)
1932
+ if target_node:
1933
+ # Rule has explicit target_node - ONLY inject if we're in that node
1934
+ if current_node != target_node:
1935
+ # Silently skip - not the target node
1936
+ continue
1937
+ # No target_node specified - inject into any node using this tool (backward compat)
1938
+
1939
+ tool_rules.append(rule)
1940
+ self._vprint(f"[DASEIN][APPLY] Tool rule: {rule.advice_text[:100]}...")
1941
+
1942
+ if tool_rules:
1943
+ # Apply tool-level rule injection
1944
+ modified_input = self._apply_tool_rules(input_str, tool_rules)
1945
+ self._injection_guard.add(guard_key)
1946
+ return modified_input
1947
+ else:
1948
+ return input_str
1949
+
1950
+ except Exception as e:
1951
+ self._vprint(f"[DASEIN][APPLY] Error injecting tool rules: {e}")
1952
+ return input_str
1953
+
1954
+ def _apply_tool_rules(self, input_str: str, rules: List) -> str:
1955
+ """Apply tool-level rules to modify the input string."""
1956
+ modified_input = input_str
1957
+
1958
+ for rule in rules:
1959
+ try:
1960
+ # Apply the rule's advice to modify the tool input
1961
+ if "strip" in rule.advice_text.lower() and "fence" in rule.advice_text.lower():
1962
+ # Strip markdown code fences
1963
+ import re
1964
+ # Remove ```sql...``` or ```...``` patterns
1965
+ modified_input = re.sub(r'```(?:sql)?\s*(.*?)\s*```', r'\1', modified_input, flags=re.DOTALL)
1966
+ self._vprint(f"[DASEIN][APPLY] Stripped code fences from tool input")
1967
+ elif "strip" in rule.advice_text.lower() and "whitespace" in rule.advice_text.lower():
1968
+ # Strip leading/trailing whitespace
1969
+ modified_input = modified_input.strip()
1970
+ self._vprint(f"[DASEIN][APPLY] Stripped whitespace from tool input")
1971
+ # Add more rule types as needed
1972
+
1973
+ except Exception as e:
1974
+ self._vprint(f"[DASEIN][APPLY] Error applying tool rule: {e}")
1975
+ continue
1976
+
1977
+ return modified_input
1978
+
1979
+ def _handle_tool_executor_start(
1980
+ self,
1981
+ serialized: Dict[str, Any],
1982
+ inputs: Dict[str, Any],
1983
+ **kwargs: Any,
1984
+ ) -> None:
1985
+ """Handle tool executor start - bridge from chain_start to tool_start."""
1986
+ self._vprint(f"[DASEIN][CALLBACK] tool_start (from chain_start)")
1987
+
1988
+ # Extract tool information from inputs
1989
+ tool_name = "unknown"
1990
+ tool_input = ""
1991
+
1992
+ if isinstance(inputs, dict):
1993
+ if "tool" in inputs:
1994
+ tool_name = inputs["tool"]
1995
+ elif "tool_name" in inputs:
1996
+ tool_name = inputs["tool_name"]
1997
+
1998
+ if "tool_input" in inputs:
1999
+ tool_input = str(inputs["tool_input"])
2000
+ elif "input" in inputs:
2001
+ tool_input = str(inputs["input"])
2002
+ else:
2003
+ tool_input = str(inputs)
2004
+ else:
2005
+ tool_input = str(inputs)
2006
+
2007
+ self._vprint(f"[DASEIN][CALLBACK] Tool: {tool_name}")
2008
+ self._vprint(f"[DASEIN][CALLBACK] Input: {tool_input[:100]}...")
2009
+
2010
+ # Check if we have tool_start rules that cover this tool
2011
+ tool_rules = [rule for rule in self._selected_rules if rule.target_step_type == "tool_start"]
2012
+ covered_rules = [rule for rule in tool_rules if self._rule_covers_tool(rule, tool_name, tool_input)]
2013
+
2014
+ if covered_rules:
2015
+ self._vprint(f"[DASEIN][APPLY] tool_start: {len(covered_rules)} rules cover this tool call")
2016
+ # Fire micro-turn for rule application
2017
+ modified_input = self._fire_micro_turn_for_tool_rules(covered_rules, tool_name, tool_input)
2018
+ else:
2019
+ self._vprint(f"[DASEIN][APPLY] tool_start: no rules cover this tool call")
2020
+ modified_input = tool_input
2021
+
2022
+ args_excerpt = self._excerpt(modified_input)
2023
+
2024
+ step = {
2025
+ "step_type": "tool_start",
2026
+ "tool_name": tool_name,
2027
+ "args_excerpt": args_excerpt,
2028
+ "outcome": "",
2029
+ "ts": datetime.now().isoformat(),
2030
+ "run_id": kwargs.get("run_id"),
2031
+ "parent_run_id": kwargs.get("parent_run_id"),
2032
+ }
2033
+ self._trace.append(step)
2034
+
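The fallback-chain extraction above, condensed; the input shapes are assumptions about what LangChain/LangGraph pass through:

def extract_tool_call(inputs):
    if isinstance(inputs, dict):
        name = inputs.get("tool") or inputs.get("tool_name") or "unknown"
        # Prefer "tool_input", then "input", else stringify the whole dict
        raw = inputs.get("tool_input", inputs.get("input", inputs))
        return name, str(raw)
    return "unknown", str(inputs)

assert extract_tool_call({"tool": "search", "tool_input": {"q": "x"}}) == ("search", "{'q': 'x'}")
assert extract_tool_call("plain string") == ("unknown", "plain string")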
2035
+ def _rule_covers_tool(self, rule, tool_name: str, tool_input: str) -> bool:
2036
+ """Check if a rule covers the given tool call."""
2037
+ try:
2038
+ # Check if rule references this tool
2039
+ if hasattr(rule, 'references') and rule.references:
2040
+ if hasattr(rule.references, 'tools') and rule.references.tools:
2041
+ if tool_name not in rule.references.tools:
2042
+ return False
2043
+
2044
+ # Check trigger patterns if they exist
2045
+ if hasattr(rule, 'trigger_pattern') and rule.trigger_pattern:
2046
+ # For now, assume all tool_start rules cover their referenced tools
2047
+ # This can be made more sophisticated later
2048
+ pass
2049
+
2050
+ return True
2051
+ except Exception as e:
2052
+ self._vprint(f"[DASEIN][COVERAGE] Error checking rule coverage: {e}")
2053
+ return False
2054
+
2055
+ def _fire_micro_turn_for_tool_rules(self, rules, tool_name: str, tool_input: str) -> str:
2056
+ """Fire a micro-turn LLM call to apply tool rules."""
2057
+ try:
2058
+ # Use the first rule for now (can be extended to handle multiple rules)
2059
+ rule = rules[0]
2060
+ rule_id = getattr(rule, 'id', 'unknown')
2061
+
2062
+ self._vprint(f"[DASEIN][MICROTURN] rule_id={rule_id} tool={tool_name}")
2063
+
2064
+ # Create micro-turn prompt
2065
+ micro_turn_prompt = self._create_micro_turn_prompt(rule, tool_name, tool_input)
2066
+
2067
+ # Fire actual micro-turn LLM call
2068
+ modified_input = self._execute_micro_turn_llm_call(micro_turn_prompt, tool_input)
2069
+
2070
+ # Store the modified input for retrieval during tool execution
2071
+ input_key = f"{tool_name}:{hash(tool_input)}"
2072
+ _MODIFIED_TOOL_INPUTS[input_key] = modified_input
2073
+
2074
+ self._vprint(f"[DASEIN][MICROTURN] Applied rule {rule_id}: {str(tool_input)[:50]}... -> {str(modified_input)[:50]}...")
2075
+
2076
+ return modified_input
2077
+
2078
+ except Exception as e:
2079
+ self._vprint(f"[DASEIN][MICROTURN] Error in micro-turn: {e}")
2080
+ return tool_input
2081
+
2082
+ def _create_micro_turn_prompt(self, rule, tool_name: str, tool_input: str) -> str:
2083
+ """Create the micro-turn prompt for rule application."""
2084
+         advice = getattr(rule, 'advice_text', getattr(rule, 'advice', ''))
2085
+ return f"""Apply this rule to the tool input:
2086
+
2087
+ Rule: {advice}
2088
+ Tool: {tool_name}
2089
+ Current Input: {tool_input}
2090
+
2091
+ Output only the corrected tool input:"""
2092
+
2093
+ def _execute_micro_turn_llm_call(self, prompt: str, original_input: str) -> str:
2094
+ """Execute the actual micro-turn LLM call."""
2095
+ try:
2096
+ if not self._llm:
2097
+ self._vprint(f"[DASEIN][MICROTURN] No LLM available for micro-turn call")
2098
+ return original_input
2099
+
2100
+ self._vprint(f"[DASEIN][MICROTURN] Executing micro-turn LLM call")
2101
+ self._vprint(f"[DASEIN][MICROTURN] Prompt: {prompt[:200]}...")
2102
+
2103
+ # Make the micro-turn LLM call
2104
+ # Create a simple message list for the LLM
2105
+ messages = [{"role": "user", "content": prompt}]
2106
+
2107
+ # Call the LLM
2108
+ response = self._llm.invoke(messages)
2109
+
2110
+ # Extract the response content
2111
+ if hasattr(response, 'content'):
2112
+ modified_input = response.content.strip()
2113
+ elif isinstance(response, str):
2114
+ modified_input = response.strip()
2115
+ else:
2116
+ modified_input = str(response).strip()
2117
+
2118
+ self._vprint(f"[DASEIN][MICROTURN] LLM response: {modified_input[:100]}...")
2119
+
2120
+ # 🚨 CRITICAL: Parse JSON responses with markdown fences
2121
+ if modified_input.startswith('```json') or modified_input.startswith('```'):
2122
+ try:
2123
+ # Extract JSON from markdown fences
2124
+ import re
2125
+ import json
2126
+ json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', modified_input, re.DOTALL)
2127
+ if json_match:
2128
+ json_str = json_match.group(1)
2129
+ parsed_json = json.loads(json_str)
2130
+ # Convert back to the expected format
2131
+ if isinstance(parsed_json, dict) and 'name' in parsed_json and 'args' in parsed_json:
2132
+ modified_input = parsed_json
2133
+ self._vprint(f"[DASEIN][MICROTURN] Parsed JSON from markdown fences: {parsed_json}")
2134
+ else:
2135
+ self._vprint(f"[DASEIN][MICROTURN] JSON doesn't have expected structure, using as-is")
2136
+ else:
2137
+ self._vprint(f"[DASEIN][MICROTURN] Could not extract JSON from markdown fences")
2138
+ except Exception as e:
2139
+ self._vprint(f"[DASEIN][MICROTURN] Error parsing JSON: {e}")
2140
+
2141
+ # Validate the response - only fallback if completely empty
2142
+ if not modified_input:
2143
+ self._vprint(f"[DASEIN][MICROTURN] LLM response empty, using original input")
2144
+ return original_input
2145
+
2146
+ return modified_input
2147
+
2148
+ except Exception as e:
2149
+ self._vprint(f"[DASEIN][MICROTURN] Error executing micro-turn LLM call: {e}")
2150
+ return original_input
2151
+
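The fence-stripping regex above, exercised on a typical fenced model reply; the tool name in the payload is illustrative:

import json
import re

def parse_fenced_json(text: str):
    # Non-greedy capture of the outermost {...} between ``` or ```json fences
    match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
    return json.loads(match.group(1)) if match else None

reply = '```json\n{"name": "sql_db_query", "args": {"query": "SELECT 1"}}\n```'
assert parse_fenced_json(reply) == {'name': 'sql_db_query', 'args': {'query': 'SELECT 1'}}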
2152
+
2153
+ def get_trace() -> List[Dict[str, Any]]:
2154
+ """
2155
+ DEPRECATED: Legacy function for backward compatibility.
2156
+ Get the current trace from active CognateProxy instances.
2157
+
2158
+ Returns:
2159
+ List of trace step dictionaries (empty if no active traces)
2160
+ """
2161
+ # Try to get trace from active CognateProxy instances
2162
+ try:
2163
+ import gc
2164
+ for obj in gc.get_objects():
2165
+ if hasattr(obj, '_last_run_trace') and obj._last_run_trace:
2166
+ return obj._last_run_trace.copy()
2167
+ if hasattr(obj, '_callback_handler') and hasattr(obj._callback_handler, '_trace'):
2168
+ return obj._callback_handler._trace.copy()
2169
+ except Exception:
2170
+ pass
2171
+
2172
+ return [] # Return empty list if no trace found
2173
+
2174
+
2175
+ def get_modified_tool_input(tool_name: str, original_input: str) -> str:
2176
+ """
2177
+ Get the modified tool input if it exists.
2178
+
2179
+ Args:
2180
+ tool_name: Name of the tool
2181
+ original_input: Original tool input
2182
+
2183
+ Returns:
2184
+ Modified tool input if available, otherwise original input
2185
+ """
2186
+ input_key = f"{tool_name}:{hash(original_input)}"
2187
+ return _MODIFIED_TOOL_INPUTS.get(input_key, original_input)
2188
+
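The store keys on the tool name plus Python's built-in hash() of the original input. That is stable within a single process (str hashing is randomized across processes), so a later lookup with the same string finds the override. A minimal sketch:

_MODIFIED_TOOL_INPUTS = {}

def store(tool_name: str, original: str, modified: str) -> None:
    _MODIFIED_TOOL_INPUTS[f"{tool_name}:{hash(original)}"] = modified

def lookup(tool_name: str, original: str) -> str:
    return _MODIFIED_TOOL_INPUTS.get(f"{tool_name}:{hash(original)}", original)

store("sql_db_query", "```sql\nSELECT 1\n```", "SELECT 1")
assert lookup("sql_db_query", "```sql\nSELECT 1\n```") == "SELECT 1"
assert lookup("sql_db_query", "SELECT 2") == "SELECT 2"  # miss falls back to the original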
2189
+
2190
+ def clear_modified_tool_inputs():
2191
+ """Clear all modified tool inputs."""
2192
+ global _MODIFIED_TOOL_INPUTS
2193
+ _MODIFIED_TOOL_INPUTS.clear()
2194
+
2195
+
2196
+ def clear_trace() -> None:
2197
+ """
2198
+ DEPRECATED: Legacy function for backward compatibility.
2199
+ Clear traces in active CognateProxy instances.
2200
+ """
2201
+ # Try to clear traces in active CognateProxy instances
2202
+ try:
2203
+ import gc
2204
+ for obj in gc.get_objects():
2205
+ if hasattr(obj, '_callback_handler') and hasattr(obj._callback_handler, 'reset_run_state'):
2206
+ obj._callback_handler.reset_run_state()
2207
+ except Exception:
2208
+ pass # Ignore if not available
2209
+
2210
+
2211
+ def print_trace(max_chars: int = 240, only: tuple[str, ...] | None = None, suppress: tuple[str, ...] = ("chain_end",), show_tree: bool = True, show_summary: bool = True) -> None:
2212
+ """
2213
+ Print a compact fixed-width table of the trace with tree-like view and filtering.
2214
+
2215
+ Args:
2216
+ max_chars: Maximum characters per line (default 240)
2217
+ only: Filter by step_type if provided (e.g., ("llm_start", "llm_end"))
2218
+ suppress: Suppress any step_type in this tuple (default: ("chain_end",))
2219
+ show_tree: If True, left-pad args_excerpt by 2*depth spaces for tree-like view
2220
+ show_summary: If True, show step_type counts and deduped rows summary
2221
+ """
2222
+ # Try to get trace from active CognateProxy instances
2223
+ trace = None
2224
+ try:
2225
+ # Import here to avoid circular imports
2226
+ from dasein.api import _global_cognate_proxy
2227
+ if _global_cognate_proxy and hasattr(_global_cognate_proxy, '_wrapped_llm') and _global_cognate_proxy._wrapped_llm:
2228
+ trace = _global_cognate_proxy._wrapped_llm.get_trace()
2229
+     except Exception:
2230
+ pass
2231
+
2232
+ if not trace:
2233
+ trace = get_trace() # Use the updated get_trace() function
2234
+
2235
+ # If global trace is empty, try to get it from the last completed run
2236
+ if not trace:
2237
+ # Try to get trace from any active CognateProxy instances
2238
+ try:
2239
+ import gc
2240
+ for obj in gc.get_objects():
2241
+ # Look for CognateProxy instances with captured traces
2242
+ if hasattr(obj, '_last_run_trace') and obj._last_run_trace:
2243
+ trace = obj._last_run_trace
2244
+ print(f"[DASEIN][TRACE] Retrieved trace from CognateProxy: {len(trace)} steps")
2245
+ break
2246
+ # Fallback: try callback handler
2247
+ elif hasattr(obj, '_callback_handler') and hasattr(obj._callback_handler, 'get_trace'):
2248
+ potential_trace = obj._callback_handler.get_trace()
2249
+ if potential_trace:
2250
+ trace = potential_trace
2251
+ print(f"[DASEIN][TRACE] Retrieved trace from callback handler: {len(trace)} steps")
2252
+ break
2253
+ except Exception as e:
2254
+ pass
2255
+
2256
+ if not trace:
2257
+ print("No trace data available.")
2258
+ return
+
+     # Print execution state if available
+     try:
+         from dasein.api import _global_cognate_proxy
+         if _global_cognate_proxy and hasattr(_global_cognate_proxy, '_callback_handler'):
+             handler = _global_cognate_proxy._callback_handler
+             if hasattr(handler, '_function_calls_made') and handler._function_calls_made:
+                 print("\n" + "=" * 80)
+                 print("EXECUTION STATE (Functions Called During Run):")
+                 print("=" * 80)
+                 for func_name in sorted(handler._function_calls_made.keys()):
+                     calls = handler._function_calls_made[func_name]
+                     count = len(calls)
+                     print(f"  • {func_name}: called {count}x")
+                     # Hybrid window: show all if ≤5 calls, else the most recent 3 (matches injection logic)
+                     if count <= 5:
+                         # Show all calls
+                         for call in calls:
+                             preview = call.get('preview', '(no preview)')
+                             if len(preview) > 80:
+                                 preview = preview[:80] + '...'
+                             print(f"    [step {call['step']}] {preview}")
+                     else:
+                         # Show only the most recent 3
+                         print(f"    ... (showing most recent 3 of {count}):")
+                         for call in calls[-3:]:
+                             preview = call.get('preview', '(no preview)')
+                             if len(preview) > 80:
+                                 preview = preview[:80] + '...'
+                             print(f"    [step {call['step']}] {preview}")
+                 print("=" * 80 + "\n")
+     except Exception:
+         pass  # Silently skip if state is not available
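+     # Illustrative output of the hybrid window (sketch; names hypothetical):
+     #   • some_tool: called 7x
+     #     ... (showing most recent 3 of 7):
+     #     [step 11] <preview>
+     #     [step 12] <preview>
+     #     [step 14] <preview>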
+
+     # Filter by step_type if `only` is provided
+     filtered_trace = trace
+     if only:
+         filtered_trace = [step for step in trace if step.get("step_type") in only]
+
+     # Suppress any step_type in the `suppress` tuple
+     if suppress:
+         filtered_trace = [step for step in filtered_trace if step.get("step_type") not in suppress]
+
+     if not filtered_trace:
+         print("No trace data matching filter criteria.")
+         return
+
+     # Build a depth map from parent_run_id
+     depth_map = {}
+     for step in filtered_trace:
+         run_id = step.get("run_id")
+         parent_run_id = step.get("parent_run_id")
+
+         if run_id is None or parent_run_id is None or parent_run_id not in depth_map:
+             depth_map[run_id] = 0
+         else:
+             depth_map[run_id] = depth_map[parent_run_id] + 1
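+     # Worked example (illustrative): for (run_id, parent_run_id) pairs
+     # ("a", None), ("b", "a"), ("c", "b") the map becomes
+     # {"a": 0, "b": 1, "c": 2}; a step whose parent was filtered out
+     # simply falls back to depth 0.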
+
+     # Calculate column widths based on max_chars
+     # Reserve space for: # (3), step_type (15), tool_name (25), separators (6)
+     available_width = max_chars - 3 - 15 - 25 - 6
+     excerpt_width = available_width // 2
+     outcome_width = available_width - excerpt_width
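+     # E.g. with the default max_chars=240: available_width = 240 - 49 = 191,
+     # so excerpt_width = 95 and outcome_width = 96.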
+
+     # Print the header
+     print(f"{'#':<3} {'step_type':<15} {'tool_name':<25} {'args_excerpt':<{excerpt_width}} {'outcome':<{outcome_width}}")
+     print("-" * max_chars)
+
+     # Print each step
+     for i, step in enumerate(filtered_trace, 1):
+         step_type = step.get("step_type", "")[:15]
+         tool_name = str(step.get("tool_name", ""))[:25]
+         args_excerpt = step.get("args_excerpt", "")
+         outcome = step.get("outcome", "")
+
+         # Apply tree indentation if show_tree is True
+         if show_tree:
+             run_id = step.get("run_id")
+             depth = depth_map.get(run_id, 0)
+             args_excerpt = "  " * depth + args_excerpt  # indent by 2*depth spaces
+
+         # Truncate to fit column widths
+         args_excerpt = args_excerpt[:excerpt_width]
+         outcome = outcome[:outcome_width]
+
+         print(f"{i:<3} {step_type:<15} {tool_name:<25} {args_excerpt:<{excerpt_width}} {outcome:<{outcome_width}}")
+
+     # Show the summary if requested
+     if show_summary:
+         print("\n" + "=" * max_chars)
+
+         # Count steps by step_type
+         step_counts = {}
+         for step in filtered_trace:
+             step_type = step.get("step_type", "unknown")
+             step_counts[step_type] = step_counts.get(step_type, 0) + 1
+
+         print("Step counts:")
+         for step_type, count in sorted(step_counts.items()):
+             print(f"  {step_type}: {count}")
+
+         # Add a compact function-call summary
+         try:
+             from dasein.api import _global_cognate_proxy
+             if _global_cognate_proxy and hasattr(_global_cognate_proxy, '_callback_handler'):
+                 handler = _global_cognate_proxy._callback_handler
+                 if hasattr(handler, '_function_calls_made') and handler._function_calls_made:
+                     print("\nFunction calls:")
+                     for func_name in sorted(handler._function_calls_made.keys()):
+                         count = len(handler._function_calls_made[func_name])
+                         print(f"  {func_name}: {count}")
+         except Exception:
+             pass
+
+         # Count deduped rows skipped (steps dropped by `only` or `suppress`)
+         total_steps = len(trace)
+         shown_steps = len(filtered_trace)
+         skipped_steps = total_steps - shown_steps
+
+         if skipped_steps > 0:
+             print(f"Deduped rows skipped: {skipped_steps}")