dasein-core 0.2.7__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. dasein/api.py +1144 -133
  2. dasein/capture.py +2325 -1803
  3. dasein/microturn.py +475 -0
  4. dasein/models/en_core_web_sm/en_core_web_sm/__init__.py +10 -0
  5. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/LICENSE +19 -0
  6. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/LICENSES_SOURCES +66 -0
  7. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/README.md +47 -0
  8. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/accuracy.json +330 -0
  9. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/attribute_ruler/patterns +0 -0
  10. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/config.cfg +269 -0
  11. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/lemmatizer/lookups/lookups.bin +1 -0
  12. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/meta.json +521 -0
  13. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/cfg +13 -0
  14. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/model +0 -0
  15. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/moves +1 -0
  16. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/cfg +13 -0
  17. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/model +0 -0
  18. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/moves +1 -0
  19. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/senter/cfg +3 -0
  20. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/senter/model +0 -0
  21. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tagger/cfg +57 -0
  22. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tagger/model +0 -0
  23. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tok2vec/cfg +3 -0
  24. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tok2vec/model +0 -0
  25. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tokenizer +3 -0
  26. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/key2row +1 -0
  27. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/lookups.bin +0 -0
  28. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/strings.json +84782 -0
  29. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/vectors +0 -0
  30. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/vectors.cfg +3 -0
  31. dasein/models/en_core_web_sm/en_core_web_sm/meta.json +521 -0
  32. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/LICENSE +19 -0
  33. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/LICENSES_SOURCES +66 -0
  34. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/METADATA +59 -0
  35. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/RECORD +35 -0
  36. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/WHEEL +5 -0
  37. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/entry_points.txt +2 -0
  38. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/top_level.txt +1 -0
  39. dasein/pipecleaner.py +1917 -0
  40. dasein/wrappers.py +315 -0
  41. {dasein_core-0.2.7.dist-info → dasein_core-0.2.10.dist-info}/METADATA +4 -1
  42. dasein_core-0.2.10.dist-info/RECORD +59 -0
  43. dasein_core-0.2.7.dist-info/RECORD +0 -21
  44. {dasein_core-0.2.7.dist-info → dasein_core-0.2.10.dist-info}/WHEEL +0 -0
  45. {dasein_core-0.2.7.dist-info → dasein_core-0.2.10.dist-info}/licenses/LICENSE +0 -0
  46. {dasein_core-0.2.7.dist-info → dasein_core-0.2.10.dist-info}/top_level.txt +0 -0
dasein/api.py CHANGED
@@ -18,6 +18,15 @@ from .services import ServiceAdapter
 from .config import W_COST


+# ============================================================================
+# CONFIGURATION
+# ============================================================================
+
+# Microturn enforcement configuration
+USE_LLM_MICROTURN = False  # If True, use LLM to judge which calls to allow
+                           # If False, use deterministic duplicate detection only
+                           # (Keep False - LLM microturn adds latency, use only for semantic rules)
+
 # ============================================================================
 # VERBOSE LOGGING HELPER
 # ============================================================================
@@ -51,6 +60,7 @@ class DaseinLLMWrapper(BaseChatModel):

     def _generate(self, messages, stop=None, run_manager=None, **kwargs):
         """Generate response and capture trace."""
+        print(f"[DASEIN][WRAPPER] _generate called with {len(messages)} messages")
         self._vprint(f"[DASEIN][TRACE] LLM wrapper _generate called with {len(messages)} messages")

         # Get model name dynamically
@@ -163,6 +173,87 @@ class DaseinLLMWrapper(BaseChatModel):
         self._vprint(f"[DASEIN][TRACE] LLM result: {result_text[:100]}...")
         self._vprint(f"[DASEIN][METRICS] Tokens: {step['tokens_input']}->{output_tokens} | Time: {duration_ms}ms | Success: {'OK' if success else 'FAIL'}")

+        # 🚨 MICROTURN ENFORCEMENT - DISABLED (can interfere with tool execution)
+        if False:  # Disabled
+            try:
+                proposed_func_name = None
+                print(f"[DASEIN][MICROTURN_DEBUG] Checking result for function call...")
+                if hasattr(result, 'generations') and result.generations:
+                    first_gen = result.generations[0]
+                    if isinstance(first_gen, list) and len(first_gen) > 0:
+                        generation = first_gen[0]
+                    else:
+                        generation = first_gen
+
+                    print(f"[DASEIN][MICROTURN_DEBUG] generation type: {type(generation)}")
+                    if hasattr(generation, 'message') and hasattr(generation.message, 'additional_kwargs'):
+                        func_call = generation.message.additional_kwargs.get('function_call')
+                        print(f"[DASEIN][MICROTURN_DEBUG] func_call: {func_call}")
+                        if func_call and isinstance(func_call, dict) and 'name' in func_call:
+                            proposed_func_name = func_call['name']
+                else:
+                    print(f"[DASEIN][MICROTURN_DEBUG] No generations in result")
+
+                if not proposed_func_name:
+                    print(f"[DASEIN][MICROTURN_DEBUG] No function call in response, skipping microturn")
+                else:
+                    print(f"[DASEIN][MICROTURN_DEBUG] Found proposed function: {proposed_func_name}")
+
+                    # Build execution state (BEFORE adding current call)
+                    state_lines = []
+                    if hasattr(self._callback_handler, '_function_calls_made') and self._callback_handler._function_calls_made:
+                        for fname in sorted(self._callback_handler._function_calls_made.keys()):
+                            count = len(self._callback_handler._function_calls_made[fname])
+                            if count > 0:
+                                state_lines.append(f"  • {fname}: called {count}x")
+
+                    state_context = "EXECUTION STATE:\n" + "\n".join(state_lines) if state_lines else "EXECUTION STATE: No calls yet"
+
+                    microturn_prompt = f"""You are a rule enforcement system. Your job is to decide if a proposed action violates the rules.
+
+HARD RULE: You MUST make at maximum a single summary call
+
+{state_context}
+
+PROPOSED ACTION: Call {proposed_func_name}
+
+DECISION:
+If this action violates the rule, respond with EXACTLY: BLOCK
+If this action is allowed, respond with EXACTLY: PASS
+
+Your response (BLOCK or PASS):"""
+
+                    print(f"[DASEIN][MICROTURN_DEBUG] Calling microturn LLM...")
+                    from langchain_core.messages import HumanMessage
+                    messages_for_microturn = [HumanMessage(content=microturn_prompt)]
+                    microturn_response = self._llm.invoke(messages_for_microturn)
+
+                    if hasattr(microturn_response, 'content'):
+                        decision = microturn_response.content.strip().upper()
+                    else:
+                        decision = str(microturn_response).strip().upper()
+
+                    node_name = getattr(self._callback_handler, '_current_chain_node', 'agent')
+                    print(f"[DASEIN][MICROTURN] Node: {node_name} | Proposed: {proposed_func_name} | Decision: {decision}")
+
+                    if "BLOCK" in decision:
+                        print(f"[DASEIN][MICROTURN] BLOCKING {proposed_func_name} call!")
+                        # Modify the result to clear the function call
+                        if hasattr(result, 'generations') and result.generations:
+                            first_gen = result.generations[0]
+                            if isinstance(first_gen, list) and len(first_gen) > 0:
+                                generation = first_gen[0]
+                            else:
+                                generation = first_gen
+
+                            if hasattr(generation, 'message'):
+                                generation.message.additional_kwargs['function_call'] = {}
+                                generation.message.content = ""
+            except Exception as e:
+                print(f"[DASEIN][MICROTURN] Error in microturn: {e}")
+                import traceback
+                traceback.print_exc()
+
         # Trigger on_llm_end callback
         if self._callback_handler:
             self._callback_handler.on_llm_end(
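
Note: the disabled block above implements the microturn as a single-shot judge — one extra LLM round-trip that classifies a proposed tool call as BLOCK or PASS against the execution state before the result is surfaced. A minimal standalone sketch of that judging step (prompt abridged; `llm` is any LangChain chat model):

    from langchain_core.messages import HumanMessage

    def microturn_judge(llm, proposed_func_name: str, state_context: str) -> bool:
        """Return True if the proposed call should be blocked (sketch of the disabled path above)."""
        prompt = (
            "You are a rule enforcement system.\n"
            "HARD RULE: You MUST make at maximum a single summary call\n\n"
            f"{state_context}\n\n"
            f"PROPOSED ACTION: Call {proposed_func_name}\n\n"
            "Your response (BLOCK or PASS):"
        )
        response = llm.invoke([HumanMessage(content=prompt)])
        decision = getattr(response, 'content', str(response)).strip().upper()
        return "BLOCK" in decision
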
@@ -217,6 +308,15 @@ class DaseinLLMWrapper(BaseChatModel):
         except:
             return "No result"

+    def invoke(self, messages, **kwargs):
+        """Override invoke to intercept all LLM calls."""
+        print(f"[DASEIN][WRAPPER] invoke() called with {len(messages) if isinstance(messages, list) else 1} messages")
+
+        # Call the parent's invoke which will call our _generate
+        result = super().invoke(messages, **kwargs)
+
+        return result
+
     def _llm_type(self):
         return "dasein_llm_wrapper"

@@ -419,6 +519,14 @@ def cognate(agent, *, weights=None, verbose=False, retry=1, performance_tracking
     Returns:
         A proxy object with .run() and .invoke() methods
     """
+    # CRITICAL: Prevent double-wrapping in Jupyter/Colab when cell is rerun
+    # If agent is already a CognateProxy, unwrap it first to avoid nested retry loops
+    if isinstance(agent, CognateProxy):
+        print("[DASEIN][WARNING] Agent is already wrapped with cognate(). Unwrapping to prevent nested loops.")
+        print(f"[DASEIN][WARNING] Previous config: retry={agent._retry}, performance_tracking={agent._performance_tracking}")
+        print(f"[DASEIN][WARNING] New config: retry={retry}, performance_tracking={performance_tracking}")
+        agent = agent._agent  # Unwrap to get original agent
+
     global _global_cognate_proxy
     _global_cognate_proxy = CognateProxy(agent, weights=weights, verbose=verbose, retry=retry, performance_tracking=performance_tracking, rule_trace=rule_trace, post_run=post_run, performance_tracking_id=performance_tracking_id, top_k=top_k)
     return _global_cognate_proxy
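
The guard above makes cognate() safe to rerun in the same kernel: rewrapping a proxy unwraps back to the original agent instead of nesting retry loops. A minimal sketch of the intended behavior (the agent constructor is hypothetical; only cognate() is from this package):

    from dasein import cognate

    agent = build_agent()            # hypothetical: any LangChain/LangGraph agent
    proxy = cognate(agent, retry=2)

    # Rerunning the cell passes the proxy back in; cognate() detects the
    # CognateProxy, unwraps to the original agent, and applies the new
    # config, so retry ends up 3 rather than 2 nested inside 3.
    proxy = cognate(proxy, retry=3)
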
@@ -728,7 +836,7 @@ class CognateProxy:
             print(f"[DASEIN] Coordinator node: {coordinator_node}")
             planning_nodes = self._identify_planning_nodes(agent, coordinator_node)

-        self._callback_handler = DaseinCallbackHandler(weights=weights, llm=None, is_langgraph=self._is_langgraph, coordinator_node=coordinator_node, planning_nodes=planning_nodes, verbose=verbose)
+        self._callback_handler = DaseinCallbackHandler(weights=weights, llm=None, is_langgraph=self._is_langgraph, coordinator_node=coordinator_node, planning_nodes=planning_nodes, verbose=verbose, agent=self._agent, extract_tools_fn=self._extract_tool_metadata)
         self._langgraph_params = None
         self._original_agent = agent  # Keep reference to original
         self._agent_was_recreated = False  # Track if agent recreation succeeded
@@ -775,10 +883,24 @@ class CognateProxy:

         # Wrap the agent's LLM with our trace capture wrapper
         self._wrap_agent_llm()
+
+        # Inject universal dead-letter tool
+        self._inject_deadletter_tool()

     def _vprint(self, message: str, force: bool = False):
         """Helper for verbose printing."""
         _vprint(message, self._verbose, force)
+
+    def _format_final_outcome(self, outcome):
+        """Format final outcome for display."""
+        if outcome == "completed":
+            return "✅ Task Completed"
+        elif outcome == "gave_up":
+            return "⚠️ Agent Gave Up"
+        elif outcome == "failed":
+            return "❌ Failed"
+        else:
+            return f"❓ {outcome}"

     def _extract_query_from_input(self, input_data):
         """ CRITICAL: Extract query string from various input formats."""
@@ -965,6 +1087,172 @@ class CognateProxy:
             self._vprint(f"[DASEIN][PLANNING_NODES] ERROR: {e}")
             return set()

+    def _extract_tool_metadata(self, agent):
+        """
+        Extract tool metadata (name, description, args_schema) from agent.
+
+        CRITICAL: Extracts ALL available tools from the agent, not just tools used in trace.
+        Why: If agent used wrong tool (e.g., extract_text instead of get_elements),
+        the trace won't show the correct tool. Stage 3.5 needs to see all options
+        to suggest better alternatives.
+
+        For multi-agent systems, preserves node→tool mapping so Stage 3.5 knows
+        which tools are available in which nodes (critical for grounding).
+        """
+        tools_metadata = []
+        tools_to_process = []  # Format: (tool, node_name or None)
+
+        # Get ALL tools from agent (LangChain or LangGraph) - not filtered by trace usage
+        tools_attr = getattr(agent, 'tools', None)
+        if tools_attr:
+            try:
+                # Top-level tools have no node context
+                tools_to_process = [(t, None) for t in list(tools_attr)]
+            except Exception:
+                pass
+        elif getattr(agent, 'toolkit', None):
+            tk = getattr(agent, 'toolkit')
+            tk_tools = getattr(tk, 'tools', None) or getattr(tk, 'get_tools', None)
+            try:
+                # Toolkit tools have no node context
+                tools_to_process = [(t, None) for t in list(tk_tools() if callable(tk_tools) else tk_tools or [])]
+            except Exception:
+                pass
+
+        # Also try LangGraph tools from compiled graph
+        # For multi-agent systems, scan ALL nodes for tools (not just 'tools' node)
+        # CRITICAL: Preserve node→tool mapping for proper grounding
+        # CRITICAL: Use agent.get_graph().nodes (same as planning node discovery)
+        # NOT agent.nodes which returns different objects without .data attribute
+        if hasattr(agent, 'get_graph'):
+            graph = agent.get_graph()
+            nodes = graph.nodes
+            for node_name, node_obj in nodes.items():
+                if node_name.startswith('__'):  # Skip __start__, __end__
+                    continue
+
+                # Check if this is a subgraph with child nodes (like research_supervisor)
+                # CRITICAL: Use node_obj.data (compiled graph) not node_obj.node (implementation)
+                if hasattr(node_obj, 'data') and hasattr(node_obj.data, 'nodes') and 'Compiled' in type(node_obj.data).__name__:
+                    try:
+                        subgraph = node_obj.data.get_graph()
+                        for sub_node_name, sub_node_obj in subgraph.nodes.items():
+                            if sub_node_name.startswith('__'):
+                                continue
+                            if hasattr(sub_node_obj, 'node'):
+                                sub_actual = sub_node_obj.node
+                                # Use fully qualified node name: parent.child
+                                full_node_name = f"{node_name}.{sub_node_name}"
+
+                                # Check all tool patterns in subgraph children
+                                if hasattr(sub_actual, 'tools_by_name'):
+                                    tools_to_process.extend([(t, full_node_name) for t in sub_actual.tools_by_name.values()])
+                                if hasattr(sub_actual, 'runnable') and hasattr(sub_actual.runnable, 'tools'):
+                                    sub_tools = sub_actual.runnable.tools
+                                    if callable(sub_tools):
+                                        try:
+                                            sub_tools = sub_tools()
+                                        except:
+                                            pass
+                                    if isinstance(sub_tools, list):
+                                        tools_to_process.extend([(t, full_node_name) for t in sub_tools])
+                                        print(f" [DASEIN][EXTRACT] Found {len(sub_tools)} tools in {full_node_name}.runnable.tools")
+                                    else:
+                                        tools_to_process.append((sub_tools, full_node_name))
+                                        print(f" [DASEIN][EXTRACT] Found 1 tool in {full_node_name}.runnable.tools")
+                    except Exception as e:
+                        print(f" [DASEIN][EXTRACT] Failed to analyze subgraph: {e}")
+
+                # Check if node has steps with tools
+                if hasattr(node_obj, 'node'):
+                    actual_node = node_obj.node
+
+                    # Check for tools_by_name (common in agent nodes)
+                    if hasattr(actual_node, 'tools_by_name'):
+                        node_tools = actual_node.tools_by_name.values()
+                        tools_to_process.extend([(t, node_name) for t in node_tools])
+                        print(f" [DASEIN][EXTRACT] Found {len(node_tools)} tools in {node_name}.tools_by_name")
+
+                    # Check for runnable.tools (dynamic tools like ConductResearch)
+                    if hasattr(actual_node, 'runnable') and hasattr(actual_node.runnable, 'tools'):
+                        runnable_tools = actual_node.runnable.tools
+                        if callable(runnable_tools):
+                            try:
+                                runnable_tools = runnable_tools()
+                            except:
+                                pass
+                        if isinstance(runnable_tools, list):
+                            tools_to_process.extend([(t, node_name) for t in runnable_tools])
+                            print(f" [DASEIN][EXTRACT] Found {len(runnable_tools)} tools in {node_name}.runnable.tools")
+                        else:
+                            tools_to_process.append((runnable_tools, node_name))
+                            print(f" [DASEIN][EXTRACT] Found 1 tool in {node_name}.runnable.tools")
+
+                    # Check for bound.tools (another common pattern)
+                    if hasattr(actual_node, 'bound') and hasattr(actual_node.bound, 'tools'):
+                        bound_tools = actual_node.bound.tools
+                        if isinstance(bound_tools, list):
+                            tools_to_process.extend([(t, node_name) for t in bound_tools])
+                            print(f" [DASEIN][EXTRACT] Found {len(bound_tools)} tools in {node_name}.bound.tools")
+                        else:
+                            tools_to_process.append((bound_tools, node_name))
+                            print(f" [DASEIN][EXTRACT] Found 1 tool in {node_name}.bound.tools")
+
+                    # Check for steps (legacy pattern)
+                    if hasattr(actual_node, 'steps'):
+                        for step in actual_node.steps:
+                            if hasattr(step, 'tools_by_name'):
+                                step_tools = step.tools_by_name.values()
+                                tools_to_process.extend([(t, node_name) for t in step_tools])
+                                print(f" [DASEIN][EXTRACT] Found {len(step_tools)} tools in {node_name}.steps")
+                                break
+
+        # Extract metadata from each tool (with node context for multi-agent)
+        for tool_tuple in tools_to_process:
+            try:
+                # Unpack (tool, node_name)
+                if isinstance(tool_tuple, tuple) and len(tool_tuple) == 2:
+                    tool, node_name = tool_tuple
+                else:
+                    tool = tool_tuple
+                    node_name = None
+
+                # Unwrap DaseinToolWrapper to get complete metadata (especially args_schema)
+                if hasattr(tool, 'original_tool'):
+                    tool = tool.original_tool
+
+                tool_meta = {
+                    'name': getattr(tool, 'name', str(tool.__class__.__name__)),
+                    'description': getattr(tool, 'description', ''),
+                }
+
+                # CRITICAL: Add node context for multi-agent systems (for grounding)
+                if node_name:
+                    tool_meta['node'] = node_name
+
+                # Extract args_schema if available
+                if hasattr(tool, 'args_schema') and tool.args_schema:
+                    try:
+                        # Try Pydantic v2 method
+                        if hasattr(tool.args_schema, 'model_json_schema'):
+                            tool_meta['args_schema'] = tool.args_schema.model_json_schema()
+                        # Fallback to Pydantic v1 method
+                        elif hasattr(tool.args_schema, 'schema'):
+                            tool_meta['args_schema'] = tool.args_schema.schema()
+                        else:
+                            tool_meta['args_schema'] = {}
+                    except Exception:
+                        tool_meta['args_schema'] = {}
+                else:
+                    tool_meta['args_schema'] = {}
+
+                tools_metadata.append(tool_meta)
+            except Exception as e:
+                # Skip tools that fail to extract
+                pass
+
+        return tools_metadata
+
     def _extract_langgraph_params(self, agent):
         """ CRITICAL: Extract LangGraph agent creation parameters for recreation."""
         try:
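
The args_schema branch above is the usual dual-path check for supporting both Pydantic major versions. A self-contained sketch of the same fallback (the SearchArgs model is illustrative, not from this package):

    from pydantic import BaseModel

    class SearchArgs(BaseModel):
        query: str
        max_results: int = 10

    def schema_of(args_schema) -> dict:
        # Pydantic v2 models expose model_json_schema(); v1 models expose schema().
        if hasattr(args_schema, 'model_json_schema'):
            return args_schema.model_json_schema()
        if hasattr(args_schema, 'schema'):
            return args_schema.schema()
        return {}

    print(schema_of(SearchArgs)['properties'])
    # {'query': {'title': 'Query', 'type': 'string'}, 'max_results': {...}}
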
@@ -979,24 +1267,131 @@ class CognateProxy:
             return None

         # Try to extract tools from the compiled graph
-        if hasattr(agent, 'nodes') and 'tools' in agent.nodes:
-            tools_node = agent.nodes['tools']
-            if hasattr(tools_node, 'node') and hasattr(tools_node.node, 'steps'):
-                for step in tools_node.node.steps:
-                    if hasattr(step, 'tools_by_name'):
-                        # Extract original tools (before our wrapping)
-                        tools = []
-                        for tool_name, tool in step.tools_by_name.items():
+        # CRITICAL: For multi-agent, scan ALL nodes (not just 'tools' node)
+        tools = []
+        # CRITICAL: Use agent.get_graph().nodes (same as planning node discovery)
+        # NOT agent.nodes which returns different objects without .data attribute
+        if hasattr(agent, 'get_graph'):
+            graph = agent.get_graph()
+            nodes = graph.nodes
+            print(f" [DASEIN][EXTRACT] Scanning {len(nodes)} LangGraph nodes for tools...")
+            for node_name, node_obj in nodes.items():
+                if node_name.startswith('__'):  # Skip __start__, __end__
+                    continue
+
+                print(f" [DASEIN][EXTRACT] Checking node: {node_name}")
+
+                # Check if this is a subgraph with child nodes (like research_supervisor)
+                # CRITICAL: Use node_obj.data (compiled graph) not node_obj.node (implementation)
+                if hasattr(node_obj, 'data') and hasattr(node_obj.data, 'nodes') and 'Compiled' in type(node_obj.data).__name__:
+                    try:
+                        subgraph = node_obj.data.get_graph()
+                        print(f" [DASEIN][EXTRACT] {node_name} is a subgraph with {len(subgraph.nodes)} child nodes")
+                        for sub_node_name, sub_node_obj in subgraph.nodes.items():
+                            if sub_node_name.startswith('__'):
+                                continue
+                            print(f" [DASEIN][EXTRACT] Checking subgraph child: {sub_node_name}")
+                            if hasattr(sub_node_obj, 'node'):
+                                sub_actual = sub_node_obj.node
+
+                                # Debug: print what attributes this node has
+                                attrs = [a for a in dir(sub_actual) if not a.startswith('_')]
+                                print(f" [DASEIN][EXTRACT] Node attributes: {', '.join(attrs[:10])}...")
+
+                                # Check all tool patterns in subgraph children
+                                if hasattr(sub_actual, 'tools_by_name'):
+                                    for tool_name, tool in sub_actual.tools_by_name.items():
+                                        if hasattr(tool, 'original_tool'):
+                                            tools.append(tool.original_tool)
+                                        else:
+                                            tools.append(tool)
+                                    print(f" [DASEIN][EXTRACT] Found {len(sub_actual.tools_by_name)} tools in {node_name}.{sub_node_name}.tools_by_name")
+                                if hasattr(sub_actual, 'runnable') and hasattr(sub_actual.runnable, 'tools'):
+                                    sub_tools = sub_actual.runnable.tools
+                                    if callable(sub_tools):
+                                        try:
+                                            sub_tools = sub_tools()
+                                        except:
+                                            pass
+                                    if isinstance(sub_tools, list):
+                                        tools.extend(sub_tools)
+                                        print(f" [DASEIN][EXTRACT] Found {len(sub_tools)} tools in {node_name}.{sub_node_name}.runnable.tools")
+                                    else:
+                                        tools.append(sub_tools)
+                                        print(f" [DASEIN][EXTRACT] Found 1 tool in {node_name}.{sub_node_name}.runnable.tools")
+
+                                # Also check if sub_actual IS a callable with tools (another pattern)
+                                if callable(sub_actual) and hasattr(sub_actual, 'tools'):
+                                    direct_tools = sub_actual.tools
+                                    if callable(direct_tools):
+                                        try:
+                                            direct_tools = direct_tools()
+                                        except:
+                                            pass
+                                    if isinstance(direct_tools, list):
+                                        tools.extend(direct_tools)
+                                        print(f" [DASEIN][EXTRACT] Found {len(direct_tools)} tools in {node_name}.{sub_node_name} (direct)")
+                                    elif direct_tools:
+                                        tools.append(direct_tools)
+                                        print(f" [DASEIN][EXTRACT] Found 1 tool in {node_name}.{sub_node_name} (direct)")
+                    except Exception as e:
+                        print(f" [DASEIN][EXTRACT] Failed to analyze subgraph: {e}")
+
+                # Check if node has tools
+                if hasattr(node_obj, 'node'):
+                    actual_node = node_obj.node
+
+                    # Check for tools_by_name (common in agent nodes)
+                    if hasattr(actual_node, 'tools_by_name'):
+                        for tool_name, tool in actual_node.tools_by_name.items():
                             # If it's our wrapped tool, get the original
                             if hasattr(tool, 'original_tool'):
                                 tools.append(tool.original_tool)
                             else:
                                 tools.append(tool)
-                        params['tools'] = tools
-                        print(f" [DASEIN][EXTRACT] Found {len(tools)} tools")
-                        break
-
-        if 'tools' not in params:
+                        print(f" [DASEIN][EXTRACT] Found {len(actual_node.tools_by_name)} tools in {node_name}.tools_by_name")
+
+                    # Check for runnable.tools (dynamic tools like ConductResearch)
+                    if hasattr(actual_node, 'runnable') and hasattr(actual_node.runnable, 'tools'):
+                        runnable_tools = actual_node.runnable.tools
+                        if callable(runnable_tools):
+                            try:
+                                runnable_tools = runnable_tools()
+                            except:
+                                pass
+                        if isinstance(runnable_tools, list):
+                            tools.extend(runnable_tools)
+                            print(f" [DASEIN][EXTRACT] Found {len(runnable_tools)} tools in {node_name}.runnable.tools")
+                        else:
+                            tools.append(runnable_tools)
+                            print(f" [DASEIN][EXTRACT] Found 1 tool in {node_name}.runnable.tools")
+
+                    # Check for bound.tools (another common pattern)
+                    if hasattr(actual_node, 'bound') and hasattr(actual_node.bound, 'tools'):
+                        bound_tools = actual_node.bound.tools
+                        if isinstance(bound_tools, list):
+                            tools.extend(bound_tools)
+                            print(f" [DASEIN][EXTRACT] Found {len(bound_tools)} tools in {node_name}.bound.tools")
+                        else:
+                            tools.append(bound_tools)
+                            print(f" [DASEIN][EXTRACT] Found 1 tool in {node_name}.bound.tools")
+
+                    # Check for steps (legacy pattern)
+                    if hasattr(actual_node, 'steps'):
+                        for step in actual_node.steps:
+                            if hasattr(step, 'tools_by_name'):
+                                for tool_name, tool in step.tools_by_name.items():
+                                    if hasattr(tool, 'original_tool'):
+                                        tools.append(tool.original_tool)
+                                    else:
+                                        tools.append(tool)
+                                print(f" [DASEIN][EXTRACT] Found {len(step.tools_by_name)} tools in {node_name}.steps")
+                                break
+
+        if tools:
+            params['tools'] = tools
+            print(f" [DASEIN][EXTRACT] Total: {len(tools)} tools extracted")
+        else:
             print(f" [DASEIN][EXTRACT] No tools found in agent")
             return None
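
Both this function and _extract_tool_metadata lean on the same traversal: agent.get_graph().nodes, descending into compiled subgraphs via node.data. A reduced sketch of that pattern (standalone; assumes a compiled LangGraph agent):

    def iter_graph_nodes(compiled_agent):
        """Yield (qualified_name, node) pairs, descending into compiled subgraphs."""
        for name, node in compiled_agent.get_graph().nodes.items():
            if name.startswith('__'):          # skip __start__ / __end__
                continue
            yield name, node
            data = getattr(node, 'data', None)
            # Compiled subgraphs (e.g., a supervisor node) expose their own graph
            if data is not None and 'Compiled' in type(data).__name__:
                for sub_name, sub_node in data.get_graph().nodes.items():
                    if not sub_name.startswith('__'):
                        yield f"{name}.{sub_name}", sub_node
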
@@ -1189,23 +1584,663 @@ Follow these rules when planning your actions."""
             traceback.print_exc()
             return False

+    @staticmethod
+    def _create_deadletter_tool():
+        """Create the universal dead-letter tool for blocked calls.
+
+        This tool acts as a sink for calls blocked by anti-fanout rules.
+        It returns instantly with structured metadata, allowing nodes to complete normally.
+        """
+        def dasein_deadletter(
+            original_tool: str,
+            original_args_fingerprint: str,
+            reason_code: str,
+            policy_trace_id: str,
+            tokens_saved_estimate: int = 0,
+            cached_result: Any = None
+        ) -> Any:
+            """Universal dead-letter tool for blocked policy calls.
+
+            **INTERNAL USE ONLY - DO NOT CALL DIRECTLY**
+
+            This tool is automatically invoked when Dasein blocks a call for policy reasons
+            (e.g., anti-fanout rules). Supports transparent deduplication by returning
+            cached results from previous identical calls.
+
+            Args:
+                original_tool: Name of the tool that was blocked
+                original_args_fingerprint: Hash/summary of original arguments
+                reason_code: Why the call was blocked (e.g., "duplicate_detected")
+                policy_trace_id: Trace ID for the rule that caused the block
+                tokens_saved_estimate: Estimated tokens saved by blocking this call
+                cached_result: If provided, return this (transparent deduplication)
+
+            Returns:
+                Either cached_result (transparent) or structured error dict (explicit block)
+            """
+            import time
+
+            if cached_result is not None:
+                # Transparent deduplication - return the original result seamlessly
+                print(f"[DASEIN][DEADLETTER] 🔄 Transparent dedup: {original_tool} (returning cached result, {tokens_saved_estimate} tokens saved)")
+                return cached_result
+            else:
+                # Explicit block - return error structure
+                result = {
+                    "blocked_by_policy": True,
+                    "original_tool": original_tool,
+                    "original_args_fingerprint": original_args_fingerprint,
+                    "reason_code": reason_code,
+                    "policy_trace_id": policy_trace_id,
+                    "tokens_saved_estimate": tokens_saved_estimate,
+                    "timestamp": time.time(),
+                    "message": f"Call to {original_tool} was blocked by Dasein policy: {reason_code}"
+                }
+                print(f"[DASEIN][DEADLETTER] 🚫 Blocked {original_tool}: {reason_code} (est. {tokens_saved_estimate} tokens saved)")
+                return result
+
+        return dasein_deadletter
+
+    def _inject_deadletter_tool(self):
+        """Inject the dead-letter tool into the agent's tool registry.
+
+        The tool is added to the executor but hidden from the LLM's view by marking it internal.
+        """
+        try:
+            deadletter_fn = self._create_deadletter_tool()
+
+            # Convert to LangChain Tool
+            from langchain.tools import Tool
+            deadletter_tool = Tool(
+                name="dasein_deadletter",
+                description="**INTERNAL USE ONLY - DO NOT CALL DIRECTLY**\nThis tool is automatically invoked when Dasein blocks a call for policy reasons.",
+                func=deadletter_fn
+            )
+
+            # For LangGraph agents: Add to tools list in langgraph_params
+            if self._is_langgraph and self._langgraph_params and 'tools' in self._langgraph_params:
+                self._langgraph_params['tools'].append(deadletter_tool)
+                print(f"[DASEIN][DEADLETTER] Injected dead-letter tool into LangGraph params")
+
+            # For LangChain agents: Add to agent's tools attribute if accessible
+            elif hasattr(self._agent, 'tools'):
+                if isinstance(self._agent.tools, list):
+                    self._agent.tools.append(deadletter_tool)
+                    print(f"[DASEIN][DEADLETTER] Injected dead-letter tool into LangChain agent")
+
+            # Store reference for later use
+            self._deadletter_tool = deadletter_tool
+            self._deadletter_fn = deadletter_fn
+
+        except Exception as e:
+            print(f"[DASEIN][DEADLETTER] Failed to inject dead-letter tool: {e}")
+            import traceback
+            traceback.print_exc()
+            self._deadletter_tool = None
+            self._deadletter_fn = None
+
     def _wrap_agent_llm(self):
-        """Dynamically find and wrap any LLM in the agent structure."""
+        """Monkey-patch ALL LLM classes found in agent + tools."""
         try:
-            # Recursively search for any LLM-like object in the agent structure
-            llm = self._find_llm_recursively(self._agent, max_depth=5)
-            if llm:
-                wrapped_llm = DaseinLLMWrapper(llm, self._callback_handler)
-                # Replace the original LLM with our wrapped version
-                self._replace_llm_in_structure(self._agent, llm, wrapped_llm, max_depth=5)
-                self._wrapped_llm = wrapped_llm
-                self._vprint(f"[DASEIN][WRAPPER] Successfully wrapped {type(llm).__name__} LLM")
-                return
+            # Find ALL LLMs in agent structure + tools
+            print(f"[DASEIN][WRAPPER] Searching for ALL LLMs in agent+tools...")
+            all_llms = []

-            self._vprint(f"[DASEIN][WRAPPER] Could not find any LLM in agent structure")
-            self._wrapped_llm = None
+            # 1. Search in agent
+            agent_llm = self._find_llm_recursively(self._agent, max_depth=5)
+            if agent_llm:
+                all_llms.append(('agent', agent_llm))
+
+            # 2. Search in tools (where Summary LLM lives!)
+            if hasattr(self._agent, 'tools'):
+                for i, tool in enumerate(self._agent.tools or []):
+                    tool_llm = self._find_llm_recursively(tool, max_depth=3, path=f"tools[{i}]")
+                    if tool_llm:
+                        all_llms.append((f'tool_{i}_{getattr(tool, "name", "unknown")}', tool_llm))
+
+            print(f"[DASEIN][WRAPPER] Found {len(all_llms)} LLM(s)")
+            for location, llm in all_llms:
+                print(f"[DASEIN][WRAPPER] - {location}: {type(llm).__name__}")
+
+            # Patch all unique LLM classes
+            patched_classes = set()
+            for location, llm in all_llms:
+                llm_class = type(llm)
+                if llm_class in patched_classes:
+                    print(f"[DASEIN][WRAPPER] {llm_class.__name__} already patched for {location}, skipping")
+                    continue
+
+                print(f"[DASEIN][WRAPPER] Patching {llm_class.__name__} (found in {location})...")
+
+                # Check what methods the LLM class has
+                # Only patch TOP-LEVEL methods to avoid double-deduplication from internal calls
+                print(f"[DASEIN][WRAPPER] Checking LLM methods...")
+                methods_to_patch = []
+                for method in ['invoke', 'ainvoke']:  # Only patch user-facing methods, not internal _generate
+                    if hasattr(llm_class, method):
+                        print(f"[DASEIN][WRAPPER] - Has {method}")
+                        methods_to_patch.append(method)
+
+                if not methods_to_patch:
+                    print(f"[DASEIN][WRAPPER] No methods to patch found!")
+                    return
+
+                # Check if we already patched this class
+                first_method = getattr(llm_class, methods_to_patch[0])
+                if hasattr(first_method, '_dasein_patched'):
+                    print(f"[DASEIN][WRAPPER] {llm_class.__name__} already patched, skipping")
+                    return
+
+                callback_handler = self._callback_handler
+
+                # Thread-local to track depth and max depth reached
+                import threading
+                _patch_depth = threading.local()
+
+                def get_max_depth():
+                    return getattr(_patch_depth, 'max_depth', 0)
+
+                def set_max_depth(val):
+                    _patch_depth.max_depth = val
+
+                def is_in_microturn():
+                    return getattr(_patch_depth, 'in_microturn', False)
+
+                def set_in_microturn(val):
+                    _patch_depth.in_microturn = val
+
+                # Thread-local state tracking for Summary calls (mirrors callback pattern)
+                def get_summary_calls_made():
+                    """Get count of Summary calls made in this run."""
+                    return getattr(_patch_depth, 'summary_calls_made', 0)
+
+                def increment_summary_calls():
+                    """Increment Summary call counter."""
+                    current = getattr(_patch_depth, 'summary_calls_made', 0)
+                    _patch_depth.summary_calls_made = current + 1
+                    return _patch_depth.summary_calls_made
+
+                # Patch ALL methods (silent)
+                for method_name in methods_to_patch:
+                    original_method = getattr(llm_class, method_name)
+                    is_async = 'a' in method_name and (method_name.startswith('a') or method_name.startswith('_a'))
+
+                    # Use a factory function to properly capture the closure variables
+                    def make_patched_method(orig_method, meth_name, is_async_method, depth_tracker, max_depth_getter, max_depth_setter, in_microturn_getter, in_microturn_setter, get_summary_calls, increment_summary):
+                        if is_async_method:
+                            async def patched_method(self_llm, *args, **kwargs):
+                                # Track depth to find the leaf method
+                                depth = getattr(depth_tracker, 'value', 0)
+                                is_entry_point = (depth == 0)
+                                depth_tracker.value = depth + 1
+                                current_depth = depth_tracker.value
+
+                                # Track max depth reached (silent)
+                                if is_entry_point:
+                                    max_depth_setter(current_depth)
+                                else:
+                                    if current_depth > max_depth_getter():
+                                        max_depth_setter(current_depth)
+
+                                # 🔥 PIPECLEANER DEDUPLICATION (only patching top-level methods, always apply)
+                                # Skip depth checks - they don't work with async/parallel execution
+                                if callback_handler:
+                                    try:
+                                        # Extract messages from args based on method signature
+                                        messages_to_dedupe = None
+                                        arg_index = 0
+
+                                        if meth_name in ['invoke', 'ainvoke']:
+                                            # First arg is 'input' (can be string, list, or PromptValue)
+                                            messages_to_dedupe = args[0] if args else kwargs.get('input', kwargs.get('messages'))
+                                            arg_index = 0
+                                        elif meth_name in ['_generate', '_agenerate']:
+                                            # First arg is 'messages' (list of BaseMessage)
+                                            messages_to_dedupe = args[0] if args else kwargs.get('messages')
+                                            arg_index = 0
+                                        elif meth_name in ['generate', 'agenerate']:
+                                            # First arg is 'prompts' (list of message lists)
+                                            messages_to_dedupe = args[0] if args else kwargs.get('prompts')
+                                            arg_index = 0
+
+                                        # Convert to strings for deduplication
+                                        if messages_to_dedupe:
+                                            prompt_strings = []
+                                            for msg in (messages_to_dedupe if isinstance(messages_to_dedupe, list) else [messages_to_dedupe]):
+                                                if hasattr(msg, 'content'):
+                                                    prompt_strings.append(msg.content)
+                                                elif isinstance(msg, str):
+                                                    prompt_strings.append(msg)
+                                                else:
+                                                    prompt_strings.append(str(msg))
+
+                                            # =============================================================
+                                            # HOTPATH DEBUGGING (commented out for production)
+                                            # =============================================================
+                                            # print(f"\n{'='*70}")
+                                            # print(f"[🔥 HOTPATH FULL DEBUG] {meth_name}() call")
+                                            # print(f"{'='*70}")
+                                            #
+                                            # # 1. Callback state
+                                            # current_node = getattr(callback_handler, '_current_chain_node', None)
+                                            # current_tool = getattr(callback_handler, '_current_tool_name', None)
+                                            # print(f"[🔥] Current node: {current_node}")
+                                            # print(f"[🔥] Current tool: {current_tool}")
+                                            #
+                                            # # 2. Tools in this call
+                                            # tools_in_call = []
+                                            # if 'invocation_params' in kwargs:
+                                            #     tools = kwargs['invocation_params'].get('tools') or kwargs['invocation_params'].get('functions') or []
+                                            #     tools_in_call = [t.get('name', t.get('function', {}).get('name', '?')) for t in tools]
+                                            # elif 'tools' in kwargs:
+                                            #     tools_in_call = [t.get('name', '?') for t in kwargs.get('tools', [])]
+                                            # elif 'functions' in kwargs:
+                                            #     tools_in_call = [t.get('name', '?') for t in kwargs.get('functions', [])]
+                                            # print(f"[🔥] Tools in call: {tools_in_call if tools_in_call else 'NONE'}")
+                                            #
+                                            # # 3. Prompt characteristics
+                                            # prompt_lens = [len(s) for s in prompt_strings]
+                                            # print(f"[🔥] Prompt count: {len(prompt_strings)}")
+                                            # print(f"[🔥] Prompt lengths: {prompt_lens}")
+                                            #
+                                            # # 4. Kwargs keys (for debugging)
+                                            # print(f"[🔥] Kwargs keys: {list(kwargs.keys())}")
+                                            #
+                                            # # 5. Messages structure
+                                            # if messages_to_dedupe:
+                                            #     if isinstance(messages_to_dedupe, list):
+                                            #         msg_types = [type(m).__name__ for m in messages_to_dedupe[:3]]
+                                            #         print(f"[🔥] Message types (first 3): {msg_types}")
+                                            #     else:
+                                            #         print(f"[🔥] Messages type: {type(messages_to_dedupe).__name__}")
+                                            #
+                                            # print(f"{'='*70}\n")
+                                            #
+                                            # # Show first 200 chars to see the fingerprint
+                                            # if prompt_strings:
+                                            #     first_200 = prompt_strings[0][:200] if len(prompt_strings[0]) > 200 else prompt_strings[0]
+                                            #     print(f"[🔥] Prompt start (200 chars): {first_200}")

+                                            # =============================================================
+                                            # Extract tools from LLM call kwargs (for filter_search rules)
+                                            # =============================================================
+                                            tools_in_this_call = []
+
+                                            # Extract tool names from kwargs (handles multiple LLM providers' formats)
+                                            # Pattern 1: invocation_params (some providers)
+                                            if 'invocation_params' in kwargs:
+                                                inv_params = kwargs['invocation_params']
+                                                tools_param = inv_params.get('tools') or inv_params.get('functions') or []
+                                                for t in tools_param:
+                                                    if isinstance(t, dict):
+                                                        # Try: t['name'] or t['function']['name']
+                                                        name = t.get('name') or (t.get('function', {}).get('name') if isinstance(t.get('function'), dict) else None)
+                                                        if name:
+                                                            tools_in_this_call.append(name)
+                                            # Pattern 2: Direct 'tools' key (common)
+                                            elif 'tools' in kwargs:
+                                                tools_param = kwargs.get('tools', [])
+                                                for t in tools_param:
+                                                    if isinstance(t, dict):
+                                                        # Try: t['name'] or t['function']['name']
+                                                        name = t.get('name') or (t.get('function', {}).get('name') if isinstance(t.get('function'), dict) else None)
+                                                        if name:
+                                                            tools_in_this_call.append(name)
+                                            # Pattern 3: 'functions' key (OpenAI function calling)
+                                            elif 'functions' in kwargs:
+                                                funcs_param = kwargs.get('functions', [])
+                                                for t in funcs_param:
+                                                    if isinstance(t, dict):
+                                                        name = t.get('name')
+                                                        if name:
+                                                            tools_in_this_call.append(name)
+
+                                            # Check if any filter_search rules match the tools in this LLM call
+                                            from .pipecleaner import _find_filter_search_rules
+                                            filter_rules = None
+                                            should_dedupe = False
+
+                                            if hasattr(callback_handler, '_selected_rules') and prompt_strings and tools_in_this_call:
+                                                # Get all filter_search rules (they specify which tools to target via references.tools)
+                                                filter_rules = _find_filter_search_rules('*', callback_handler._selected_rules)
+
+                                                # Check if any tool in this call matches rule's target tools
+                                                if filter_rules:
+                                                    for rule in filter_rules:
+                                                        # Handle both dict and object formats
+                                                        if isinstance(rule, dict):
+                                                            references = rule.get('references', {})
+                                                            rule_tools = references.get('tools', []) if isinstance(references, dict) else []
+                                                        else:
+                                                            references = getattr(rule, 'references', None)
+                                                            # references might be a dict or object, handle both
+                                                            if isinstance(references, dict):
+                                                                rule_tools = references.get('tools', [])
+                                                            elif references:
+                                                                rule_tools = getattr(references, 'tools', [])
+                                                            else:
+                                                                rule_tools = []
+
+                                                        for tool_in_call in tools_in_this_call:
+                                                            if tool_in_call.lower() in [rt.lower() for rt in rule_tools]:
+                                                                should_dedupe = True
+                                                                break
+                                                        if should_dedupe:
+                                                            break
+
+                                            if should_dedupe:
+                                                # Deduplicate each prompt
+                                                from .pipecleaner import get_or_create_corpus
+                                                import hashlib
+                                                corpus = get_or_create_corpus(callback_handler.run_id, verbose=callback_handler._verbose)
+
+                                                deduplicated_strings = []
+                                                for i, prompt_str in enumerate(prompt_strings):
+                                                    if len(prompt_str) < 2500:
+                                                        deduplicated_strings.append(prompt_str)
+                                                        continue
+
+                                                    # Split system/content like in callback
+                                                    system_part = prompt_str[:2000]
+                                                    content_part = prompt_str[2000:]
+                                                    prompt_id = f"p{i}_{hashlib.md5(content_part[:100].encode()).hexdigest()[:8]}"
+
+                                                    # Deduplicate (ASYNC - allows parallel Summary calls to batch together)
+                                                    deduplicated_content = await corpus.enqueue_prompt(prompt_id, content_part)
+                                                    deduplicated_str = system_part + deduplicated_content
+                                                    deduplicated_strings.append(deduplicated_str)
+
+                                                # Convert back to original format
+                                                if isinstance(messages_to_dedupe, list):
+                                                    for i, msg in enumerate(messages_to_dedupe):
+                                                        if i < len(deduplicated_strings) and hasattr(msg, 'content'):
+                                                            msg.content = deduplicated_strings[i]
+                                                elif isinstance(messages_to_dedupe, str):
+                                                    messages_to_dedupe = deduplicated_strings[0] if deduplicated_strings else messages_to_dedupe
+
+                                                # Replace in args/kwargs
+                                                if args and arg_index < len(args):
+                                                    args = list(args)
+                                                    args[arg_index] = messages_to_dedupe
+                                                    args = tuple(args)
+                                                elif 'input' in kwargs:
+                                                    kwargs['input'] = messages_to_dedupe
+                                                elif 'messages' in kwargs:
+                                                    kwargs['messages'] = messages_to_dedupe
+                                                elif 'prompts' in kwargs:
+                                                    kwargs['prompts'] = messages_to_dedupe
+                                    except Exception as e:
+                                        print(f"[🔥 HOTPATH] ⚠️ Deduplication error: {e}")
+                                        import traceback
+                                        traceback.print_exc()
+
+                                try:
+                                    result = await orig_method(self_llm, *args, **kwargs)
+
+                                    # 🚨 MICROTURN ENFORCEMENT - DISABLED
+                                    # Microturn can interfere with tool execution, so it's disabled
+                                    # TODO: Re-enable with proper gating if needed for specific use cases
+
+                                    return result
+                                finally:
+                                    depth_tracker.value = depth  # Restore depth on exit
+                                    # Clear processed tool calls set when returning to entry point (prevents memory leak)
+                                    if depth == 0:
+                                        if hasattr(_patch_depth, 'processed_tool_calls'):
+                                            _patch_depth.processed_tool_calls.clear()
+                                        if hasattr(_patch_depth, 'seen_tool_signatures'):
+                                            _patch_depth.seen_tool_signatures.clear()
+                                        if hasattr(_patch_depth, 'tool_result_cache'):
+                                            _patch_depth.tool_result_cache.clear()
+                        else:
+                            def patched_method(self_llm, *args, **kwargs):
+                                # Track depth to find the leaf method
+                                depth = getattr(depth_tracker, 'value', 0)
+                                is_entry_point = (depth == 0)
+                                depth_tracker.value = depth + 1
+                                current_depth = depth_tracker.value
+
+                                # Track max depth reached
+                                if is_entry_point:
+                                    max_depth_setter(current_depth)  # Reset for new entry
+                                else:
+                                    # Update max if we went deeper
+                                    if current_depth > max_depth_getter():
+                                        max_depth_setter(current_depth)
+
+                                # 🔥 PIPECLEANER DEDUPLICATION (only patching top-level methods, always apply)
+                                # Skip depth checks - they don't work with async/parallel execution
+                                if callback_handler:
+                                    try:
+                                        # Extract messages from args based on method signature
+                                        messages_to_dedupe = None
+                                        arg_index = 0
+
+                                        if meth_name in ['invoke', 'ainvoke']:
+                                            messages_to_dedupe = args[0] if args else kwargs.get('input', kwargs.get('messages'))
+                                            arg_index = 0
+                                        elif meth_name in ['_generate', '_agenerate']:
+                                            messages_to_dedupe = args[0] if args else kwargs.get('messages')
+                                            arg_index = 0
+                                        elif meth_name in ['generate', 'agenerate']:
+                                            messages_to_dedupe = args[0] if args else kwargs.get('prompts')
+                                            arg_index = 0
+
+                                        # Convert to strings for deduplication
+                                        if messages_to_dedupe:
+                                            prompt_strings = []
+                                            for msg in (messages_to_dedupe if isinstance(messages_to_dedupe, list) else [messages_to_dedupe]):
+                                                if hasattr(msg, 'content'):
+                                                    prompt_strings.append(msg.content)
+                                                elif isinstance(msg, str):
+                                                    prompt_strings.append(msg)
+                                                else:
+                                                    prompt_strings.append(str(msg))
+
+                                            # =============================================================
+                                            # HOTPATH DEBUGGING (commented out for production)
+                                            # =============================================================
+                                            # print(f"\n{'='*70}")
+                                            # print(f"[🔥 HOTPATH FULL DEBUG] {meth_name}() call")
+                                            # print(f"{'='*70}")
+                                            #
+                                            # # 1. Callback state
+                                            # current_node = getattr(callback_handler, '_current_chain_node', None)
+                                            # current_tool = getattr(callback_handler, '_current_tool_name', None)
+                                            # print(f"[🔥] Current node: {current_node}")
+                                            # print(f"[🔥] Current tool: {current_tool}")
+                                            #
+                                            # # 2. Tools in this call
+                                            # tools_in_call = []
+                                            # if 'invocation_params' in kwargs:
+                                            #     tools = kwargs['invocation_params'].get('tools') or kwargs['invocation_params'].get('functions') or []
+                                            #     tools_in_call = [t.get('name', t.get('function', {}).get('name', '?')) for t in tools]
+                                            # elif 'tools' in kwargs:
+                                            #     tools_in_call = [t.get('name', '?') for t in kwargs.get('tools', [])]
+                                            # elif 'functions' in kwargs:
+                                            #     tools_in_call = [t.get('name', '?') for t in kwargs.get('functions', [])]
+                                            # print(f"[🔥] Tools in call: {tools_in_call if tools_in_call else 'NONE'}")
+                                            #
+                                            # # 3. Prompt characteristics
+                                            # prompt_lens = [len(s) for s in prompt_strings]
+                                            # print(f"[🔥] Prompt count: {len(prompt_strings)}")
+                                            # print(f"[🔥] Prompt lengths: {prompt_lens}")
+                                            #
+                                            # # 4. Kwargs keys (for debugging)
+                                            # print(f"[🔥] Kwargs keys: {list(kwargs.keys())}")
+                                            #
+                                            # # 5. Messages structure
+                                            # if messages_to_dedupe:
+                                            #     if isinstance(messages_to_dedupe, list):
+                                            #         msg_types = [type(m).__name__ for m in messages_to_dedupe[:3]]
+                                            #         print(f"[🔥] Message types (first 3): {msg_types}")
+                                            #     else:
+                                            #         print(f"[🔥] Messages type: {type(messages_to_dedupe).__name__}")
+                                            #
+                                            # print(f"{'='*70}\n")
+                                            #
+                                            # # Show first 200 chars to see the fingerprint
+                                            # if prompt_strings:
+                                            #     first_200 = prompt_strings[0][:200] if len(prompt_strings[0]) > 200 else prompt_strings[0]
+                                            #     print(f"[🔥] Prompt start (200 chars): {first_200}")

+                                            # =============================================================
+                                            # Extract tools from LLM call kwargs (for filter_search rules)
+                                            # =============================================================
+                                            tools_in_this_call = []
+
+                                            # Extract tool names from kwargs (handles multiple LLM providers' formats)
+                                            # Pattern 1: invocation_params (some providers)
+                                            if 'invocation_params' in kwargs:
+                                                inv_params = kwargs['invocation_params']
+                                                tools_param = inv_params.get('tools') or inv_params.get('functions') or []
+                                                for t in tools_param:
+                                                    if isinstance(t, dict):
+                                                        # Try: t['name'] or t['function']['name']
+                                                        name = t.get('name') or (t.get('function', {}).get('name') if isinstance(t.get('function'), dict) else None)
+                                                        if name:
+                                                            tools_in_this_call.append(name)
+                                            # Pattern 2: Direct 'tools' key (common)
+                                            elif 'tools' in kwargs:
+                                                tools_param = kwargs.get('tools', [])
+                                                for t in tools_param:
+                                                    if isinstance(t, dict):
+                                                        # Try: t['name'] or t['function']['name']
+                                                        name = t.get('name') or (t.get('function', {}).get('name') if isinstance(t.get('function'), dict) else None)
+                                                        if name:
+                                                            tools_in_this_call.append(name)
+                                            # Pattern 3: 'functions' key (OpenAI function calling)
+                                            elif 'functions' in kwargs:
+                                                funcs_param = kwargs.get('functions', [])
+                                                for t in funcs_param:
+                                                    if isinstance(t, dict):
+                                                        name = t.get('name')
+                                                        if name:
+                                                            tools_in_this_call.append(name)
+
+                                            # Check if any filter_search rules match the tools in this LLM call
+                                            from .pipecleaner import _find_filter_search_rules
+                                            filter_rules = None
+                                            should_dedupe = False
+
+                                            if hasattr(callback_handler, '_selected_rules') and prompt_strings and tools_in_this_call:
+                                                # Get all filter_search rules (they specify which tools to target via references.tools)
+                                                filter_rules = _find_filter_search_rules('*', callback_handler._selected_rules)
+
+                                                # Check if any tool in this call matches rule's target tools
+                                                if filter_rules:
+                                                    for rule in filter_rules:
+                                                        # Handle both dict and object formats
+                                                        if isinstance(rule, dict):
+                                                            references = rule.get('references', {})
+                                                            rule_tools = references.get('tools', []) if isinstance(references, dict) else []
+                                                        else:
+                                                            references = getattr(rule, 'references', None)
+                                                            # references might be a dict or object, handle both
+                                                            if isinstance(references, dict):
+                                                                rule_tools = references.get('tools', [])
+                                                            elif references:
+                                                                rule_tools = getattr(references, 'tools', [])
+                                                            else:
+                                                                rule_tools = []
+
+                                                        for tool_in_call in tools_in_this_call:
+                                                            if tool_in_call.lower() in [rt.lower() for rt in rule_tools]:
+                                                                should_dedupe = True
+                                                                break
+                                                        if should_dedupe:
+                                                            break
+
+                                            if should_dedupe:
+                                                # Deduplicate each prompt
+                                                from .pipecleaner import get_or_create_corpus
+                                                import hashlib
+                                                corpus = get_or_create_corpus(callback_handler.run_id, verbose=callback_handler._verbose)
+
+                                                deduplicated_strings = []
+                                                for i, prompt_str in enumerate(prompt_strings):
+                                                    if len(prompt_str) < 2500:
+                                                        deduplicated_strings.append(prompt_str)
+                                                        continue
+
+                                                    # Split system/content like in callback
+                                                    system_part = prompt_str[:2000]
+                                                    content_part = prompt_str[2000:]
+                                                    prompt_id = f"p{i}_{hashlib.md5(content_part[:100].encode()).hexdigest()[:8]}"
+
+                                                    # Deduplicate (wrap async in sync context)
+                                                    import asyncio
+                                                    try:
+                                                        loop = asyncio.get_event_loop()
+                                                    except RuntimeError:
+                                                        loop = asyncio.new_event_loop()
+                                                        asyncio.set_event_loop(loop)
+
+                                                    deduplicated_content = loop.run_until_complete(corpus.enqueue_prompt(prompt_id, content_part))
+                                                    deduplicated_str = system_part + deduplicated_content
+                                                    deduplicated_strings.append(deduplicated_str)
+
+                                                # Convert back to original format
+                                                if isinstance(messages_to_dedupe, list):
+                                                    for i, msg in enumerate(messages_to_dedupe):
+                                                        if i < len(deduplicated_strings) and hasattr(msg, 'content'):
+                                                            msg.content = deduplicated_strings[i]
+                                                elif isinstance(messages_to_dedupe, str):
+                                                    messages_to_dedupe = deduplicated_strings[0] if deduplicated_strings else messages_to_dedupe
+
+                                                # Replace in args/kwargs
+                                                if args and arg_index < len(args):
+                                                    args = list(args)
+                                                    args[arg_index] = messages_to_dedupe
+                                                    args = tuple(args)
+                                                elif 'input' in kwargs:
+                                                    kwargs['input'] = messages_to_dedupe
+                                                elif 'messages' in kwargs:
+                                                    kwargs['messages'] = messages_to_dedupe
+                                                elif 'prompts' in kwargs:
+                                                    kwargs['prompts'] = messages_to_dedupe
+                                    except Exception as e:
+                                        print(f"[🔥 HOTPATH] ⚠️ Deduplication error: {e}")
+                                        import traceback
+                                        traceback.print_exc()
+
+                                try:
+                                    result = orig_method(self_llm, *args, **kwargs)
+
+                                    # 🚨 MICROTURN ENFORCEMENT - DISABLED (can interfere with tool execution)
+                                    # TODO: Re-enable with proper gating if needed
+
+                                    return result
+                                finally:
+                                    depth_tracker.value = depth  # Restore depth on exit
+                                    # Clear processed tool calls set when returning to entry point (prevents memory leak)
+                                    if depth == 0:
+                                        if hasattr(_patch_depth, 'processed_tool_calls'):
+                                            _patch_depth.processed_tool_calls.clear()
+                                        if hasattr(_patch_depth, 'seen_tool_signatures'):
+                                            _patch_depth.seen_tool_signatures.clear()
+                                        if hasattr(_patch_depth, 'tool_result_cache'):
+                                            _patch_depth.tool_result_cache.clear()
+                        return patched_method
+
+                    patched_method = make_patched_method(original_method, method_name, is_async, _patch_depth, get_max_depth, set_max_depth, is_in_microturn, set_in_microturn, get_summary_calls_made, increment_summary_calls)
+
+                    # Mark and apply the patch
+                    patched_method._dasein_patched = True
+                    setattr(llm_class, method_name, patched_method)
+                    print(f"[DASEIN][WRAPPER] Patched {method_name}")
+
+                # Mark this class as patched
+                patched_classes.add(llm_class)
+                self._wrapped_llm = llm
+                print(f"[DASEIN][WRAPPER] Successfully patched {len(methods_to_patch)} methods in {llm_class.__name__}")
+
+            print(f"[DASEIN][WRAPPER] Finished patching {len(patched_classes)} unique LLM class(es)")
+            return
         except Exception as e:
-            self._vprint(f"[DASEIN][WRAPPER] Failed to wrap agent LLM: {e}")
+            print(f"[DASEIN][WRAPPER] Failed to wrap agent LLM: {e}")
+            import traceback
+            traceback.print_exc()
             self._wrapped_llm = None

     def _set_callback_handler_llm(self):
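
Because the patch above is applied to the LLM class rather than an instance, every instance sharing that class is covered by one patch, and the _dasein_patched sentinel is what keeps reruns from stacking wrappers. A reduced sketch of the same pattern (names hypothetical; only the sentinel convention comes from the code above):

    import functools

    def patch_invoke(llm_class, before_call):
        original = llm_class.invoke
        if getattr(original, '_dasein_patched', False):
            return  # class already patched; all instances share this method

        @functools.wraps(original)
        def patched(self, *args, **kwargs):
            before_call(args, kwargs)   # e.g., dedupe prompt content pre-flight
            return original(self, *args, **kwargs)

        patched._dasein_patched = True
        llm_class.invoke = patched
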
@@ -1312,7 +2347,7 @@ Follow these rules when planning your actions."""

         return False

-    def _replace_llm_in_structure(self, obj, original_llm, wrapped_llm, max_depth=5, path=""):
+    def _replace_llm_in_structure(self, obj, original_llm, wrapped_llm, max_depth=5, path="", count=[0]):
         """Replace the original LLM with wrapped LLM in the structure."""
         if max_depth <= 0:
             return
@@ -1321,16 +2356,16 @@ Follow these rules when planning your actions."""
         if hasattr(obj, 'steps') and hasattr(obj, '__iter__'):
            for i, step in enumerate(obj.steps):
                if step is original_llm:
-                    self._vprint(f"[DASEIN][WRAPPER] Replacing LLM at {path}.steps[{i}]")
+                    count[0] += 1
+                    print(f"[DASEIN][WRAPPER] Replacing LLM #{count[0]} at {path}.steps[{i}]")
                    obj.steps[i] = wrapped_llm
-                    return
                # Check if step has bound attribute (RunnableBinding)
                if hasattr(step, 'bound') and step.bound is original_llm:
-                    self._vprint(f"[DASEIN][WRAPPER] Replacing LLM at {path}.steps[{i}].bound")
+                    count[0] += 1
+                    print(f"[DASEIN][WRAPPER] Replacing LLM #{count[0]} at {path}.steps[{i}].bound")
                    step.bound = wrapped_llm
-                    return
                # Recursively search in the step
-                self._replace_llm_in_structure(step, original_llm, wrapped_llm, max_depth - 1, f"{path}.steps[{i}]")
+                self._replace_llm_in_structure(step, original_llm, wrapped_llm, max_depth - 1, f"{path}.steps[{i}]", count)

         # Search in attributes
         for attr_name in dir(obj):
@@ -1339,8 +2374,12 @@ Follow these rules when planning your actions."""
  try:
  attr_value = getattr(obj, attr_name)
  if attr_value is original_llm:
- self._vprint(f"[DASEIN][WRAPPER] Replacing LLM at {path}.{attr_name}")
+ print(f"[DASEIN][WRAPPER] Replacing LLM at {path}.{attr_name}")
  setattr(obj, attr_name, wrapped_llm)
+ # Verify replacement
+ new_value = getattr(obj, attr_name)
+ print(f"[DASEIN][WRAPPER] After replacement, {path}.{attr_name} is now: {type(new_value).__name__}")
+ print(f"[DASEIN][WRAPPER] Is it our wrapper? {isinstance(new_value, DaseinLLMWrapper)}")
  return
  # Recursively search in the attribute
  if hasattr(attr_value, '__dict__') or hasattr(attr_value, '__iter__'):
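A few details of `_replace_llm_in_structure` are worth calling out: matches use `is` (identity, not equality), the early `return` statements were removed so every occurrence gets replaced, and the `count=[0]` list is threaded through the recursion as a shared counter. A simplified standalone sketch of the same traversal; note that a mutable default argument persists across top-level calls, so the fresh-list-per-call variant shown here is the safer shape:

    def replace_in_structure(obj, target, replacement, max_depth=5, path="", count=None):
        """Swap every occurrence of `target` (compared by identity) for `replacement`."""
        if count is None:
            count = [0]                          # fresh counter per top-level call
        if max_depth <= 0:
            return count[0]
        steps = getattr(obj, "steps", None)      # sequence containers, e.g. RunnableSequence
        if steps is not None:
            for i, step in enumerate(steps):
                if step is target:
                    count[0] += 1
                    steps[i] = replacement
                elif getattr(step, "bound", None) is target:   # RunnableBinding case
                    count[0] += 1
                    step.bound = replacement
                else:
                    replace_in_structure(step, target, replacement,
                                         max_depth - 1, f"{path}.steps[{i}]", count)
        return count[0]

Because `count` is a list, every recursive frame increments the same counter, which is what lets the real code print a global replacement index like "Replacing LLM #3".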
@@ -1725,6 +2764,12 @@ Follow these rules when planning your actions."""
  # Run the agent
  result = self._agent.invoke(*args, **kwargs)

+ # Print tools summary if available
+ if hasattr(self._callback_handler, 'get_compiled_tools_summary'):
+ summary = self._callback_handler.get_compiled_tools_summary()
+ if summary:
+ print(f"[DASEIN] {summary}")
+
  # FIXED: Extract trace for display but never calculate KPIs locally
  # Service-first architecture: All KPI calculation done by distributed services
  self._vprint(f"[DASEIN][SERVICE_FIRST] Extracting trace for display - KPIs handled by post-run API service")
@@ -1749,6 +2794,11 @@ Follow these rules when planning your actions."""
  # Clear tool rules from system prompt after triggering async post-run
  self._clear_tool_rules_from_system()

+ # Cleanup run-scoped corpus (print telemetry and free memory)
+ if hasattr(self, '_callback_handler') and hasattr(self._callback_handler, 'run_id'):
+ from .pipecleaner import cleanup_corpus
+ cleanup_corpus(self._callback_handler.run_id)
+
  return result

  async def _ainvoke_single(self, *args, **kwargs):
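Both the sync and async paths print the tools summary through a `hasattr` guard, so the callback handler only has to implement `get_compiled_tools_summary` when it has something to report. A small sketch of that optional-capability pattern, using a hypothetical handler class:

    class Handler:
        def __init__(self, tools):
            self._tools = tools

        def get_compiled_tools_summary(self):
            # Return None when there is nothing worth printing
            if not self._tools:
                return None
            return f"compiled metadata for {len(self._tools)} tool(s)"

    def report(handler):
        # Guarded call: handlers without the method are silently skipped
        if hasattr(handler, "get_compiled_tools_summary"):
            summary = handler.get_compiled_tools_summary()
            if summary:
                print(f"[DASEIN] {summary}")

    report(Handler(["duckduckgo_search"]))  # prints the summary
    report(object())                        # no method, no output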
@@ -1796,6 +2846,12 @@ Follow these rules when planning your actions."""
  # Run the agent asynchronously
  result = await self._agent.ainvoke(*args, **kwargs)

+ # Print tools summary if available
+ if hasattr(self._callback_handler, 'get_compiled_tools_summary'):
+ summary = self._callback_handler.get_compiled_tools_summary()
+ if summary:
+ print(f"[DASEIN] {summary}")
+
  # FIXED: Extract trace for display but never calculate KPIs locally
  # Service-first architecture: All KPI calculation done by distributed services
  self._vprint(f"[DASEIN][SERVICE_FIRST] Extracting trace for display - KPIs handled by post-run API service")
@@ -1820,6 +2876,11 @@ Follow these rules when planning your actions."""
  # Clear tool rules from system prompt after triggering async post-run
  self._clear_tool_rules_from_system()

+ # Cleanup run-scoped corpus (print telemetry and free memory)
+ if hasattr(self, '_callback_handler') and hasattr(self._callback_handler, 'run_id'):
+ from .pipecleaner import cleanup_corpus
+ cleanup_corpus(self._callback_handler.run_id)
+
  return result

  def _invoke_with_retry(self, *args, **kwargs):
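After each run, both paths call `cleanup_corpus(run_id)` to release the per-run cache. The `dasein.pipecleaner` internals are not shown in this diff, so the following is only an assumed shape for such a run-scoped registry, keyed by `run_id` and freed exactly once:

    _corpora = {}  # run_id -> accumulated entries for that run

    def get_corpus(run_id):
        return _corpora.setdefault(run_id, [])

    def cleanup_corpus(run_id):
        corpus = _corpora.pop(run_id, None)  # pop makes repeated cleanup a no-op
        if corpus is not None:
            print(f"[PIPECLEANER] run {run_id}: freed {len(corpus)} cached entries")

    get_corpus("run-1").append("tool output")
    cleanup_corpus("run-1")  # prints telemetry and releases the memory
    cleanup_corpus("run-1")  # second call does nothing

Popping by key (rather than clearing a global) is what keeps concurrent runs from evicting each other's corpora.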
@@ -2231,11 +3292,8 @@ Follow these rules when planning your actions."""
  print(f" Wall Time (ms): {metrics['wall_time_ms']}")
  print(f" Success Rate: {metrics['success_rate']:.1f}% ({metrics['total_turns']}/{metrics['total_turns']})")
  print(f" Overall Success: {'✅' if metrics['overall_success'] else '❌'}")
- # Format final outcome using the wrapped LLM's method
- if hasattr(self, '_wrapped_llm') and self._wrapped_llm:
- final_outcome_formatted = self._wrapped_llm._format_final_outcome(metrics.get('final_outcome', 'unknown'))
- else:
- final_outcome_formatted = f" {metrics.get('final_outcome', 'unknown')}"
+ # Format final outcome
+ final_outcome_formatted = self._format_final_outcome(metrics.get('final_outcome', 'unknown'))
  print(f" Final Outcome: {final_outcome_formatted}")
  print(f" Result: {str(metrics['result'])[:100]}...")

@@ -2317,12 +3375,8 @@ Follow these rules when planning your actions."""
  first_outcome = first_metrics.get('final_outcome', 'unknown')
  last_outcome = last_metrics.get('final_outcome', 'unknown')
  # Format final outcomes using the wrapped LLM's method
- if hasattr(self, '_wrapped_llm') and self._wrapped_llm:
- first_formatted = self._wrapped_llm._format_final_outcome(first_outcome)
- last_formatted = self._wrapped_llm._format_final_outcome(last_outcome)
- else:
- first_formatted = f"❓ {first_outcome}"
- last_formatted = f"❓ {last_outcome}"
+ first_formatted = self._format_final_outcome(first_outcome)
+ last_formatted = self._format_final_outcome(last_outcome)
  outcome_improvement = f"{first_formatted} → {last_formatted}"
  print(f"🎯 Final Outcome: {outcome_improvement}")

@@ -2344,13 +3398,8 @@ Follow these rules when planning your actions."""

  # Prioritize final outcome improvement
  if outcome_improved:
- # Format final outcomes using the wrapped LLM's method
- if hasattr(self, '_wrapped_llm') and self._wrapped_llm:
- first_formatted = self._wrapped_llm._format_final_outcome(first_outcome)
- last_formatted = self._wrapped_llm._format_final_outcome(last_outcome)
- else:
- first_formatted = f" {first_outcome}"
- last_formatted = f" {last_outcome}"
+ first_formatted = self._format_final_outcome(first_outcome)
+ last_formatted = self._format_final_outcome(last_outcome)
  print(f"🎉 BREAKTHROUGH: Agent went from {first_formatted} to {last_formatted}!")
  elif turns_improved or tokens_improved or time_improved or success_improved:
  improvements = []
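These hunks replace the `hasattr`-guarded delegation through `self._wrapped_llm._format_final_outcome` with a direct call to `self._format_final_outcome`, removing the fallback branch at every call site. The method body itself is not part of this diff; a plausible minimal shape, assuming it maps outcome labels onto the status icons seen elsewhere in this output (`✅`, `❌`, `❓`):

    def _format_final_outcome(self, outcome):
        # Hypothetical sketch: map outcome labels to the icons used in the summary output
        icons = {'success': '✅', 'failure': '❌'}
        return f"{icons.get(outcome, '❓')} {outcome}"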
@@ -2625,6 +3674,38 @@ Follow these rules when planning your actions."""

  print(f"[DASEIN] Pre-run service returned {len(selected_rules)} rules")

+ # Pre-load embedding model if we have filter_search rules (avoid timeout on first batch)
+ if selected_rules:
+ # Check for any llm_start rules with "filter search" keywords
+ has_filter_search_rules = False
+ for rule_meta in selected_rules:
+ # Unwrap tuple if needed
+ rule_obj = rule_meta[0] if isinstance(rule_meta, tuple) and len(rule_meta) == 2 else rule_meta
+
+ # Check if this is an llm_start rule with filter/search keywords
+ # Handle both dict and object formats
+ if isinstance(rule_obj, dict):
+ target_step_type = rule_obj.get('target_step_type')
+ advice = rule_obj.get('advice_text') or rule_obj.get('advice', '')
+ else:
+ target_step_type = getattr(rule_obj, 'target_step_type', None)
+ advice = getattr(rule_obj, 'advice_text', None) or getattr(rule_obj, 'advice', None) or ''
+
+ advice_lower = advice.lower() if advice else ''
+
+ if target_step_type == 'llm_start' and 'filter' in advice_lower and 'search' in advice_lower:
+ has_filter_search_rules = True
+ break
+
+ if has_filter_search_rules:
+ print(f"[DASEIN] 🔧 Pre-loading embedding model for pipecleaner (found filter search rules)...")
+ from .pipecleaner import _get_embedding_model
+ try:
+ _get_embedding_model() # Warm up the model
+ print(f"[DASEIN] ✅ Embedding model pre-loaded successfully")
+ except Exception as e:
+ print(f"[DASEIN] ⚠️ Failed to pre-load embedding model: {e}")
+
  # CRITICAL: For LangGraph agents, recreate with injected prompt
  if self._is_langgraph and selected_rules:
  print(f" [DASEIN][PRERUN] LangGraph agent detected with {len(selected_rules)} rules")
@@ -2826,75 +3907,13 @@ Follow these rules when planning your actions."""

  agent_fingerprint = _minimal_agent_fingerprint(self._agent)

- # Extract tool metadata for Stage 3.5 tool grounding
- def _extract_tool_metadata(agent):
- """
- Extract tool metadata (name, description, args_schema) from agent.
-
- CRITICAL: Extracts ALL available tools from the agent, not just tools used in trace.
- Why: If agent used wrong tool (e.g., extract_text instead of get_elements),
- the trace won't show the correct tool. Stage 3.5 needs to see all options
- to suggest better alternatives.
- """
- tools_metadata = []
- tools_to_process = []
-
- # Get ALL tools from agent (LangChain or LangGraph) - not filtered by trace usage
- tools_attr = getattr(agent, 'tools', None)
- if tools_attr:
- try:
- tools_to_process = list(tools_attr)
- except Exception:
- pass
- elif getattr(agent, 'toolkit', None):
- tk = getattr(agent, 'toolkit')
- tk_tools = getattr(tk, 'tools', None) or getattr(tk, 'get_tools', None)
- try:
- tools_to_process = list(tk_tools() if callable(tk_tools) else tk_tools or [])
- except Exception:
- pass
-
- # Also try LangGraph tools from compiled graph
- if hasattr(agent, 'nodes') and 'tools' in agent.nodes:
- tools_node = agent.nodes['tools']
- if hasattr(tools_node, 'node') and hasattr(tools_node.node, 'steps'):
- for step in tools_node.node.steps:
- if hasattr(step, 'tools_by_name'):
- tools_to_process.extend(step.tools_by_name.values())
- break
-
- # Extract metadata from each tool
- for tool in tools_to_process:
- try:
- tool_meta = {
- 'name': getattr(tool, 'name', str(tool.__class__.__name__)),
- 'description': getattr(tool, 'description', ''),
- }
-
- # Extract args_schema if available
- if hasattr(tool, 'args_schema') and tool.args_schema:
- try:
- # Try Pydantic v2 method
- if hasattr(tool.args_schema, 'model_json_schema'):
- tool_meta['args_schema'] = tool.args_schema.model_json_schema()
- # Fallback to Pydantic v1 method
- elif hasattr(tool.args_schema, 'schema'):
- tool_meta['args_schema'] = tool.args_schema.schema()
- else:
- tool_meta['args_schema'] = {}
- except Exception:
- tool_meta['args_schema'] = {}
- else:
- tool_meta['args_schema'] = {}
-
- tools_metadata.append(tool_meta)
- except Exception as e:
- # Skip tools that fail to extract
- pass
-
- return tools_metadata
-
- tools_metadata = _extract_tool_metadata(self._agent)
+ # Get tool metadata from callback handler (extracted during runtime)
+ tools_metadata = []
+ if hasattr(self._callback_handler, '_compiled_tools_metadata'):
+ tools_metadata = self._callback_handler._compiled_tools_metadata
+ # Fallback: try extracting now (may not work if tools unbound)
+ if not tools_metadata:
+ tools_metadata = self._extract_tool_metadata(self._agent)

  # Reuse existing graph analysis (already extracted in __init__)
  graph_metadata = None
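The deleted inline helper probed both Pydantic generations before dumping a tool's argument schema; the replacement now prefers metadata captured by the callback handler at runtime, falling back to extraction only when that is empty. The dual-version probe is the reusable part; a minimal sketch:

    def dump_args_schema(args_schema):
        """Return a JSON-schema dict from a Pydantic v1 or v2 model class, else {}."""
        if args_schema is None:
            return {}
        try:
            if hasattr(args_schema, 'model_json_schema'):   # Pydantic v2
                return args_schema.model_json_schema()
            if hasattr(args_schema, 'schema'):              # Pydantic v1
                return args_schema.schema()
        except Exception:
            pass
        return {}

    def tool_metadata(tool):
        return {
            'name': getattr(tool, 'name', type(tool).__name__),
            'description': getattr(tool, 'description', ''),
            'args_schema': dump_args_schema(getattr(tool, 'args_schema', None)),
        }

Probing by `hasattr` rather than by version import keeps the helper working against either Pydantic without pinning a dependency.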
@@ -3512,11 +4531,8 @@ Follow these rules when planning your actions."""

  # Step-by-step comparison
  print(f"\n STEP-BY-STEP COMPARISON:")
- # Format step 1 outcome using the wrapped LLM's method
- if hasattr(self, '_wrapped_llm') and self._wrapped_llm:
- step1_outcome = self._wrapped_llm._format_final_outcome(learning_metrics[0].get('final_outcome', 'unknown'))
- else:
- step1_outcome = f" {learning_metrics[0].get('final_outcome', 'unknown')}"
+ # Format step 1 outcome
+ step1_outcome = self._format_final_outcome(learning_metrics[0].get('final_outcome', 'unknown'))
  print(f" Step 1: {learning_metrics[0]['total_tokens']} tokens, {learning_metrics[0]['total_turns']} turns, {learning_metrics[0]['trace_time_ms']}ms, {learning_metrics[0]['success_rate']:.1f}%, {step1_outcome} (naive baseline)")

  # Compare steps 2→N (learning vs baseline)
@@ -3541,13 +4557,8 @@ Follow these rules when planning your actions."""
  failure_direction = "" if failure_change > 0 else "" if failure_change < 0 else "="

  # Get final outcomes for this step
- # Format final outcomes using the wrapped LLM's method
- if hasattr(self, '_wrapped_llm') and self._wrapped_llm:
- base_outcome = self._wrapped_llm._format_final_outcome(base.get('final_outcome', 'unknown'))
- learn_outcome = self._wrapped_llm._format_final_outcome(learn.get('final_outcome', 'unknown'))
- else:
- base_outcome = f" {base.get('final_outcome', 'unknown')}"
- learn_outcome = f" {learn.get('final_outcome', 'unknown')}"
+ base_outcome = self._format_final_outcome(base.get('final_outcome', 'unknown'))
+ learn_outcome = self._format_final_outcome(learn.get('final_outcome', 'unknown'))

  print(f" Step {i}: {base['total_tokens']} → {learn['total_tokens']} tokens ({token_direction}{abs(token_improvement)}, {token_pct:+.1f}%)")
  print(f" {base['total_turns']} → {learn['total_turns']} turns ({turn_direction}{abs(turn_improvement)}, {turn_pct:+.1f}%)")