dasein-core 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. dasein/api.py +1202 -133
  2. dasein/capture.py +2379 -1803
  3. dasein/microturn.py +475 -0
  4. dasein/models/en_core_web_sm/en_core_web_sm/__init__.py +10 -0
  5. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/LICENSE +19 -0
  6. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/LICENSES_SOURCES +66 -0
  7. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/README.md +47 -0
  8. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/accuracy.json +330 -0
  9. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/attribute_ruler/patterns +0 -0
  10. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/config.cfg +269 -0
  11. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/lemmatizer/lookups/lookups.bin +1 -0
  12. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/meta.json +521 -0
  13. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/cfg +13 -0
  14. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/model +0 -0
  15. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/moves +1 -0
  16. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/cfg +13 -0
  17. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/model +0 -0
  18. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/moves +1 -0
  19. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/senter/cfg +3 -0
  20. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/senter/model +0 -0
  21. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tagger/cfg +57 -0
  22. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tagger/model +0 -0
  23. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tok2vec/cfg +3 -0
  24. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tok2vec/model +0 -0
  25. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tokenizer +3 -0
  26. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/key2row +1 -0
  27. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/lookups.bin +0 -0
  28. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/strings.json +84782 -0
  29. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/vectors +0 -0
  30. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/vectors.cfg +3 -0
  31. dasein/models/en_core_web_sm/en_core_web_sm/meta.json +521 -0
  32. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/LICENSE +19 -0
  33. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/LICENSES_SOURCES +66 -0
  34. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/METADATA +59 -0
  35. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/RECORD +35 -0
  36. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/WHEEL +5 -0
  37. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/entry_points.txt +2 -0
  38. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/top_level.txt +1 -0
  39. dasein/pipecleaner.py +1917 -0
  40. dasein/wrappers.py +314 -0
  41. {dasein_core-0.2.7.dist-info → dasein_core-0.2.9.dist-info}/METADATA +4 -1
  42. dasein_core-0.2.9.dist-info/RECORD +59 -0
  43. dasein_core-0.2.7.dist-info/RECORD +0 -21
  44. {dasein_core-0.2.7.dist-info → dasein_core-0.2.9.dist-info}/WHEEL +0 -0
  45. {dasein_core-0.2.7.dist-info → dasein_core-0.2.9.dist-info}/licenses/LICENSE +0 -0
  46. {dasein_core-0.2.7.dist-info → dasein_core-0.2.9.dist-info}/top_level.txt +0 -0
dasein/api.py CHANGED
@@ -18,6 +18,15 @@ from .services import ServiceAdapter
 from .config import W_COST
 
 
+# ============================================================================
+# CONFIGURATION
+# ============================================================================
+
+# Microturn enforcement configuration
+USE_LLM_MICROTURN = False  # If True, use LLM to judge which calls to allow
+                           # If False, use deterministic duplicate detection only
+                           # (Keep False - LLM microturn adds latency, use only for semantic rules)
+
 # ============================================================================
 # VERBOSE LOGGING HELPER
 # ============================================================================
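
The deterministic path this flag falls back to lives in the new dasein/microturn.py (file 3 in the list above), which this diff does not show. As a rough sketch of what signature-based duplicate detection of this kind amounts to — an assumption about the shape of that module, not its shipped code — blocking reduces to refusing any tool call whose (name, args) fingerprint was already seen in the run:

    # Hypothetical sketch, assuming signature-based duplicate detection
    # (the real logic is in dasein/microturn.py, not shown in this diff).
    seen_signatures: set = set()

    def allow_call(name: str, args: dict) -> bool:
        sig = f"{name}:{sorted(args.items())}"
        if sig in seen_signatures:
            return False  # duplicate -> block
        seen_signatures.add(sig)
        return True
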
@@ -51,6 +60,7 @@ class DaseinLLMWrapper(BaseChatModel):
 
     def _generate(self, messages, stop=None, run_manager=None, **kwargs):
         """Generate response and capture trace."""
+        print(f"[DASEIN][WRAPPER] _generate called with {len(messages)} messages")
         self._vprint(f"[DASEIN][TRACE] LLM wrapper _generate called with {len(messages)} messages")
 
         # Get model name dynamically
@@ -163,6 +173,89 @@
         self._vprint(f"[DASEIN][TRACE] LLM result: {result_text[:100]}...")
         self._vprint(f"[DASEIN][METRICS] Tokens: {step['tokens_input']}->{output_tokens} | Time: {duration_ms}ms | Success: {'OK' if success else 'FAIL'}")
 
+        # 🚨 MICROTURN ENFORCEMENT - RUN 1 ONLY
+        run_number = getattr(self._callback_handler, '_run_number', 1) if self._callback_handler else 1
+        print(f"[DASEIN][MICROTURN_DEBUG] run_number={run_number}, callback_handler={self._callback_handler is not None}")
+        if run_number == 1 and self._callback_handler:
+            try:
+                proposed_func_name = None
+                print(f"[DASEIN][MICROTURN_DEBUG] Checking result for function call...")
+                if hasattr(result, 'generations') and result.generations:
+                    first_gen = result.generations[0]
+                    if isinstance(first_gen, list) and len(first_gen) > 0:
+                        generation = first_gen[0]
+                    else:
+                        generation = first_gen
+
+                    print(f"[DASEIN][MICROTURN_DEBUG] generation type: {type(generation)}")
+                    if hasattr(generation, 'message') and hasattr(generation.message, 'additional_kwargs'):
+                        func_call = generation.message.additional_kwargs.get('function_call')
+                        print(f"[DASEIN][MICROTURN_DEBUG] func_call: {func_call}")
+                        if func_call and isinstance(func_call, dict) and 'name' in func_call:
+                            proposed_func_name = func_call['name']
+                else:
+                    print(f"[DASEIN][MICROTURN_DEBUG] No generations in result")
+
+                if not proposed_func_name:
+                    print(f"[DASEIN][MICROTURN_DEBUG] No function call in response, skipping microturn")
+                else:
+                    print(f"[DASEIN][MICROTURN_DEBUG] Found proposed function: {proposed_func_name}")
+
+                    # Build execution state (BEFORE adding current call)
+                    state_lines = []
+                    if hasattr(self._callback_handler, '_function_calls_made') and self._callback_handler._function_calls_made:
+                        for fname in sorted(self._callback_handler._function_calls_made.keys()):
+                            count = len(self._callback_handler._function_calls_made[fname])
+                            if count > 0:
+                                state_lines.append(f"  • {fname}: called {count}x")
+
+                    state_context = "EXECUTION STATE:\n" + "\n".join(state_lines) if state_lines else "EXECUTION STATE: No calls yet"
+
+                    microturn_prompt = f"""You are a rule enforcement system. Your job is to decide if a proposed action violates the rules.
+
+HARD RULE: You MUST make at maximum a single summary call
+
+{state_context}
+
+PROPOSED ACTION: Call {proposed_func_name}
+
+DECISION:
+If this action violates the rule, respond with EXACTLY: BLOCK
+If this action is allowed, respond with EXACTLY: PASS
+
+Your response (BLOCK or PASS):"""
+
+                    print(f"[DASEIN][MICROTURN_DEBUG] Calling microturn LLM...")
+                    from langchain_core.messages import HumanMessage
+                    messages_for_microturn = [HumanMessage(content=microturn_prompt)]
+                    microturn_response = self._llm.invoke(messages_for_microturn)
+
+                    if hasattr(microturn_response, 'content'):
+                        decision = microturn_response.content.strip().upper()
+                    else:
+                        decision = str(microturn_response).strip().upper()
+
+                    node_name = getattr(self._callback_handler, '_current_chain_node', 'agent')
+                    print(f"[DASEIN][MICROTURN] Node: {node_name} | Proposed: {proposed_func_name} | Decision: {decision}")
+
+                    if "BLOCK" in decision:
+                        print(f"[DASEIN][MICROTURN] BLOCKING {proposed_func_name} call!")
+                        # Modify the result to clear the function call
+                        if hasattr(result, 'generations') and result.generations:
+                            first_gen = result.generations[0]
+                            if isinstance(first_gen, list) and len(first_gen) > 0:
+                                generation = first_gen[0]
+                            else:
+                                generation = first_gen
+
+                            if hasattr(generation, 'message'):
+                                generation.message.additional_kwargs['function_call'] = {}
+                                generation.message.content = ""
+            except Exception as e:
+                print(f"[DASEIN][MICROTURN] Error in microturn: {e}")
+                import traceback
+                traceback.print_exc()
+
         # Trigger on_llm_end callback
         if self._callback_handler:
             self._callback_handler.on_llm_end(
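
Condensed to its decision core, the added enforcement step is a single LLM-as-judge round trip: show the model the run's call counts plus the proposed call, and parse a BLOCK/PASS verdict. The sketch below restates the hunk's logic outside the wrapper (assuming any LangChain chat model as `llm`); it is a restatement for clarity, not the shipped code:

    from langchain_core.messages import HumanMessage

    def microturn_judge(llm, proposed_func_name: str, state_context: str) -> bool:
        """Return True if the proposed call should be blocked."""
        prompt = (
            "You are a rule enforcement system. Your job is to decide if a "
            "proposed action violates the rules.\n\n"
            "HARD RULE: You MUST make at maximum a single summary call\n\n"
            f"{state_context}\n\n"
            f"PROPOSED ACTION: Call {proposed_func_name}\n\n"
            "Your response (BLOCK or PASS):"
        )
        response = llm.invoke([HumanMessage(content=prompt)])
        decision = getattr(response, "content", str(response)).strip().upper()
        return "BLOCK" in decision

On a BLOCK verdict the wrapper does not raise; it empties `function_call` and `content` on the generation's message, so the agent loop sees a no-op response rather than an error.
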
@@ -217,6 +310,15 @@ class DaseinLLMWrapper(BaseChatModel):
         except:
             return "No result"
 
+    def invoke(self, messages, **kwargs):
+        """Override invoke to intercept all LLM calls."""
+        print(f"[DASEIN][WRAPPER] invoke() called with {len(messages) if isinstance(messages, list) else 1} messages")
+
+        # Call the parent's invoke which will call our _generate
+        result = super().invoke(messages, **kwargs)
+
+        return result
+
     def _llm_type(self):
         return "dasein_llm_wrapper"
 
@@ -419,6 +521,14 @@ def cognate(agent, *, weights=None, verbose=False, retry=1, performance_tracking
     Returns:
         A proxy object with .run() and .invoke() methods
     """
+    # CRITICAL: Prevent double-wrapping in Jupyter/Colab when cell is rerun
+    # If agent is already a CognateProxy, unwrap it first to avoid nested retry loops
+    if isinstance(agent, CognateProxy):
+        print("[DASEIN][WARNING] Agent is already wrapped with cognate(). Unwrapping to prevent nested loops.")
+        print(f"[DASEIN][WARNING] Previous config: retry={agent._retry}, performance_tracking={agent._performance_tracking}")
+        print(f"[DASEIN][WARNING] New config: retry={retry}, performance_tracking={performance_tracking}")
+        agent = agent._agent  # Unwrap to get original agent
+
     global _global_cognate_proxy
     _global_cognate_proxy = CognateProxy(agent, weights=weights, verbose=verbose, retry=retry, performance_tracking=performance_tracking, rule_trace=rule_trace, post_run=post_run, performance_tracking_id=performance_tracking_id, top_k=top_k)
    return _global_cognate_proxy
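
This guard makes cognate() idempotent under notebook reruns. Rerunning a cell like the following (hypothetical usage; the agent constructor is illustrative) now rewraps the original agent instead of nesting one retry loop inside another:

    agent = create_react_agent(model, tools)  # hypothetical agent constructor
    agent = cognate(agent, retry=2)           # first run: wraps the agent
    agent = cognate(agent, retry=3)           # cell rerun: unwraps, then wraps once
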
@@ -728,7 +838,7 @@ class CognateProxy:
         print(f"[DASEIN] Coordinator node: {coordinator_node}")
         planning_nodes = self._identify_planning_nodes(agent, coordinator_node)
 
-        self._callback_handler = DaseinCallbackHandler(weights=weights, llm=None, is_langgraph=self._is_langgraph, coordinator_node=coordinator_node, planning_nodes=planning_nodes, verbose=verbose)
+        self._callback_handler = DaseinCallbackHandler(weights=weights, llm=None, is_langgraph=self._is_langgraph, coordinator_node=coordinator_node, planning_nodes=planning_nodes, verbose=verbose, agent=self._agent, extract_tools_fn=self._extract_tool_metadata)
         self._langgraph_params = None
         self._original_agent = agent  # Keep reference to original
         self._agent_was_recreated = False  # Track if agent recreation succeeded
@@ -775,10 +885,43 @@
 
         # Wrap the agent's LLM with our trace capture wrapper
         self._wrap_agent_llm()
+
+        # Wrap the agent's tools for pipecleaner deduplication
+        print(f"\n{'='*70}")
+        print(f"[DASEIN] Patching tool execution for pipecleaner...")
+        print(f"{'='*70}")
+        try:
+            from .wrappers import wrap_tools_for_pipecleaner
+            verbose = getattr(self._callback_handler, '_verbose', False)
+            success = wrap_tools_for_pipecleaner(self._agent, self._callback_handler, verbose=verbose)
+            if success:
+                print(f"[DASEIN] ✅ Tool execution patched successfully")
+            else:
+                print(f"[DASEIN] ⚠️ Tool execution patching failed")
+            print(f"{'='*70}\n")
+        except Exception as e:
+            print(f"[DASEIN] ❌ ERROR patching tool execution: {e}")
+            import traceback
+            traceback.print_exc()
+            print(f"{'='*70}\n")
+
+        # Inject universal dead-letter tool
+        self._inject_deadletter_tool()
 
     def _vprint(self, message: str, force: bool = False):
         """Helper for verbose printing."""
         _vprint(message, self._verbose, force)
+
+    def _format_final_outcome(self, outcome):
+        """Format final outcome for display."""
+        if outcome == "completed":
+            return "✅ Task Completed"
+        elif outcome == "gave_up":
+            return "⚠️ Agent Gave Up"
+        elif outcome == "failed":
+            return "❌ Failed"
+        else:
+            return f"❓ {outcome}"
 
     def _extract_query_from_input(self, input_data):
         """ CRITICAL: Extract query string from various input formats."""
@@ -965,6 +1108,168 @@
             self._vprint(f"[DASEIN][PLANNING_NODES] ERROR: {e}")
             return set()
 
+    def _extract_tool_metadata(self, agent):
+        """
+        Extract tool metadata (name, description, args_schema) from agent.
+
+        CRITICAL: Extracts ALL available tools from the agent, not just tools used in trace.
+        Why: If agent used wrong tool (e.g., extract_text instead of get_elements),
+        the trace won't show the correct tool. Stage 3.5 needs to see all options
+        to suggest better alternatives.
+
+        For multi-agent systems, preserves node→tool mapping so Stage 3.5 knows
+        which tools are available in which nodes (critical for grounding).
+        """
+        tools_metadata = []
+        tools_to_process = []  # Format: (tool, node_name or None)
+
+        # Get ALL tools from agent (LangChain or LangGraph) - not filtered by trace usage
+        tools_attr = getattr(agent, 'tools', None)
+        if tools_attr:
+            try:
+                # Top-level tools have no node context
+                tools_to_process = [(t, None) for t in list(tools_attr)]
+            except Exception:
+                pass
+        elif getattr(agent, 'toolkit', None):
+            tk = getattr(agent, 'toolkit')
+            tk_tools = getattr(tk, 'tools', None) or getattr(tk, 'get_tools', None)
+            try:
+                # Toolkit tools have no node context
+                tools_to_process = [(t, None) for t in list(tk_tools() if callable(tk_tools) else tk_tools or [])]
+            except Exception:
+                pass
+
+        # Also try LangGraph tools from compiled graph
+        # For multi-agent systems, scan ALL nodes for tools (not just 'tools' node)
+        # CRITICAL: Preserve node→tool mapping for proper grounding
+        # CRITICAL: Use agent.get_graph().nodes (same as planning node discovery)
+        # NOT agent.nodes which returns different objects without .data attribute
+        if hasattr(agent, 'get_graph'):
+            graph = agent.get_graph()
+            nodes = graph.nodes
+            for node_name, node_obj in nodes.items():
+                if node_name.startswith('__'):  # Skip __start__, __end__
+                    continue
+
+                # Check if this is a subgraph with child nodes (like research_supervisor)
+                # CRITICAL: Use node_obj.data (compiled graph) not node_obj.node (implementation)
+                if hasattr(node_obj, 'data') and hasattr(node_obj.data, 'nodes') and 'Compiled' in type(node_obj.data).__name__:
+                    try:
+                        subgraph = node_obj.data.get_graph()
+                        for sub_node_name, sub_node_obj in subgraph.nodes.items():
+                            if sub_node_name.startswith('__'):
+                                continue
+                            if hasattr(sub_node_obj, 'node'):
+                                sub_actual = sub_node_obj.node
+                                # Use fully qualified node name: parent.child
+                                full_node_name = f"{node_name}.{sub_node_name}"
+
+                                # Check all tool patterns in subgraph children
+                                if hasattr(sub_actual, 'tools_by_name'):
+                                    tools_to_process.extend([(t, full_node_name) for t in sub_actual.tools_by_name.values()])
+                                if hasattr(sub_actual, 'runnable') and hasattr(sub_actual.runnable, 'tools'):
+                                    sub_tools = sub_actual.runnable.tools
+                                    if callable(sub_tools):
+                                        try:
+                                            sub_tools = sub_tools()
+                                        except:
+                                            pass
+                                    if isinstance(sub_tools, list):
+                                        tools_to_process.extend([(t, full_node_name) for t in sub_tools])
+                                        print(f"  [DASEIN][EXTRACT] Found {len(sub_tools)} tools in {full_node_name}.runnable.tools")
+                                    else:
+                                        tools_to_process.append((sub_tools, full_node_name))
+                                        print(f"  [DASEIN][EXTRACT] Found 1 tool in {full_node_name}.runnable.tools")
+                    except Exception as e:
+                        print(f"  [DASEIN][EXTRACT] Failed to analyze subgraph: {e}")
+
+                # Check if node has steps with tools
+                if hasattr(node_obj, 'node'):
+                    actual_node = node_obj.node
+
+                    # Check for tools_by_name (common in agent nodes)
+                    if hasattr(actual_node, 'tools_by_name'):
+                        node_tools = actual_node.tools_by_name.values()
+                        tools_to_process.extend([(t, node_name) for t in node_tools])
+                        print(f"  [DASEIN][EXTRACT] Found {len(node_tools)} tools in {node_name}.tools_by_name")
+
+                    # Check for runnable.tools (dynamic tools like ConductResearch)
+                    if hasattr(actual_node, 'runnable') and hasattr(actual_node.runnable, 'tools'):
+                        runnable_tools = actual_node.runnable.tools
+                        if callable(runnable_tools):
+                            try:
+                                runnable_tools = runnable_tools()
+                            except:
+                                pass
+                        if isinstance(runnable_tools, list):
+                            tools_to_process.extend([(t, node_name) for t in runnable_tools])
+                            print(f"  [DASEIN][EXTRACT] Found {len(runnable_tools)} tools in {node_name}.runnable.tools")
+                        else:
+                            tools_to_process.append((runnable_tools, node_name))
+                            print(f"  [DASEIN][EXTRACT] Found 1 tool in {node_name}.runnable.tools")
+
+                    # Check for bound.tools (another common pattern)
+                    if hasattr(actual_node, 'bound') and hasattr(actual_node.bound, 'tools'):
+                        bound_tools = actual_node.bound.tools
+                        if isinstance(bound_tools, list):
+                            tools_to_process.extend([(t, node_name) for t in bound_tools])
+                            print(f"  [DASEIN][EXTRACT] Found {len(bound_tools)} tools in {node_name}.bound.tools")
+                        else:
+                            tools_to_process.append((bound_tools, node_name))
+                            print(f"  [DASEIN][EXTRACT] Found 1 tool in {node_name}.bound.tools")
+
+                    # Check for steps (legacy pattern)
+                    if hasattr(actual_node, 'steps'):
+                        for step in actual_node.steps:
+                            if hasattr(step, 'tools_by_name'):
+                                step_tools = step.tools_by_name.values()
+                                tools_to_process.extend([(t, node_name) for t in step_tools])
+                                print(f"  [DASEIN][EXTRACT] Found {len(step_tools)} tools in {node_name}.steps")
+                                break
+
+        # Extract metadata from each tool (with node context for multi-agent)
+        for tool_tuple in tools_to_process:
+            try:
+                # Unpack (tool, node_name)
+                if isinstance(tool_tuple, tuple) and len(tool_tuple) == 2:
+                    tool, node_name = tool_tuple
+                else:
+                    tool = tool_tuple
+                    node_name = None
+
+                tool_meta = {
+                    'name': getattr(tool, 'name', str(tool.__class__.__name__)),
+                    'description': getattr(tool, 'description', ''),
+                }
+
+                # CRITICAL: Add node context for multi-agent systems (for grounding)
+                if node_name:
+                    tool_meta['node'] = node_name
+
+                # Extract args_schema if available
+                if hasattr(tool, 'args_schema') and tool.args_schema:
+                    try:
+                        # Try Pydantic v2 method
+                        if hasattr(tool.args_schema, 'model_json_schema'):
+                            tool_meta['args_schema'] = tool.args_schema.model_json_schema()
+                        # Fallback to Pydantic v1 method
+                        elif hasattr(tool.args_schema, 'schema'):
+                            tool_meta['args_schema'] = tool.args_schema.schema()
+                        else:
+                            tool_meta['args_schema'] = {}
+                    except Exception:
+                        tool_meta['args_schema'] = {}
+                else:
+                    tool_meta['args_schema'] = {}
+
+                tools_metadata.append(tool_meta)
+            except Exception as e:
+                # Skip tools that fail to extract
+                pass
+
+        return tools_metadata
+
     def _extract_langgraph_params(self, agent):
         """ CRITICAL: Extract LangGraph agent creation parameters for recreation."""
         try:
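
For reference, each entry appended to tools_metadata has the following shape; the values here are invented for illustration, and the 'node' key is present only when the tool was found inside a named graph node:

    example_entry = {
        "name": "get_elements",                      # tool name from the agent
        "description": "Extract page elements ...",  # tool description string
        "node": "research_supervisor.researcher",    # only for multi-agent graphs
        "args_schema": {                             # Pydantic v2 model_json_schema()
            "type": "object",
            "properties": {"selector": {"type": "string"}},
        },
    }
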
@@ -979,24 +1284,131 @@
             return None
 
         # Try to extract tools from the compiled graph
-        if hasattr(agent, 'nodes') and 'tools' in agent.nodes:
-            tools_node = agent.nodes['tools']
-            if hasattr(tools_node, 'node') and hasattr(tools_node.node, 'steps'):
-                for step in tools_node.node.steps:
-                    if hasattr(step, 'tools_by_name'):
-                        # Extract original tools (before our wrapping)
-                        tools = []
-                        for tool_name, tool in step.tools_by_name.items():
+        # CRITICAL: For multi-agent, scan ALL nodes (not just 'tools' node)
+        tools = []
+        # CRITICAL: Use agent.get_graph().nodes (same as planning node discovery)
+        # NOT agent.nodes which returns different objects without .data attribute
+        if hasattr(agent, 'get_graph'):
+            graph = agent.get_graph()
+            nodes = graph.nodes
+            print(f"  [DASEIN][EXTRACT] Scanning {len(nodes)} LangGraph nodes for tools...")
+            for node_name, node_obj in nodes.items():
+                if node_name.startswith('__'):  # Skip __start__, __end__
+                    continue
+
+                print(f"  [DASEIN][EXTRACT] Checking node: {node_name}")
+
+                # Check if this is a subgraph with child nodes (like research_supervisor)
+                # CRITICAL: Use node_obj.data (compiled graph) not node_obj.node (implementation)
+                if hasattr(node_obj, 'data') and hasattr(node_obj.data, 'nodes') and 'Compiled' in type(node_obj.data).__name__:
+                    try:
+                        subgraph = node_obj.data.get_graph()
+                        print(f"  [DASEIN][EXTRACT] {node_name} is a subgraph with {len(subgraph.nodes)} child nodes")
+                        for sub_node_name, sub_node_obj in subgraph.nodes.items():
+                            if sub_node_name.startswith('__'):
+                                continue
+                            print(f"  [DASEIN][EXTRACT] Checking subgraph child: {sub_node_name}")
+                            if hasattr(sub_node_obj, 'node'):
+                                sub_actual = sub_node_obj.node
+
+                                # Debug: print what attributes this node has
+                                attrs = [a for a in dir(sub_actual) if not a.startswith('_')]
+                                print(f"  [DASEIN][EXTRACT] Node attributes: {', '.join(attrs[:10])}...")
+
+                                # Check all tool patterns in subgraph children
+                                if hasattr(sub_actual, 'tools_by_name'):
+                                    for tool_name, tool in sub_actual.tools_by_name.items():
+                                        if hasattr(tool, 'original_tool'):
+                                            tools.append(tool.original_tool)
+                                        else:
+                                            tools.append(tool)
+                                    print(f"  [DASEIN][EXTRACT] Found {len(sub_actual.tools_by_name)} tools in {node_name}.{sub_node_name}.tools_by_name")
+                                if hasattr(sub_actual, 'runnable') and hasattr(sub_actual.runnable, 'tools'):
+                                    sub_tools = sub_actual.runnable.tools
+                                    if callable(sub_tools):
+                                        try:
+                                            sub_tools = sub_tools()
+                                        except:
+                                            pass
+                                    if isinstance(sub_tools, list):
+                                        tools.extend(sub_tools)
+                                        print(f"  [DASEIN][EXTRACT] Found {len(sub_tools)} tools in {node_name}.{sub_node_name}.runnable.tools")
+                                    else:
+                                        tools.append(sub_tools)
+                                        print(f"  [DASEIN][EXTRACT] Found 1 tool in {node_name}.{sub_node_name}.runnable.tools")
+
+                                # Also check if sub_actual IS a callable with tools (another pattern)
+                                if callable(sub_actual) and hasattr(sub_actual, 'tools'):
+                                    direct_tools = sub_actual.tools
+                                    if callable(direct_tools):
+                                        try:
+                                            direct_tools = direct_tools()
+                                        except:
+                                            pass
+                                    if isinstance(direct_tools, list):
+                                        tools.extend(direct_tools)
+                                        print(f"  [DASEIN][EXTRACT] Found {len(direct_tools)} tools in {node_name}.{sub_node_name} (direct)")
+                                    elif direct_tools:
+                                        tools.append(direct_tools)
+                                        print(f"  [DASEIN][EXTRACT] Found 1 tool in {node_name}.{sub_node_name} (direct)")
+                    except Exception as e:
+                        print(f"  [DASEIN][EXTRACT] Failed to analyze subgraph: {e}")
+
+                # Check if node has tools
+                if hasattr(node_obj, 'node'):
+                    actual_node = node_obj.node
+
+                    # Check for tools_by_name (common in agent nodes)
+                    if hasattr(actual_node, 'tools_by_name'):
+                        for tool_name, tool in actual_node.tools_by_name.items():
                             # If it's our wrapped tool, get the original
                             if hasattr(tool, 'original_tool'):
                                 tools.append(tool.original_tool)
                             else:
                                 tools.append(tool)
-                        params['tools'] = tools
-                        print(f"  [DASEIN][EXTRACT] Found {len(tools)} tools")
-                        break
-
-        if 'tools' not in params:
+                        print(f"  [DASEIN][EXTRACT] Found {len(actual_node.tools_by_name)} tools in {node_name}.tools_by_name")
+
+                    # Check for runnable.tools (dynamic tools like ConductResearch)
+                    if hasattr(actual_node, 'runnable') and hasattr(actual_node.runnable, 'tools'):
+                        runnable_tools = actual_node.runnable.tools
+                        if callable(runnable_tools):
+                            try:
+                                runnable_tools = runnable_tools()
+                            except:
+                                pass
+                        if isinstance(runnable_tools, list):
+                            tools.extend(runnable_tools)
+                            print(f"  [DASEIN][EXTRACT] Found {len(runnable_tools)} tools in {node_name}.runnable.tools")
+                        else:
+                            tools.append(runnable_tools)
+                            print(f"  [DASEIN][EXTRACT] Found 1 tool in {node_name}.runnable.tools")
+
+                    # Check for bound.tools (another common pattern)
+                    if hasattr(actual_node, 'bound') and hasattr(actual_node.bound, 'tools'):
+                        bound_tools = actual_node.bound.tools
+                        if isinstance(bound_tools, list):
+                            tools.extend(bound_tools)
+                            print(f"  [DASEIN][EXTRACT] Found {len(bound_tools)} tools in {node_name}.bound.tools")
+                        else:
+                            tools.append(bound_tools)
+                            print(f"  [DASEIN][EXTRACT] Found 1 tool in {node_name}.bound.tools")
+
+                    # Check for steps (legacy pattern)
+                    if hasattr(actual_node, 'steps'):
+                        for step in actual_node.steps:
+                            if hasattr(step, 'tools_by_name'):
+                                for tool_name, tool in step.tools_by_name.items():
+                                    if hasattr(tool, 'original_tool'):
+                                        tools.append(tool.original_tool)
+                                    else:
+                                        tools.append(tool)
+                                print(f"  [DASEIN][EXTRACT] Found {len(step.tools_by_name)} tools in {node_name}.steps")
+                                break
+
+        if tools:
+            params['tools'] = tools
+            print(f"  [DASEIN][EXTRACT] Total: {len(tools)} tools extracted")
+        else:
             print(f"  [DASEIN][EXTRACT] No tools found in agent")
             return None
 
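Both extraction paths (metadata above, parameter recreation here) rest on the same traversal. Stripped of logging and the subgraph/runnable/bound/steps fallbacks, the scan reduces to the following minimal sketch (a simplification of the hunk, covering only the tools_by_name pattern):

    def iter_node_tools(agent):
        """Yield (node_name, tool) per graph node, unwrapping Dasein-wrapped tools."""
        graph = agent.get_graph()  # compiled view; agent.nodes lacks the .data attribute
        for node_name, node_obj in graph.nodes.items():
            if node_name.startswith('__'):  # skip __start__ / __end__
                continue
            target = getattr(node_obj, 'node', None)
            if target is not None and hasattr(target, 'tools_by_name'):
                for tool in target.tools_by_name.values():
                    yield node_name, getattr(tool, 'original_tool', tool)
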
@@ -1189,23 +1601,704 @@ Follow these rules when planning your actions."""
1189
1601
  traceback.print_exc()
1190
1602
  return False
1191
1603
 
1604
+ @staticmethod
1605
+ def _create_deadletter_tool():
1606
+ """Create the universal dead-letter tool for blocked calls.
1607
+
1608
+ This tool acts as a sink for calls blocked by anti-fanout rules.
1609
+ It returns instantly with structured metadata, allowing nodes to complete normally.
1610
+ """
1611
+ def dasein_deadletter(
1612
+ original_tool: str,
1613
+ original_args_fingerprint: str,
1614
+ reason_code: str,
1615
+ policy_trace_id: str,
1616
+ tokens_saved_estimate: int = 0,
1617
+ cached_result: Any = None
1618
+ ) -> Any:
1619
+ """Universal dead-letter tool for blocked policy calls.
1620
+
1621
+ **INTERNAL USE ONLY - DO NOT CALL DIRECTLY**
1622
+
1623
+ This tool is automatically invoked when Dasein blocks a call for policy reasons
1624
+ (e.g., anti-fanout rules). Supports transparent deduplication by returning
1625
+ cached results from previous identical calls.
1626
+
1627
+ Args:
1628
+ original_tool: Name of the tool that was blocked
1629
+ original_args_fingerprint: Hash/summary of original arguments
1630
+ reason_code: Why the call was blocked (e.g., "duplicate_detected")
1631
+ policy_trace_id: Trace ID for the rule that caused the block
1632
+ tokens_saved_estimate: Estimated tokens saved by blocking this call
1633
+ cached_result: If provided, return this (transparent deduplication)
1634
+
1635
+ Returns:
1636
+ Either cached_result (transparent) or structured error dict (explicit block)
1637
+ """
1638
+ import time
1639
+
1640
+ if cached_result is not None:
1641
+ # Transparent deduplication - return the original result seamlessly
1642
+ print(f"[DASEIN][DEADLETTER] 🔄 Transparent dedup: {original_tool} (returning cached result, {tokens_saved_estimate} tokens saved)")
1643
+ return cached_result
1644
+ else:
1645
+ # Explicit block - return error structure
1646
+ result = {
1647
+ "blocked_by_policy": True,
1648
+ "original_tool": original_tool,
1649
+ "original_args_fingerprint": original_args_fingerprint,
1650
+ "reason_code": reason_code,
1651
+ "policy_trace_id": policy_trace_id,
1652
+ "tokens_saved_estimate": tokens_saved_estimate,
1653
+ "timestamp": time.time(),
1654
+ "message": f"Call to {original_tool} was blocked by Dasein policy: {reason_code}"
1655
+ }
1656
+ print(f"[DASEIN][DEADLETTER] 🚫 Blocked {original_tool}: {reason_code} (est. {tokens_saved_estimate} tokens saved)")
1657
+ return result
1658
+
1659
+ return dasein_deadletter
1660
+
1661
+ def _inject_deadletter_tool(self):
1662
+ """Inject the dead-letter tool into the agent's tool registry.
1663
+
1664
+ The tool is added to the executor but hidden from the LLM's view by marking it internal.
1665
+ """
1666
+ try:
1667
+ deadletter_fn = self._create_deadletter_tool()
1668
+
1669
+ # Convert to LangChain Tool
1670
+ from langchain.tools import Tool
1671
+ deadletter_tool = Tool(
1672
+ name="dasein_deadletter",
1673
+ description="**INTERNAL USE ONLY - DO NOT CALL DIRECTLY**\nThis tool is automatically invoked when Dasein blocks a call for policy reasons.",
1674
+ func=deadletter_fn
1675
+ )
1676
+
1677
+ # For LangGraph agents: Add to tools list in langgraph_params
1678
+ if self._is_langgraph and self._langgraph_params and 'tools' in self._langgraph_params:
1679
+ self._langgraph_params['tools'].append(deadletter_tool)
1680
+ print(f"[DASEIN][DEADLETTER] Injected dead-letter tool into LangGraph params")
1681
+
1682
+ # For LangChain agents: Add to agent's tools attribute if accessible
1683
+ elif hasattr(self._agent, 'tools'):
1684
+ if isinstance(self._agent.tools, list):
1685
+ self._agent.tools.append(deadletter_tool)
1686
+ print(f"[DASEIN][DEADLETTER] Injected dead-letter tool into LangChain agent")
1687
+
1688
+ # Store reference for later use
1689
+ self._deadletter_tool = deadletter_tool
1690
+ self._deadletter_fn = deadletter_fn
1691
+
1692
+ except Exception as e:
1693
+ print(f"[DASEIN][DEADLETTER] Failed to inject dead-letter tool: {e}")
1694
+ import traceback
1695
+ traceback.print_exc()
1696
+ self._deadletter_tool = None
1697
+ self._deadletter_fn = None
1698
+
1192
1699
  def _wrap_agent_llm(self):
1193
- """Dynamically find and wrap any LLM in the agent structure."""
1700
+ """Monkey-patch ALL LLM classes found in agent + tools."""
1194
1701
  try:
1195
- # Recursively search for any LLM-like object in the agent structure
1196
- llm = self._find_llm_recursively(self._agent, max_depth=5)
1197
- if llm:
1198
- wrapped_llm = DaseinLLMWrapper(llm, self._callback_handler)
1199
- # Replace the original LLM with our wrapped version
1200
- self._replace_llm_in_structure(self._agent, llm, wrapped_llm, max_depth=5)
1201
- self._wrapped_llm = wrapped_llm
1202
- self._vprint(f"[DASEIN][WRAPPER] Successfully wrapped {type(llm).__name__} LLM")
1203
- return
1702
+ # Find ALL LLMs in agent structure + tools
1703
+ print(f"[DASEIN][WRAPPER] Searching for ALL LLMs in agent+tools...")
1704
+ all_llms = []
1204
1705
 
1205
- self._vprint(f"[DASEIN][WRAPPER] Could not find any LLM in agent structure")
1206
- self._wrapped_llm = None
1706
+ # 1. Search in agent
1707
+ agent_llm = self._find_llm_recursively(self._agent, max_depth=5)
1708
+ if agent_llm:
1709
+ all_llms.append(('agent', agent_llm))
1710
+
1711
+ # 2. Search in tools (where Summary LLM lives!)
1712
+ if hasattr(self._agent, 'tools'):
1713
+ for i, tool in enumerate(self._agent.tools or []):
1714
+ tool_llm = self._find_llm_recursively(tool, max_depth=3, path=f"tools[{i}]")
1715
+ if tool_llm:
1716
+ all_llms.append((f'tool_{i}_{getattr(tool, "name", "unknown")}', tool_llm))
1717
+
1718
+ print(f"[DASEIN][WRAPPER] Found {len(all_llms)} LLM(s)")
1719
+ for location, llm in all_llms:
1720
+ print(f"[DASEIN][WRAPPER] - {location}: {type(llm).__name__}")
1721
+
1722
+ # Patch all unique LLM classes
1723
+ patched_classes = set()
1724
+ for location, llm in all_llms:
1725
+ llm_class = type(llm)
1726
+ if llm_class in patched_classes:
1727
+ print(f"[DASEIN][WRAPPER] {llm_class.__name__} already patched for {location}, skipping")
1728
+ continue
1729
+
1730
+ print(f"[DASEIN][WRAPPER] Patching {llm_class.__name__} (found in {location})...")
1731
+
1732
+ # Check what methods the LLM class has
1733
+ # Only patch TOP-LEVEL methods to avoid double-deduplication from internal calls
1734
+ print(f"[DASEIN][WRAPPER] Checking LLM methods...")
1735
+ methods_to_patch = []
1736
+ for method in ['invoke', 'ainvoke']: # Only patch user-facing methods, not internal _generate
1737
+ if hasattr(llm_class, method):
1738
+ print(f"[DASEIN][WRAPPER] - Has {method}")
1739
+ methods_to_patch.append(method)
1740
+
1741
+ if not methods_to_patch:
1742
+ print(f"[DASEIN][WRAPPER] No methods to patch found!")
1743
+ return
1744
+
1745
+ # Check if we already patched this class
1746
+ first_method = getattr(llm_class, methods_to_patch[0])
1747
+ if hasattr(first_method, '_dasein_patched'):
1748
+ print(f"[DASEIN][WRAPPER] {llm_class.__name__} already patched, skipping")
1749
+ return
1750
+
1751
+ callback_handler = self._callback_handler
1752
+
1753
+ # Thread-local to track depth and max depth reached
1754
+ import threading
1755
+ _patch_depth = threading.local()
1756
+
1757
+ def get_max_depth():
1758
+ return getattr(_patch_depth, 'max_depth', 0)
1759
+
1760
+ def set_max_depth(val):
1761
+ _patch_depth.max_depth = val
1762
+
1763
+ def is_in_microturn():
1764
+ return getattr(_patch_depth, 'in_microturn', False)
1765
+
1766
+ def set_in_microturn(val):
1767
+ _patch_depth.in_microturn = val
1768
+
1769
+ # Thread-local state tracking for Summary calls (mirrors callback pattern)
1770
+ def get_summary_calls_made():
1771
+ """Get count of Summary calls made in this run."""
1772
+ return getattr(_patch_depth, 'summary_calls_made', 0)
1773
+
1774
+ def increment_summary_calls():
1775
+ """Increment Summary call counter."""
1776
+ current = getattr(_patch_depth, 'summary_calls_made', 0)
1777
+ _patch_depth.summary_calls_made = current + 1
1778
+ return _patch_depth.summary_calls_made
1779
+
1780
+ # Patch ALL methods (silent)
1781
+ for method_name in methods_to_patch:
1782
+ original_method = getattr(llm_class, method_name)
1783
+ is_async = 'a' in method_name and (method_name.startswith('a') or method_name.startswith('_a'))
1784
+
1785
+ # Use a factory function to properly capture the closure variables
1786
+ def make_patched_method(orig_method, meth_name, is_async_method, depth_tracker, max_depth_getter, max_depth_setter, in_microturn_getter, in_microturn_setter, get_summary_calls, increment_summary):
1787
+ if is_async_method:
1788
+ async def patched_method(self_llm, *args, **kwargs):
1789
+ # Track depth to find the leaf method
1790
+ depth = getattr(depth_tracker, 'value', 0)
1791
+ is_entry_point = (depth == 0)
1792
+ depth_tracker.value = depth + 1
1793
+ current_depth = depth_tracker.value
1794
+
1795
+ # Track max depth reached (silent)
1796
+ if is_entry_point:
1797
+ max_depth_setter(current_depth)
1798
+ else:
1799
+ if current_depth > max_depth_getter():
1800
+ max_depth_setter(current_depth)
1801
+
1802
+ # 🔥 PIPECLEANER DEDUPLICATION (only patching top-level methods, always apply)
1803
+ # Skip depth checks - they don't work with async/parallel execution
1804
+ if callback_handler:
1805
+ try:
1806
+ # Extract messages from args based on method signature
1807
+ messages_to_dedupe = None
1808
+ arg_index = 0
1809
+
1810
+ if meth_name in ['invoke', 'ainvoke']:
1811
+ # First arg is 'input' (can be string, list, or PromptValue)
1812
+ messages_to_dedupe = args[0] if args else kwargs.get('input', kwargs.get('messages'))
1813
+ arg_index = 0
1814
+ elif meth_name in ['_generate', '_agenerate']:
1815
+ # First arg is 'messages' (list of BaseMessage)
1816
+ messages_to_dedupe = args[0] if args else kwargs.get('messages')
1817
+ arg_index = 0
1818
+ elif meth_name in ['generate', 'agenerate']:
1819
+ # First arg is 'prompts' (list of message lists)
1820
+ messages_to_dedupe = args[0] if args else kwargs.get('prompts')
1821
+ arg_index = 0
1822
+
1823
+ # Convert to strings for deduplication
1824
+ if messages_to_dedupe:
1825
+ prompt_strings = []
1826
+ for msg in (messages_to_dedupe if isinstance(messages_to_dedupe, list) else [messages_to_dedupe]):
1827
+ if hasattr(msg, 'content'):
1828
+ prompt_strings.append(msg.content)
1829
+ elif isinstance(msg, str):
1830
+ prompt_strings.append(msg)
1831
+ else:
1832
+ prompt_strings.append(str(msg))
1833
+
1834
+ # =============================================================
1835
+ # HOTPATH DEBUGGING (commented out for production)
1836
+ # =============================================================
1837
+ # print(f"\n{'='*70}")
1838
+ # print(f"[🔥 HOTPATH FULL DEBUG] {meth_name}() call")
1839
+ # print(f"{'='*70}")
1840
+ #
1841
+ # # 1. Callback state
1842
+ # current_node = getattr(callback_handler, '_current_chain_node', None)
1843
+ # current_tool = getattr(callback_handler, '_current_tool_name', None)
1844
+ # print(f"[🔥] Current node: {current_node}")
1845
+ # print(f"[🔥] Current tool: {current_tool}")
1846
+ #
1847
+ # # 2. Tools in this call
1848
+ # tools_in_call = []
1849
+ # if 'invocation_params' in kwargs:
1850
+ # tools = kwargs['invocation_params'].get('tools') or kwargs['invocation_params'].get('functions') or []
1851
+ # tools_in_call = [t.get('name', t.get('function', {}).get('name', '?')) for t in tools]
1852
+ # elif 'tools' in kwargs:
1853
+ # tools_in_call = [t.get('name', '?') for t in kwargs.get('tools', [])]
1854
+ # elif 'functions' in kwargs:
1855
+ # tools_in_call = [t.get('name', '?') for t in kwargs.get('functions', [])]
1856
+ # print(f"[🔥] Tools in call: {tools_in_call if tools_in_call else 'NONE'}")
1857
+ #
1858
+ # # 3. Prompt characteristics
1859
+ # prompt_lens = [len(s) for s in prompt_strings]
1860
+ # print(f"[🔥] Prompt count: {len(prompt_strings)}")
1861
+ # print(f"[🔥] Prompt lengths: {prompt_lens}")
1862
+ #
1863
+ # # 4. Kwargs keys (for debugging)
1864
+ # print(f"[🔥] Kwargs keys: {list(kwargs.keys())}")
1865
+ #
1866
+ # # 5. Messages structure
1867
+ # if messages_to_dedupe:
1868
+ # if isinstance(messages_to_dedupe, list):
1869
+ # msg_types = [type(m).__name__ for m in messages_to_dedupe[:3]]
1870
+ # print(f"[🔥] Message types (first 3): {msg_types}")
1871
+ # else:
1872
+ # print(f"[🔥] Messages type: {type(messages_to_dedupe).__name__}")
1873
+ #
1874
+ # print(f"{'='*70}\n")
1875
+ #
1876
+ # # Show first 200 chars to see the fingerprint
1877
+ # if prompt_strings:
1878
+ # first_200 = prompt_strings[0][:200] if len(prompt_strings[0]) > 200 else prompt_strings[0]
1879
+ # print(f"[🔥] Prompt start (200 chars): {first_200}")
1880
+
1881
+ # =============================================================
1882
+ # Extract tools from LLM call kwargs (for filter_search rules)
1883
+ # =============================================================
1884
+ tools_in_this_call = []
1885
+
1886
+ # Extract tool names from kwargs (handles multiple LLM providers' formats)
1887
+ # Pattern 1: invocation_params (some providers)
1888
+ if 'invocation_params' in kwargs:
1889
+ inv_params = kwargs['invocation_params']
1890
+ tools_param = inv_params.get('tools') or inv_params.get('functions') or []
1891
+ for t in tools_param:
1892
+ if isinstance(t, dict):
1893
+ # Try: t['name'] or t['function']['name']
1894
+ name = t.get('name') or (t.get('function', {}).get('name') if isinstance(t.get('function'), dict) else None)
1895
+ if name:
1896
+ tools_in_this_call.append(name)
1897
+ # Pattern 2: Direct 'tools' key (common)
1898
+ elif 'tools' in kwargs:
1899
+ tools_param = kwargs.get('tools', [])
1900
+ for t in tools_param:
1901
+ if isinstance(t, dict):
1902
+ # Try: t['name'] or t['function']['name']
1903
+ name = t.get('name') or (t.get('function', {}).get('name') if isinstance(t.get('function'), dict) else None)
1904
+ if name:
1905
+ tools_in_this_call.append(name)
1906
+ # Pattern 3: 'functions' key (OpenAI function calling)
1907
+ elif 'functions' in kwargs:
1908
+ funcs_param = kwargs.get('functions', [])
1909
+ for t in funcs_param:
1910
+ if isinstance(t, dict):
1911
+ name = t.get('name')
1912
+ if name:
1913
+ tools_in_this_call.append(name)
1914
+
1915
+ # Check if any filter_search rules match the tools in this LLM call
1916
+ from .pipecleaner import _find_filter_search_rules
1917
+ filter_rules = None
1918
+ should_dedupe = False
1919
+
1920
+ if hasattr(callback_handler, '_selected_rules') and prompt_strings and tools_in_this_call:
1921
+ # Get all filter_search rules (they specify which tools to target via references.tools)
1922
+ filter_rules = _find_filter_search_rules('*', callback_handler._selected_rules)
1923
+
1924
+ # Check if any tool in this call matches rule's target tools
1925
+ if filter_rules:
1926
+ for rule in filter_rules:
1927
+ # Handle both dict and object formats
1928
+ if isinstance(rule, dict):
1929
+ references = rule.get('references', {})
1930
+ rule_tools = references.get('tools', []) if isinstance(references, dict) else []
1931
+ else:
1932
+ references = getattr(rule, 'references', None)
1933
+ # references might be a dict or object, handle both
1934
+ if isinstance(references, dict):
1935
+ rule_tools = references.get('tools', [])
1936
+ elif references:
1937
+ rule_tools = getattr(references, 'tools', [])
1938
+ else:
1939
+ rule_tools = []
1940
+
1941
+ for tool_in_call in tools_in_this_call:
1942
+ if tool_in_call.lower() in [rt.lower() for rt in rule_tools]:
1943
+ should_dedupe = True
1944
+ break
1945
+ if should_dedupe:
1946
+ break
1947
+
1948
+ if should_dedupe:
1949
+ # Deduplicate each prompt
1950
+ from .pipecleaner import get_or_create_corpus
1951
+ import hashlib
1952
+ corpus = get_or_create_corpus(callback_handler.run_id, verbose=callback_handler._verbose)
1953
+
1954
+ deduplicated_strings = []
1955
+ for i, prompt_str in enumerate(prompt_strings):
1956
+ if len(prompt_str) < 2500:
1957
+ deduplicated_strings.append(prompt_str)
1958
+ continue
1959
+
1960
+ # Split system/content like in callback
1961
+ system_part = prompt_str[:2000]
1962
+ content_part = prompt_str[2000:]
1963
+ prompt_id = f"p{i}_{hashlib.md5(content_part[:100].encode()).hexdigest()[:8]}"
1964
+
1965
+ # Deduplicate (ASYNC - allows parallel Summary calls to batch together)
1966
+ deduplicated_content = await corpus.enqueue_prompt(prompt_id, content_part)
1967
+ deduplicated_str = system_part + deduplicated_content
1968
+ deduplicated_strings.append(deduplicated_str)
1969
+
1970
+ # Convert back to original format
1971
+ if isinstance(messages_to_dedupe, list):
1972
+ for i, msg in enumerate(messages_to_dedupe):
1973
+ if i < len(deduplicated_strings) and hasattr(msg, 'content'):
1974
+ msg.content = deduplicated_strings[i]
1975
+ elif isinstance(messages_to_dedupe, str):
1976
+ messages_to_dedupe = deduplicated_strings[0] if deduplicated_strings else messages_to_dedupe
1977
+
1978
+ # Replace in args/kwargs
1979
+ if args and arg_index < len(args):
1980
+ args = list(args)
1981
+ args[arg_index] = messages_to_dedupe
1982
+ args = tuple(args)
1983
+ elif 'input' in kwargs:
1984
+ kwargs['input'] = messages_to_dedupe
1985
+ elif 'messages' in kwargs:
1986
+ kwargs['messages'] = messages_to_dedupe
1987
+ elif 'prompts' in kwargs:
1988
+ kwargs['prompts'] = messages_to_dedupe
1989
+ except Exception as e:
1990
+ print(f"[🔥 HOTPATH] ⚠️ Deduplication error: {e}")
1991
+ import traceback
1992
+ traceback.print_exc()
1993
+
1994
+ try:
1995
+ result = await orig_method(self_llm, *args, **kwargs)
1996
+
1997
+ # 🚨 MICROTURN ENFORCEMENT - Only if tool_end rules exist
1998
+ in_microturn = in_microturn_getter()
1999
+ if not in_microturn:
2000
+ run_number = getattr(callback_handler, '_run_number', 1) if callback_handler else 1
2001
+ if run_number == 1 and callback_handler:
2002
+ # GATE: Only run microturn if tool_end rules exist
2003
+ from .microturn import has_tool_end_rules, extract_proposed_function_calls, extract_tool_call_signatures
2004
+
2005
+ if not has_tool_end_rules(callback_handler):
2006
+ # No tool_end rules - silently skip microturn
2007
+ pass
2008
+ else:
2009
+ # Check if we've already processed these specific tool calls (prevents duplicate checks as call stack unwinds)
2010
+ temp_names, temp_msg = extract_proposed_function_calls(result)
2011
+ if temp_msg:
2012
+ temp_sigs = extract_tool_call_signatures(temp_msg)
2013
+ tool_calls_sig = f"{','.join(sorted(temp_sigs.values()))}" if temp_sigs else "empty"
2014
+ else:
2015
+ tool_calls_sig = f"{','.join(sorted(temp_names))}" if temp_names else "empty"
2016
+
2017
+ if not hasattr(_patch_depth, 'processed_tool_calls'):
2018
+ _patch_depth.processed_tool_calls = set()
2019
+
2020
+ if tool_calls_sig not in _patch_depth.processed_tool_calls:
2021
+ # Mark these specific tool calls as processed
2022
+ _patch_depth.processed_tool_calls.add(tool_calls_sig)
2023
+
2024
+ # Run microturn enforcement (for tool CALLS)
2025
+ from .microturn import run_microturn_enforcement
2026
+ try:
2027
+ await run_microturn_enforcement(
2028
+ result=result,
2029
+ callback_handler=callback_handler,
2030
+ self_llm=self_llm,
2031
+ patch_depth=_patch_depth,
2032
+ use_llm_microturn=USE_LLM_MICROTURN
2033
+ )
2034
+ except Exception as e:
2035
+ print(f"[DASEIN][MICROTURN] ⚠️ Microturn error: {e}")
2036
+
2037
+ return result
2038
+ finally:
2039
+ depth_tracker.value = depth # Restore depth on exit
2040
+ # Clear processed tool calls set when returning to entry point (prevents memory leak)
2041
+ if depth == 0:
2042
+ if hasattr(_patch_depth, 'processed_tool_calls'):
2043
+ _patch_depth.processed_tool_calls.clear()
2044
+ if hasattr(_patch_depth, 'seen_tool_signatures'):
2045
+ _patch_depth.seen_tool_signatures.clear()
2046
+ if hasattr(_patch_depth, 'tool_result_cache'):
2047
+ _patch_depth.tool_result_cache.clear()
2048
+ else:
2049
+ def patched_method(self_llm, *args, **kwargs):
2050
+ # Track depth to find the leaf method
2051
+ depth = getattr(depth_tracker, 'value', 0)
2052
+ is_entry_point = (depth == 0)
2053
+ depth_tracker.value = depth + 1
2054
+ current_depth = depth_tracker.value
2055
+
2056
+ # Track max depth reached
2057
+ if is_entry_point:
2058
+ max_depth_setter(current_depth) # Reset for new entry
2059
+ else:
2060
+ # Update max if we went deeper
2061
+ if current_depth > max_depth_getter():
2062
+ max_depth_setter(current_depth)
2063
+
2064
+ # 🔥 PIPECLEANER DEDUPLICATION (only patching top-level methods, always apply)
2065
+ # Skip depth checks - they don't work with async/parallel execution
2066
+ if callback_handler:
2067
+ try:
2068
+ # Extract messages from args based on method signature
2069
+ messages_to_dedupe = None
2070
+ arg_index = 0
2071
+
2072
+ if meth_name in ['invoke', 'ainvoke']:
2073
+ messages_to_dedupe = args[0] if args else kwargs.get('input', kwargs.get('messages'))
2074
+ arg_index = 0
2075
+ elif meth_name in ['_generate', '_agenerate']:
2076
+ messages_to_dedupe = args[0] if args else kwargs.get('messages')
2077
+ arg_index = 0
2078
+ elif meth_name in ['generate', 'agenerate']:
2079
+ messages_to_dedupe = args[0] if args else kwargs.get('prompts')
2080
+ arg_index = 0
2081
+
2082
+ # Convert to strings for deduplication
2083
+ if messages_to_dedupe:
2084
+ prompt_strings = []
2085
+ for msg in (messages_to_dedupe if isinstance(messages_to_dedupe, list) else [messages_to_dedupe]):
2086
+ if hasattr(msg, 'content'):
2087
+ prompt_strings.append(msg.content)
2088
+ elif isinstance(msg, str):
2089
+ prompt_strings.append(msg)
2090
+ else:
2091
+ prompt_strings.append(str(msg))
2092
+
2093
+ # =============================================================
2094
+ # HOTPATH DEBUGGING (commented out for production)
2095
+ # =============================================================
2096
+ # print(f"\n{'='*70}")
2097
+ # print(f"[🔥 HOTPATH FULL DEBUG] {meth_name}() call")
2098
+ # print(f"{'='*70}")
2099
+ #
2100
+ # # 1. Callback state
2101
+ # current_node = getattr(callback_handler, '_current_chain_node', None)
2102
+ # current_tool = getattr(callback_handler, '_current_tool_name', None)
2103
+ # print(f"[🔥] Current node: {current_node}")
2104
+ # print(f"[🔥] Current tool: {current_tool}")
2105
+ #
2106
+ # # 2. Tools in this call
2107
+ # tools_in_call = []
2108
+ # if 'invocation_params' in kwargs:
2109
+ # tools = kwargs['invocation_params'].get('tools') or kwargs['invocation_params'].get('functions') or []
2110
+ # tools_in_call = [t.get('name', t.get('function', {}).get('name', '?')) for t in tools]
2111
+ # elif 'tools' in kwargs:
2112
+ # tools_in_call = [t.get('name', '?') for t in kwargs.get('tools', [])]
2113
+ # elif 'functions' in kwargs:
2114
+ # tools_in_call = [t.get('name', '?') for t in kwargs.get('functions', [])]
2115
+ # print(f"[🔥] Tools in call: {tools_in_call if tools_in_call else 'NONE'}")
2116
+ #
2117
+ # # 3. Prompt characteristics
2118
+ # prompt_lens = [len(s) for s in prompt_strings]
2119
+ # print(f"[🔥] Prompt count: {len(prompt_strings)}")
2120
+ # print(f"[🔥] Prompt lengths: {prompt_lens}")
2121
+ #
2122
+ # # 4. Kwargs keys (for debugging)
2123
+ # print(f"[🔥] Kwargs keys: {list(kwargs.keys())}")
2124
+ #
2125
+ # # 5. Messages structure
2126
+ # if messages_to_dedupe:
2127
+ # if isinstance(messages_to_dedupe, list):
2128
+ # msg_types = [type(m).__name__ for m in messages_to_dedupe[:3]]
2129
+ # print(f"[🔥] Message types (first 3): {msg_types}")
2130
+ # else:
2131
+ # print(f"[🔥] Messages type: {type(messages_to_dedupe).__name__}")
2132
+ #
2133
+ # print(f"{'='*70}\n")
2134
+ #
2135
+ # # Show first 200 chars to see the fingerprint
2136
+ # if prompt_strings:
2137
+ # first_200 = prompt_strings[0][:200] if len(prompt_strings[0]) > 200 else prompt_strings[0]
2138
+ # print(f"[🔥] Prompt start (200 chars): {first_200}")
2139
+
2140
+ # =============================================================
2141
+ # Extract tools from LLM call kwargs (for filter_search rules)
2142
+ # =============================================================
2143
+ tools_in_this_call = []
2144
+
2145
+ # Extract tool names from kwargs (handles multiple LLM providers' formats)
2146
+ # Pattern 1: invocation_params (some providers)
2147
+ if 'invocation_params' in kwargs:
2148
+ inv_params = kwargs['invocation_params']
2149
+ tools_param = inv_params.get('tools') or inv_params.get('functions') or []
2150
+ for t in tools_param:
2151
+ if isinstance(t, dict):
2152
+ # Try: t['name'] or t['function']['name']
2153
+ name = t.get('name') or (t.get('function', {}).get('name') if isinstance(t.get('function'), dict) else None)
2154
+ if name:
2155
+ tools_in_this_call.append(name)
2156
+ # Pattern 2: Direct 'tools' key (common)
2157
+ elif 'tools' in kwargs:
2158
+ tools_param = kwargs.get('tools', [])
2159
+ for t in tools_param:
2160
+ if isinstance(t, dict):
2161
+ # Try: t['name'] or t['function']['name']
2162
+ name = t.get('name') or (t.get('function', {}).get('name') if isinstance(t.get('function'), dict) else None)
2163
+ if name:
2164
+ tools_in_this_call.append(name)
2165
+ # Pattern 3: 'functions' key (OpenAI function calling)
2166
+ elif 'functions' in kwargs:
2167
+ funcs_param = kwargs.get('functions', [])
2168
+ for t in funcs_param:
2169
+ if isinstance(t, dict):
2170
+ name = t.get('name')
2171
+ if name:
2172
+ tools_in_this_call.append(name)
2173
+
2174
+ # Check if any filter_search rules match the tools in this LLM call
2175
+ from .pipecleaner import _find_filter_search_rules
2176
+ filter_rules = None
2177
+ should_dedupe = False
2178
+
2179
+ if hasattr(callback_handler, '_selected_rules') and prompt_strings and tools_in_this_call:
2180
+ # Get all filter_search rules (they specify which tools to target via references.tools)
2181
+ filter_rules = _find_filter_search_rules('*', callback_handler._selected_rules)
2182
+
2183
+ # Check if any tool in this call matches rule's target tools
2184
+ if filter_rules:
2185
+ for rule in filter_rules:
2186
+ # Handle both dict and object formats
2187
+ if isinstance(rule, dict):
2188
+ references = rule.get('references', {})
2189
+ rule_tools = references.get('tools', []) if isinstance(references, dict) else []
2190
+ else:
2191
+ references = getattr(rule, 'references', None)
2192
+ # references might be a dict or object, handle both
2193
+ if isinstance(references, dict):
2194
+ rule_tools = references.get('tools', [])
2195
+ elif references:
2196
+ rule_tools = getattr(references, 'tools', [])
2197
+ else:
2198
+ rule_tools = []
2199
+
2200
+ for tool_in_call in tools_in_this_call:
2201
+ if tool_in_call.lower() in [rt.lower() for rt in rule_tools]:
2202
+ should_dedupe = True
2203
+ break
2204
+ if should_dedupe:
2205
+ break
2206
+
2207
+ if should_dedupe:
2208
+ # Deduplicate each prompt
2209
+ from .pipecleaner import get_or_create_corpus
2210
+ import hashlib
2211
+ corpus = get_or_create_corpus(callback_handler.run_id, verbose=callback_handler._verbose)
2212
+
2213
+ deduplicated_strings = []
2214
+ for i, prompt_str in enumerate(prompt_strings):
2215
+ if len(prompt_str) < 2500:
2216
+ deduplicated_strings.append(prompt_str)
2217
+ continue
2218
+
2219
+ # Split system/content like in callback
2220
+ system_part = prompt_str[:2000]
2221
+ content_part = prompt_str[2000:]
2222
+ prompt_id = f"p{i}_{hashlib.md5(content_part[:100].encode()).hexdigest()[:8]}"
2223
+
2224
+ # Deduplicate (wrap async in sync context)
2225
+ import asyncio
2226
+ try:
2227
+ loop = asyncio.get_event_loop()
2228
+ except RuntimeError:
2229
+ loop = asyncio.new_event_loop()
2230
+ asyncio.set_event_loop(loop)
2231
+
2232
+ deduplicated_content = loop.run_until_complete(corpus.enqueue_prompt(prompt_id, content_part))
2233
+ deduplicated_str = system_part + deduplicated_content
2234
+ deduplicated_strings.append(deduplicated_str)
2235
+
2236
+ # Convert back to original format
2237
+ if isinstance(messages_to_dedupe, list):
2238
+ for i, msg in enumerate(messages_to_dedupe):
2239
+ if i < len(deduplicated_strings) and hasattr(msg, 'content'):
2240
+ msg.content = deduplicated_strings[i]
2241
+ elif isinstance(messages_to_dedupe, str):
2242
+ messages_to_dedupe = deduplicated_strings[0] if deduplicated_strings else messages_to_dedupe
2243
+
2244
+ # Replace in args/kwargs
2245
+ if args and arg_index < len(args):
2246
+ args = list(args)
2247
+ args[arg_index] = messages_to_dedupe
2248
+ args = tuple(args)
2249
+ elif 'input' in kwargs:
2250
+ kwargs['input'] = messages_to_dedupe
2251
+ elif 'messages' in kwargs:
2252
+ kwargs['messages'] = messages_to_dedupe
2253
+ elif 'prompts' in kwargs:
2254
+ kwargs['prompts'] = messages_to_dedupe
2255
+ except Exception as e:
2256
+ print(f"[🔥 HOTPATH] ⚠️ Deduplication error: {e}")
2257
+ import traceback
2258
+ traceback.print_exc()
2259
+
+                try:
+                    result = orig_method(self_llm, *args, **kwargs)
+
+                    # 🚨 MICROTURN ENFORCEMENT - Only at the DEEPEST level (max depth)
+                    if current_depth == max_depth_getter():
+                        run_number = getattr(callback_handler, '_run_number', 1) if callback_handler else 1
+                        if run_number == 1 and callback_handler:
+                            print(f"[DASEIN][MICROTURN_DEBUG] 🎯 DEEPEST METHOD: {meth_name} (depth={current_depth}) - Checking result...")
+                            print(f"[DASEIN][MICROTURN_DEBUG] Result type: {type(result)}")
+                            # TODO: Add full microturn logic here
+
+                    return result
+                finally:
+                    depth_tracker.value = depth  # Restore depth on exit
+                    # Clear processed tool calls set when returning to entry point (prevents memory leak)
+                    if depth == 0:
+                        if hasattr(_patch_depth, 'processed_tool_calls'):
+                            _patch_depth.processed_tool_calls.clear()
+                        if hasattr(_patch_depth, 'seen_tool_signatures'):
+                            _patch_depth.seen_tool_signatures.clear()
+                        if hasattr(_patch_depth, 'tool_result_cache'):
+                            _patch_depth.tool_result_cache.clear()
+            return patched_method
+
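The try/finally pair guarantees the depth counter is restored even when the wrapped call raises, and the run-scoped caches are cleared only once the outermost frame (depth 0) unwinds. A self-contained sketch of the pattern, with assumed names standing in for `_patch_depth`:

```python
import threading

_depth = threading.local()  # assumed stand-in for _patch_depth

def tracked(fn, *args, **kwargs):
    depth = getattr(_depth, 'value', 0)
    _depth.value = depth + 1
    try:
        return fn(*args, **kwargs)
    finally:
        _depth.value = depth  # always restore, even on exceptions
        if depth == 0:        # outermost call: drop run-scoped caches
            for name in ('processed_tool_calls', 'seen_tool_signatures',
                         'tool_result_cache'):
                cache = getattr(_depth, name, None)
                if cache is not None:
                    cache.clear()
```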
+            patched_method = make_patched_method(original_method, method_name, is_async, _patch_depth, get_max_depth, set_max_depth, is_in_microturn, set_in_microturn, get_summary_calls_made, increment_summary_calls)
+
+            # Mark and apply the patch
+            patched_method._dasein_patched = True
+            setattr(llm_class, method_name, patched_method)
+            print(f"[DASEIN][WRAPPER] ✅ Patched {method_name}")
+
+            # Mark this class as patched
+            patched_classes.add(llm_class)
+            self._wrapped_llm = llm
+            print(f"[DASEIN][WRAPPER] Successfully patched {len(methods_to_patch)} methods in {llm_class.__name__}")
+
+            print(f"[DASEIN][WRAPPER] Finished patching {len(patched_classes)} unique LLM class(es)")
+            return
        except Exception as e:
-            self._vprint(f"[DASEIN][WRAPPER] Failed to wrap agent LLM: {e}")
+            print(f"[DASEIN][WRAPPER] Failed to wrap agent LLM: {e}")
+            import traceback
+            traceback.print_exc()
            self._wrapped_llm = None

    def _set_callback_handler_llm(self):
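Patching happens at the class level via `setattr`, with the `_dasein_patched` marker keeping the operation idempotent across repeated wraps. A minimal sketch of that guard:

```python
def patch_class_method(cls, name, make_wrapper):
    original = getattr(cls, name)
    if getattr(original, '_dasein_patched', False):
        return  # already wrapped; avoid double-patching
    wrapper = make_wrapper(original)
    wrapper._dasein_patched = True
    setattr(cls, name, wrapper)  # every instance of cls now sees the wrapper
```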
@@ -1312,7 +2405,7 @@ Follow these rules when planning your actions."""

        return False

-    def _replace_llm_in_structure(self, obj, original_llm, wrapped_llm, max_depth=5, path=""):
+    def _replace_llm_in_structure(self, obj, original_llm, wrapped_llm, max_depth=5, path="", count=[0]):
        """Replace the original LLM with wrapped LLM in the structure."""
        if max_depth <= 0:
            return
@@ -1321,16 +2414,16 @@ Follow these rules when planning your actions."""
        if hasattr(obj, 'steps') and hasattr(obj, '__iter__'):
            for i, step in enumerate(obj.steps):
                if step is original_llm:
-                    self._vprint(f"[DASEIN][WRAPPER] Replacing LLM at {path}.steps[{i}]")
+                    count[0] += 1
+                    print(f"[DASEIN][WRAPPER] Replacing LLM #{count[0]} at {path}.steps[{i}]")
                    obj.steps[i] = wrapped_llm
-                    return
                # Check if step has bound attribute (RunnableBinding)
                if hasattr(step, 'bound') and step.bound is original_llm:
-                    self._vprint(f"[DASEIN][WRAPPER] Replacing LLM at {path}.steps[{i}].bound")
+                    count[0] += 1
+                    print(f"[DASEIN][WRAPPER] Replacing LLM #{count[0]} at {path}.steps[{i}].bound")
                    step.bound = wrapped_llm
-                    return
                # Recursively search in the step
-                self._replace_llm_in_structure(step, original_llm, wrapped_llm, max_depth - 1, f"{path}.steps[{i}]")
+                self._replace_llm_in_structure(step, original_llm, wrapped_llm, max_depth - 1, f"{path}.steps[{i}]", count)

        # Search in attributes
        for attr_name in dir(obj):
@@ -1339,8 +2432,12 @@ Follow these rules when planning your actions."""
            try:
                attr_value = getattr(obj, attr_name)
                if attr_value is original_llm:
-                    self._vprint(f"[DASEIN][WRAPPER] Replacing LLM at {path}.{attr_name}")
+                    print(f"[DASEIN][WRAPPER] Replacing LLM at {path}.{attr_name}")
                    setattr(obj, attr_name, wrapped_llm)
+                    # Verify replacement
+                    new_value = getattr(obj, attr_name)
+                    print(f"[DASEIN][WRAPPER] After replacement, {path}.{attr_name} is now: {type(new_value).__name__}")
+                    print(f"[DASEIN][WRAPPER] Is it our wrapper? {isinstance(new_value, DaseinLLMWrapper)}")
                    return
                # Recursively search in the attribute
                if hasattr(attr_value, '__dict__') or hasattr(attr_value, '__iter__'):
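The new `count=[0]` parameter threads one shared counter through the recursion, and the dropped `return` statements let the walk continue past the first match, so multiple LLM references can now be replaced in one pass. One caveat of the mutable-default idiom, shown below: Python evaluates the default once, so the same list is also shared across separate top-level calls unless a fresh `[0]` is passed.

```python
def walk(node, count=[0]):
    # The same list object flows through every recursive frame...
    count[0] += 1
    for child in getattr(node, 'children', []):
        walk(child, count)
    return count[0]

# ...and persists across calls: walk(a); walk(b) keeps counting upward.
# Callers wanting an isolated tally should pass count=[0] explicitly.
```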
@@ -1725,6 +2822,12 @@ Follow these rules when planning your actions."""
            # Run the agent
            result = self._agent.invoke(*args, **kwargs)

+            # Print tools summary if available
+            if hasattr(self._callback_handler, 'get_compiled_tools_summary'):
+                summary = self._callback_handler.get_compiled_tools_summary()
+                if summary:
+                    print(f"[DASEIN] {summary}")
+
            # FIXED: Extract trace for display but never calculate KPIs locally
            # Service-first architecture: All KPI calculation done by distributed services
            self._vprint(f"[DASEIN][SERVICE_FIRST] Extracting trace for display - KPIs handled by post-run API service")
@@ -1749,6 +2852,11 @@ Follow these rules when planning your actions."""
            # Clear tool rules from system prompt after triggering async post-run
            self._clear_tool_rules_from_system()

+            # Cleanup run-scoped corpus (print telemetry and free memory)
+            if hasattr(self, '_callback_handler') and hasattr(self._callback_handler, 'run_id'):
+                from .pipecleaner import cleanup_corpus
+                cleanup_corpus(self._callback_handler.run_id)
+
            return result

    async def _ainvoke_single(self, *args, **kwargs):
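Both invoke paths now end the run by tearing down the run-scoped corpus. Based only on the calls visible in this diff (`get_or_create_corpus(run_id, verbose=...)` and `cleanup_corpus(run_id)`; anything beyond that is an assumption), the lifecycle is roughly:

```python
from dasein.pipecleaner import get_or_create_corpus, cleanup_corpus

def run_scoped(run_id, do_run):
    corpus = get_or_create_corpus(run_id, verbose=False)  # created lazily per run
    try:
        return do_run(corpus)
    finally:
        cleanup_corpus(run_id)  # prints telemetry and frees the corpus
```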
@@ -1796,6 +2904,12 @@ Follow these rules when planning your actions."""
            # Run the agent asynchronously
            result = await self._agent.ainvoke(*args, **kwargs)

+            # Print tools summary if available
+            if hasattr(self._callback_handler, 'get_compiled_tools_summary'):
+                summary = self._callback_handler.get_compiled_tools_summary()
+                if summary:
+                    print(f"[DASEIN] {summary}")
+
            # FIXED: Extract trace for display but never calculate KPIs locally
            # Service-first architecture: All KPI calculation done by distributed services
            self._vprint(f"[DASEIN][SERVICE_FIRST] Extracting trace for display - KPIs handled by post-run API service")
@@ -1820,6 +2934,11 @@ Follow these rules when planning your actions."""
            # Clear tool rules from system prompt after triggering async post-run
            self._clear_tool_rules_from_system()

+            # Cleanup run-scoped corpus (print telemetry and free memory)
+            if hasattr(self, '_callback_handler') and hasattr(self._callback_handler, 'run_id'):
+                from .pipecleaner import cleanup_corpus
+                cleanup_corpus(self._callback_handler.run_id)
+
            return result

    def _invoke_with_retry(self, *args, **kwargs):
@@ -2231,11 +3350,8 @@ Follow these rules when planning your actions."""
            print(f" Wall Time (ms): {metrics['wall_time_ms']}")
            print(f" Success Rate: {metrics['success_rate']:.1f}% ({metrics['total_turns']}/{metrics['total_turns']})")
            print(f" Overall Success: {'✅' if metrics['overall_success'] else '❌'}")
-            # Format final outcome using the wrapped LLM's method
-            if hasattr(self, '_wrapped_llm') and self._wrapped_llm:
-                final_outcome_formatted = self._wrapped_llm._format_final_outcome(metrics.get('final_outcome', 'unknown'))
-            else:
-                final_outcome_formatted = f" {metrics.get('final_outcome', 'unknown')}"
+            # Format final outcome
+            final_outcome_formatted = self._format_final_outcome(metrics.get('final_outcome', 'unknown'))
            print(f" Final Outcome: {final_outcome_formatted}")
            print(f" Result: {str(metrics['result'])[:100]}...")

@@ -2317,12 +3433,8 @@ Follow these rules when planning your actions."""
            first_outcome = first_metrics.get('final_outcome', 'unknown')
            last_outcome = last_metrics.get('final_outcome', 'unknown')
            # Format final outcomes using the wrapped LLM's method
-            if hasattr(self, '_wrapped_llm') and self._wrapped_llm:
-                first_formatted = self._wrapped_llm._format_final_outcome(first_outcome)
-                last_formatted = self._wrapped_llm._format_final_outcome(last_outcome)
-            else:
-                first_formatted = f"❓ {first_outcome}"
-                last_formatted = f"❓ {last_outcome}"
+            first_formatted = self._format_final_outcome(first_outcome)
+            last_formatted = self._format_final_outcome(last_outcome)
            outcome_improvement = f"{first_formatted} → {last_formatted}"
            print(f"🎯 Final Outcome: {outcome_improvement}")

@@ -2344,13 +3456,8 @@ Follow these rules when planning your actions."""

            # Prioritize final outcome improvement
            if outcome_improved:
-                # Format final outcomes using the wrapped LLM's method
-                if hasattr(self, '_wrapped_llm') and self._wrapped_llm:
-                    first_formatted = self._wrapped_llm._format_final_outcome(first_outcome)
-                    last_formatted = self._wrapped_llm._format_final_outcome(last_outcome)
-                else:
-                    first_formatted = f" {first_outcome}"
-                    last_formatted = f" {last_outcome}"
+                first_formatted = self._format_final_outcome(first_outcome)
+                last_formatted = self._format_final_outcome(last_outcome)
                print(f"🎉 BREAKTHROUGH: Agent went from {first_formatted} to {last_formatted}!")
            elif turns_improved or tokens_improved or time_improved or success_improved:
                improvements = []
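These hunks replace the delegation to `self._wrapped_llm._format_final_outcome(...)` (which silently degraded when no wrapped LLM existed) with a method on the class itself. The actual implementation is not shown anywhere in this diff; a purely hypothetical sketch of what such a formatter might look like:

```python
def _format_final_outcome(self, outcome):
    # Hypothetical mapping; the real glyphs and keys are not in the diff.
    glyphs = {'success': '✅', 'failure': '❌'}
    return f"{glyphs.get(outcome, '❓')} {outcome}"
```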
@@ -2625,6 +3732,38 @@ Follow these rules when planning your actions."""

        print(f"[DASEIN] Pre-run service returned {len(selected_rules)} rules")

+        # Pre-load embedding model if we have filter_search rules (avoid timeout on first batch)
+        if selected_rules:
+            # Check for any llm_start rules with "filter search" keywords
+            has_filter_search_rules = False
+            for rule_meta in selected_rules:
+                # Unwrap tuple if needed
+                rule_obj = rule_meta[0] if isinstance(rule_meta, tuple) and len(rule_meta) == 2 else rule_meta
+
+                # Check if this is an llm_start rule with filter/search keywords
+                # Handle both dict and object formats
+                if isinstance(rule_obj, dict):
+                    target_step_type = rule_obj.get('target_step_type')
+                    advice = rule_obj.get('advice_text') or rule_obj.get('advice', '')
+                else:
+                    target_step_type = getattr(rule_obj, 'target_step_type', None)
+                    advice = getattr(rule_obj, 'advice_text', None) or getattr(rule_obj, 'advice', None) or ''
+
+                advice_lower = advice.lower() if advice else ''
+
+                if target_step_type == 'llm_start' and 'filter' in advice_lower and 'search' in advice_lower:
+                    has_filter_search_rules = True
+                    break
+
+            if has_filter_search_rules:
+                print(f"[DASEIN] 🔧 Pre-loading embedding model for pipecleaner (found filter search rules)...")
+                from .pipecleaner import _get_embedding_model
+                try:
+                    _get_embedding_model()  # Warm up the model
+                    print(f"[DASEIN] ✅ Embedding model pre-loaded successfully")
+                except Exception as e:
+                    print(f"[DASEIN] ⚠️ Failed to pre-load embedding model: {e}")
+
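Warming the model at pre-run moves the one-time download/initialization cost out of the first deduplication batch, which is what the comment about timeouts refers to. `_get_embedding_model` presumably caches a singleton; a sketch under that assumption (the backend and model name below are illustrative, not taken from the diff):

```python
_MODEL = None

def _get_embedding_model():
    global _MODEL
    if _MODEL is None:
        # First call pays the load cost; later calls return the cached model.
        from sentence_transformers import SentenceTransformer
        _MODEL = SentenceTransformer('all-MiniLM-L6-v2')  # illustrative model
    return _MODEL
```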
        # CRITICAL: For LangGraph agents, recreate with injected prompt
        if self._is_langgraph and selected_rules:
            print(f" [DASEIN][PRERUN] LangGraph agent detected with {len(selected_rules)} rules")
@@ -2826,75 +3965,13 @@ Follow these rules when planning your actions."""

            agent_fingerprint = _minimal_agent_fingerprint(self._agent)

-            # Extract tool metadata for Stage 3.5 tool grounding
-            def _extract_tool_metadata(agent):
-                """
-                Extract tool metadata (name, description, args_schema) from agent.
-
-                CRITICAL: Extracts ALL available tools from the agent, not just tools used in trace.
-                Why: If agent used wrong tool (e.g., extract_text instead of get_elements),
-                the trace won't show the correct tool. Stage 3.5 needs to see all options
-                to suggest better alternatives.
-                """
-                tools_metadata = []
-                tools_to_process = []
-
-                # Get ALL tools from agent (LangChain or LangGraph) - not filtered by trace usage
-                tools_attr = getattr(agent, 'tools', None)
-                if tools_attr:
-                    try:
-                        tools_to_process = list(tools_attr)
-                    except Exception:
-                        pass
-                elif getattr(agent, 'toolkit', None):
-                    tk = getattr(agent, 'toolkit')
-                    tk_tools = getattr(tk, 'tools', None) or getattr(tk, 'get_tools', None)
-                    try:
-                        tools_to_process = list(tk_tools() if callable(tk_tools) else tk_tools or [])
-                    except Exception:
-                        pass
-
-                # Also try LangGraph tools from compiled graph
-                if hasattr(agent, 'nodes') and 'tools' in agent.nodes:
-                    tools_node = agent.nodes['tools']
-                    if hasattr(tools_node, 'node') and hasattr(tools_node.node, 'steps'):
-                        for step in tools_node.node.steps:
-                            if hasattr(step, 'tools_by_name'):
-                                tools_to_process.extend(step.tools_by_name.values())
-                                break
-
-                # Extract metadata from each tool
-                for tool in tools_to_process:
-                    try:
-                        tool_meta = {
-                            'name': getattr(tool, 'name', str(tool.__class__.__name__)),
-                            'description': getattr(tool, 'description', ''),
-                        }
-
-                        # Extract args_schema if available
-                        if hasattr(tool, 'args_schema') and tool.args_schema:
-                            try:
-                                # Try Pydantic v2 method
-                                if hasattr(tool.args_schema, 'model_json_schema'):
-                                    tool_meta['args_schema'] = tool.args_schema.model_json_schema()
-                                # Fallback to Pydantic v1 method
-                                elif hasattr(tool.args_schema, 'schema'):
-                                    tool_meta['args_schema'] = tool.args_schema.schema()
-                                else:
-                                    tool_meta['args_schema'] = {}
-                            except Exception:
-                                tool_meta['args_schema'] = {}
-                        else:
-                            tool_meta['args_schema'] = {}
-
-                        tools_metadata.append(tool_meta)
-                    except Exception as e:
-                        # Skip tools that fail to extract
-                        pass
-
-                return tools_metadata
-
-            tools_metadata = _extract_tool_metadata(self._agent)
+            # Get tool metadata from callback handler (extracted during runtime)
+            tools_metadata = []
+            if hasattr(self._callback_handler, '_compiled_tools_metadata'):
+                tools_metadata = self._callback_handler._compiled_tools_metadata
+            # Fallback: try extracting now (may not work if tools unbound)
+            if not tools_metadata:
+                tools_metadata = self._extract_tool_metadata(self._agent)

            # Reuse existing graph analysis (already extracted in __init__)
            graph_metadata = None
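The removed extractor's schema handling is worth keeping in mind when reading `_extract_tool_metadata` elsewhere: it probed Pydantic v2 first, then fell back to v1. A standalone sketch of that compatibility shim:

```python
def schema_dict(args_schema):
    if args_schema is None:
        return {}
    try:
        if hasattr(args_schema, 'model_json_schema'):  # Pydantic v2
            return args_schema.model_json_schema()
        if hasattr(args_schema, 'schema'):             # Pydantic v1
            return args_schema.schema()
    except Exception:
        pass
    return {}
```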
@@ -3512,11 +4589,8 @@ Follow these rules when planning your actions."""

        # Step-by-step comparison
        print(f"\n STEP-BY-STEP COMPARISON:")
-        # Format step 1 outcome using the wrapped LLM's method
-        if hasattr(self, '_wrapped_llm') and self._wrapped_llm:
-            step1_outcome = self._wrapped_llm._format_final_outcome(learning_metrics[0].get('final_outcome', 'unknown'))
-        else:
-            step1_outcome = f" {learning_metrics[0].get('final_outcome', 'unknown')}"
+        # Format step 1 outcome
+        step1_outcome = self._format_final_outcome(learning_metrics[0].get('final_outcome', 'unknown'))
        print(f" Step 1: {learning_metrics[0]['total_tokens']} tokens, {learning_metrics[0]['total_turns']} turns, {learning_metrics[0]['trace_time_ms']}ms, {learning_metrics[0]['success_rate']:.1f}%, {step1_outcome} (naive baseline)")

        # Compare steps 2→N (learning vs baseline)
@@ -3541,13 +4615,8 @@ Follow these rules when planning your actions."""
            failure_direction = "" if failure_change > 0 else "" if failure_change < 0 else "="

            # Get final outcomes for this step
-            # Format final outcomes using the wrapped LLM's method
-            if hasattr(self, '_wrapped_llm') and self._wrapped_llm:
-                base_outcome = self._wrapped_llm._format_final_outcome(base.get('final_outcome', 'unknown'))
-                learn_outcome = self._wrapped_llm._format_final_outcome(learn.get('final_outcome', 'unknown'))
-            else:
-                base_outcome = f" {base.get('final_outcome', 'unknown')}"
-                learn_outcome = f" {learn.get('final_outcome', 'unknown')}"
+            base_outcome = self._format_final_outcome(base.get('final_outcome', 'unknown'))
+            learn_outcome = self._format_final_outcome(learn.get('final_outcome', 'unknown'))

            print(f" Step {i}: {base['total_tokens']} → {learn['total_tokens']} tokens ({token_direction}{abs(token_improvement)}, {token_pct:+.1f}%)")
            print(f"          {base['total_turns']} → {learn['total_turns']} turns ({turn_direction}{abs(turn_improvement)}, {turn_pct:+.1f}%)")