dasein-core 0.2.7__py3-none-any.whl → 0.2.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dasein/api.py +1144 -133
- dasein/capture.py +2325 -1803
- dasein/microturn.py +475 -0
- dasein/models/en_core_web_sm/en_core_web_sm/__init__.py +10 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/LICENSE +19 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/LICENSES_SOURCES +66 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/README.md +47 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/accuracy.json +330 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/attribute_ruler/patterns +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/config.cfg +269 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/lemmatizer/lookups/lookups.bin +1 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/meta.json +521 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/cfg +13 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/model +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/moves +1 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/cfg +13 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/model +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/moves +1 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/senter/cfg +3 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/senter/model +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tagger/cfg +57 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tagger/model +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tok2vec/cfg +3 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tok2vec/model +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tokenizer +3 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/key2row +1 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/lookups.bin +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/strings.json +84782 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/vectors +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/vectors.cfg +3 -0
- dasein/models/en_core_web_sm/en_core_web_sm/meta.json +521 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/LICENSE +19 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/LICENSES_SOURCES +66 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/METADATA +59 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/RECORD +35 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/WHEEL +5 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/entry_points.txt +2 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/top_level.txt +1 -0
- dasein/pipecleaner.py +1917 -0
- dasein/wrappers.py +315 -0
- {dasein_core-0.2.7.dist-info → dasein_core-0.2.10.dist-info}/METADATA +4 -1
- dasein_core-0.2.10.dist-info/RECORD +59 -0
- dasein_core-0.2.7.dist-info/RECORD +0 -21
- {dasein_core-0.2.7.dist-info → dasein_core-0.2.10.dist-info}/WHEEL +0 -0
- {dasein_core-0.2.7.dist-info → dasein_core-0.2.10.dist-info}/licenses/LICENSE +0 -0
- {dasein_core-0.2.7.dist-info → dasein_core-0.2.10.dist-info}/top_level.txt +0 -0
dasein/api.py
CHANGED
@@ -18,6 +18,15 @@ from .services import ServiceAdapter
 from .config import W_COST


+# ============================================================================
+# CONFIGURATION
+# ============================================================================
+
+# Microturn enforcement configuration
+USE_LLM_MICROTURN = False # If True, use LLM to judge which calls to allow
+                          # If False, use deterministic duplicate detection only
+                          # (Keep False - LLM microturn adds latency, use only for semantic rules)
+
 # ============================================================================
 # VERBOSE LOGGING HELPER
 # ============================================================================
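Note: a minimal sketch of the gating this flag describes, reduced from the comments above. This is illustrative only; `judge_with_llm` is a hypothetical helper, not part of the package.

```python
USE_LLM_MICROTURN = False

def allow_call(proposed: str, calls_made: dict) -> str:
    """Return 'PASS' or 'BLOCK' for a proposed function call (sketch)."""
    if USE_LLM_MICROTURN:
        # Semantic path: costs one extra LLM round-trip per proposed call
        return judge_with_llm(proposed, calls_made)  # hypothetical helper
    # Deterministic path: block only exact repeats
    return "BLOCK" if calls_made.get(proposed, 0) > 0 else "PASS"
```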
@@ -51,6 +60,7 @@ class DaseinLLMWrapper(BaseChatModel):

     def _generate(self, messages, stop=None, run_manager=None, **kwargs):
         """Generate response and capture trace."""
+        print(f"[DASEIN][WRAPPER] _generate called with {len(messages)} messages")
         self._vprint(f"[DASEIN][TRACE] LLM wrapper _generate called with {len(messages)} messages")

         # Get model name dynamically
@@ -163,6 +173,87 @@ class DaseinLLMWrapper(BaseChatModel):
         self._vprint(f"[DASEIN][TRACE] LLM result: {result_text[:100]}...")
         self._vprint(f"[DASEIN][METRICS] Tokens: {step['tokens_input']}->{output_tokens} | Time: {duration_ms}ms | Success: {'OK' if success else 'FAIL'}")

+        # 🚨 MICROTURN ENFORCEMENT - DISABLED (can interfere with tool execution)
+        if False:  # Disabled
+            try:
+                proposed_func_name = None
+                print(f"[DASEIN][MICROTURN_DEBUG] Checking result for function call...")
+                if hasattr(result, 'generations') and result.generations:
+                    first_gen = result.generations[0]
+                    if isinstance(first_gen, list) and len(first_gen) > 0:
+                        generation = first_gen[0]
+                    else:
+                        generation = first_gen
+
+                    print(f"[DASEIN][MICROTURN_DEBUG] generation type: {type(generation)}")
+                    if hasattr(generation, 'message') and hasattr(generation.message, 'additional_kwargs'):
+                        func_call = generation.message.additional_kwargs.get('function_call')
+                        print(f"[DASEIN][MICROTURN_DEBUG] func_call: {func_call}")
+                        if func_call and isinstance(func_call, dict) and 'name' in func_call:
+                            proposed_func_name = func_call['name']
+                else:
+                    print(f"[DASEIN][MICROTURN_DEBUG] No generations in result")
+
+                if not proposed_func_name:
+                    print(f"[DASEIN][MICROTURN_DEBUG] No function call in response, skipping microturn")
+                else:
+                    print(f"[DASEIN][MICROTURN_DEBUG] Found proposed function: {proposed_func_name}")
+
+                    # Build execution state (BEFORE adding current call)
+                    state_lines = []
+                    if hasattr(self._callback_handler, '_function_calls_made') and self._callback_handler._function_calls_made:
+                        for fname in sorted(self._callback_handler._function_calls_made.keys()):
+                            count = len(self._callback_handler._function_calls_made[fname])
+                            if count > 0:
+                                state_lines.append(f" • {fname}: called {count}x")
+
+                    state_context = "EXECUTION STATE:\n" + "\n".join(state_lines) if state_lines else "EXECUTION STATE: No calls yet"
+
+                    microturn_prompt = f"""You are a rule enforcement system. Your job is to decide if a proposed action violates the rules.
+
+HARD RULE: You MUST make at maximum a single summary call
+
+{state_context}
+
+PROPOSED ACTION: Call {proposed_func_name}
+
+DECISION:
+If this action violates the rule, respond with EXACTLY: BLOCK
+If this action is allowed, respond with EXACTLY: PASS
+
+Your response (BLOCK or PASS):"""
+
+                    print(f"[DASEIN][MICROTURN_DEBUG] Calling microturn LLM...")
+                    from langchain_core.messages import HumanMessage
+                    messages_for_microturn = [HumanMessage(content=microturn_prompt)]
+                    microturn_response = self._llm.invoke(messages_for_microturn)
+
+                    if hasattr(microturn_response, 'content'):
+                        decision = microturn_response.content.strip().upper()
+                    else:
+                        decision = str(microturn_response).strip().upper()
+
+                    node_name = getattr(self._callback_handler, '_current_chain_node', 'agent')
+                    print(f"[DASEIN][MICROTURN] Node: {node_name} | Proposed: {proposed_func_name} | Decision: {decision}")
+
+                    if "BLOCK" in decision:
+                        print(f"[DASEIN][MICROTURN] BLOCKING {proposed_func_name} call!")
+                        # Modify the result to clear the function call
+                        if hasattr(result, 'generations') and result.generations:
+                            first_gen = result.generations[0]
+                            if isinstance(first_gen, list) and len(first_gen) > 0:
+                                generation = first_gen[0]
+                            else:
+                                generation = first_gen
+
+                            if hasattr(generation, 'message'):
+                                generation.message.additional_kwargs['function_call'] = {}
+                                generation.message.content = ""
+            except Exception as e:
+                print(f"[DASEIN][MICROTURN] Error in microturn: {e}")
+                import traceback
+                traceback.print_exc()
+
         # Trigger on_llm_end callback
         if self._callback_handler:
             self._callback_handler.on_llm_end(
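Note: stripped of the wrapper plumbing, the (currently disabled) microturn above reduces to one extra chat call plus a string check. A minimal sketch, assuming any LangChain chat model as `llm`; the prompt wording is abridged from the diff.

```python
from langchain_core.messages import HumanMessage

def microturn_decision(llm, proposed_func_name: str, state_context: str) -> str:
    """Ask a judging LLM whether a proposed call violates the hard rule (sketch)."""
    prompt = (
        "You are a rule enforcement system. "
        "HARD RULE: You MUST make at maximum a single summary call\n\n"
        f"{state_context}\n\n"
        f"PROPOSED ACTION: Call {proposed_func_name}\n\n"
        "Your response (BLOCK or PASS):"
    )
    response = llm.invoke([HumanMessage(content=prompt)])
    decision = getattr(response, "content", str(response)).strip().upper()
    return "BLOCK" if "BLOCK" in decision else "PASS"
```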
@@ -217,6 +308,15 @@ class DaseinLLMWrapper(BaseChatModel):
         except:
             return "No result"

+    def invoke(self, messages, **kwargs):
+        """Override invoke to intercept all LLM calls."""
+        print(f"[DASEIN][WRAPPER] invoke() called with {len(messages) if isinstance(messages, list) else 1} messages")
+
+        # Call the parent's invoke which will call our _generate
+        result = super().invoke(messages, **kwargs)
+
+        return result
+
     def _llm_type(self):
         return "dasein_llm_wrapper"

@@ -419,6 +519,14 @@ def cognate(agent, *, weights=None, verbose=False, retry=1, performance_tracking
     Returns:
         A proxy object with .run() and .invoke() methods
     """
+    # CRITICAL: Prevent double-wrapping in Jupyter/Colab when cell is rerun
+    # If agent is already a CognateProxy, unwrap it first to avoid nested retry loops
+    if isinstance(agent, CognateProxy):
+        print("[DASEIN][WARNING] Agent is already wrapped with cognate(). Unwrapping to prevent nested loops.")
+        print(f"[DASEIN][WARNING] Previous config: retry={agent._retry}, performance_tracking={agent._performance_tracking}")
+        print(f"[DASEIN][WARNING] New config: retry={retry}, performance_tracking={performance_tracking}")
+        agent = agent._agent  # Unwrap to get original agent
+
     global _global_cognate_proxy
     _global_cognate_proxy = CognateProxy(agent, weights=weights, verbose=verbose, retry=retry, performance_tracking=performance_tracking, rule_trace=rule_trace, post_run=post_run, performance_tracking_id=performance_tracking_id, top_k=top_k)
     return _global_cognate_proxy
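Note: the unwrap guard matters mostly in notebooks, where re-running a cell would otherwise stack proxies and nest retry loops. Illustrative scenario:

```python
agent = cognate(agent, retry=2)  # first run: wraps the raw agent
agent = cognate(agent, retry=3)  # cell rerun: unwraps the old proxy, re-wraps once
```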
@@ -728,7 +836,7 @@ class CognateProxy:
             print(f"[DASEIN] Coordinator node: {coordinator_node}")
             planning_nodes = self._identify_planning_nodes(agent, coordinator_node)

-        self._callback_handler = DaseinCallbackHandler(weights=weights, llm=None, is_langgraph=self._is_langgraph, coordinator_node=coordinator_node, planning_nodes=planning_nodes, verbose=verbose)
+        self._callback_handler = DaseinCallbackHandler(weights=weights, llm=None, is_langgraph=self._is_langgraph, coordinator_node=coordinator_node, planning_nodes=planning_nodes, verbose=verbose, agent=self._agent, extract_tools_fn=self._extract_tool_metadata)
         self._langgraph_params = None
         self._original_agent = agent  # Keep reference to original
         self._agent_was_recreated = False  # Track if agent recreation succeeded
@@ -775,10 +883,24 @@ class CognateProxy:

         # Wrap the agent's LLM with our trace capture wrapper
         self._wrap_agent_llm()
+
+        # Inject universal dead-letter tool
+        self._inject_deadletter_tool()

     def _vprint(self, message: str, force: bool = False):
         """Helper for verbose printing."""
         _vprint(message, self._verbose, force)
+
+    def _format_final_outcome(self, outcome):
+        """Format final outcome for display."""
+        if outcome == "completed":
+            return "✅ Task Completed"
+        elif outcome == "gave_up":
+            return "⚠️ Agent Gave Up"
+        elif outcome == "failed":
+            return "❌ Failed"
+        else:
+            return f"❓ {outcome}"

     def _extract_query_from_input(self, input_data):
         """ CRITICAL: Extract query string from various input formats."""
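For reference, the mapping the new helper implements (the last line shows the fallback branch):

```python
# _format_final_outcome("completed") -> "✅ Task Completed"
# _format_final_outcome("gave_up")   -> "⚠️ Agent Gave Up"
# _format_final_outcome("failed")    -> "❌ Failed"
# _format_final_outcome("timeout")   -> "❓ timeout"
```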
@@ -965,6 +1087,172 @@ class CognateProxy:
             self._vprint(f"[DASEIN][PLANNING_NODES] ERROR: {e}")
             return set()

+    def _extract_tool_metadata(self, agent):
+        """
+        Extract tool metadata (name, description, args_schema) from agent.
+
+        CRITICAL: Extracts ALL available tools from the agent, not just tools used in trace.
+        Why: If agent used wrong tool (e.g., extract_text instead of get_elements),
+        the trace won't show the correct tool. Stage 3.5 needs to see all options
+        to suggest better alternatives.
+
+        For multi-agent systems, preserves node→tool mapping so Stage 3.5 knows
+        which tools are available in which nodes (critical for grounding).
+        """
+        tools_metadata = []
+        tools_to_process = []  # Format: (tool, node_name or None)
+
+        # Get ALL tools from agent (LangChain or LangGraph) - not filtered by trace usage
+        tools_attr = getattr(agent, 'tools', None)
+        if tools_attr:
+            try:
+                # Top-level tools have no node context
+                tools_to_process = [(t, None) for t in list(tools_attr)]
+            except Exception:
+                pass
+        elif getattr(agent, 'toolkit', None):
+            tk = getattr(agent, 'toolkit')
+            tk_tools = getattr(tk, 'tools', None) or getattr(tk, 'get_tools', None)
+            try:
+                # Toolkit tools have no node context
+                tools_to_process = [(t, None) for t in list(tk_tools() if callable(tk_tools) else tk_tools or [])]
+            except Exception:
+                pass
+
+        # Also try LangGraph tools from compiled graph
+        # For multi-agent systems, scan ALL nodes for tools (not just 'tools' node)
+        # CRITICAL: Preserve node→tool mapping for proper grounding
+        # CRITICAL: Use agent.get_graph().nodes (same as planning node discovery)
+        # NOT agent.nodes which returns different objects without .data attribute
+        if hasattr(agent, 'get_graph'):
+            graph = agent.get_graph()
+            nodes = graph.nodes
+            for node_name, node_obj in nodes.items():
+                if node_name.startswith('__'):  # Skip __start__, __end__
+                    continue
+
+                # Check if this is a subgraph with child nodes (like research_supervisor)
+                # CRITICAL: Use node_obj.data (compiled graph) not node_obj.node (implementation)
+                if hasattr(node_obj, 'data') and hasattr(node_obj.data, 'nodes') and 'Compiled' in type(node_obj.data).__name__:
+                    try:
+                        subgraph = node_obj.data.get_graph()
+                        for sub_node_name, sub_node_obj in subgraph.nodes.items():
+                            if sub_node_name.startswith('__'):
+                                continue
+                            if hasattr(sub_node_obj, 'node'):
+                                sub_actual = sub_node_obj.node
+                                # Use fully qualified node name: parent.child
+                                full_node_name = f"{node_name}.{sub_node_name}"
+
+                                # Check all tool patterns in subgraph children
+                                if hasattr(sub_actual, 'tools_by_name'):
+                                    tools_to_process.extend([(t, full_node_name) for t in sub_actual.tools_by_name.values()])
+                                if hasattr(sub_actual, 'runnable') and hasattr(sub_actual.runnable, 'tools'):
+                                    sub_tools = sub_actual.runnable.tools
+                                    if callable(sub_tools):
+                                        try:
+                                            sub_tools = sub_tools()
+                                        except:
+                                            pass
+                                    if isinstance(sub_tools, list):
+                                        tools_to_process.extend([(t, full_node_name) for t in sub_tools])
+                                        print(f" [DASEIN][EXTRACT] Found {len(sub_tools)} tools in {full_node_name}.runnable.tools")
+                                    else:
+                                        tools_to_process.append((sub_tools, full_node_name))
+                                        print(f" [DASEIN][EXTRACT] Found 1 tool in {full_node_name}.runnable.tools")
+                    except Exception as e:
+                        print(f" [DASEIN][EXTRACT] Failed to analyze subgraph: {e}")
+
+                # Check if node has steps with tools
+                if hasattr(node_obj, 'node'):
+                    actual_node = node_obj.node
+
+                    # Check for tools_by_name (common in agent nodes)
+                    if hasattr(actual_node, 'tools_by_name'):
+                        node_tools = actual_node.tools_by_name.values()
+                        tools_to_process.extend([(t, node_name) for t in node_tools])
+                        print(f" [DASEIN][EXTRACT] Found {len(node_tools)} tools in {node_name}.tools_by_name")
+
+                    # Check for runnable.tools (dynamic tools like ConductResearch)
+                    if hasattr(actual_node, 'runnable') and hasattr(actual_node.runnable, 'tools'):
+                        runnable_tools = actual_node.runnable.tools
+                        if callable(runnable_tools):
+                            try:
+                                runnable_tools = runnable_tools()
+                            except:
+                                pass
+                        if isinstance(runnable_tools, list):
+                            tools_to_process.extend([(t, node_name) for t in runnable_tools])
+                            print(f" [DASEIN][EXTRACT] Found {len(runnable_tools)} tools in {node_name}.runnable.tools")
+                        else:
+                            tools_to_process.append((runnable_tools, node_name))
+                            print(f" [DASEIN][EXTRACT] Found 1 tool in {node_name}.runnable.tools")
+
+                    # Check for bound.tools (another common pattern)
+                    if hasattr(actual_node, 'bound') and hasattr(actual_node.bound, 'tools'):
+                        bound_tools = actual_node.bound.tools
+                        if isinstance(bound_tools, list):
+                            tools_to_process.extend([(t, node_name) for t in bound_tools])
+                            print(f" [DASEIN][EXTRACT] Found {len(bound_tools)} tools in {node_name}.bound.tools")
+                        else:
+                            tools_to_process.append((bound_tools, node_name))
+                            print(f" [DASEIN][EXTRACT] Found 1 tool in {node_name}.bound.tools")
+
+                    # Check for steps (legacy pattern)
+                    if hasattr(actual_node, 'steps'):
+                        for step in actual_node.steps:
+                            if hasattr(step, 'tools_by_name'):
+                                step_tools = step.tools_by_name.values()
+                                tools_to_process.extend([(t, node_name) for t in step_tools])
+                                print(f" [DASEIN][EXTRACT] Found {len(step_tools)} tools in {node_name}.steps")
+                                break
+
+        # Extract metadata from each tool (with node context for multi-agent)
+        for tool_tuple in tools_to_process:
+            try:
+                # Unpack (tool, node_name)
+                if isinstance(tool_tuple, tuple) and len(tool_tuple) == 2:
+                    tool, node_name = tool_tuple
+                else:
+                    tool = tool_tuple
+                    node_name = None
+
+                # Unwrap DaseinToolWrapper to get complete metadata (especially args_schema)
+                if hasattr(tool, 'original_tool'):
+                    tool = tool.original_tool
+
+                tool_meta = {
+                    'name': getattr(tool, 'name', str(tool.__class__.__name__)),
+                    'description': getattr(tool, 'description', ''),
+                }
+
+                # CRITICAL: Add node context for multi-agent systems (for grounding)
+                if node_name:
+                    tool_meta['node'] = node_name
+
+                # Extract args_schema if available
+                if hasattr(tool, 'args_schema') and tool.args_schema:
+                    try:
+                        # Try Pydantic v2 method
+                        if hasattr(tool.args_schema, 'model_json_schema'):
+                            tool_meta['args_schema'] = tool.args_schema.model_json_schema()
+                        # Fallback to Pydantic v1 method
+                        elif hasattr(tool.args_schema, 'schema'):
+                            tool_meta['args_schema'] = tool.args_schema.schema()
+                        else:
+                            tool_meta['args_schema'] = {}
+                    except Exception:
+                        tool_meta['args_schema'] = {}
+                else:
+                    tool_meta['args_schema'] = {}
+
+                tools_metadata.append(tool_meta)
+            except Exception as e:
+                # Skip tools that fail to extract
+                pass
+
+        return tools_metadata
+
     def _extract_langgraph_params(self, agent):
         """ CRITICAL: Extract LangGraph agent creation parameters for recreation."""
         try:
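Note: `_extract_tool_metadata` (added above) returns a flat list of dicts. Per the code, `node` appears only for tools discovered inside a LangGraph node, and `args_schema` falls back to `{}`. Illustrative shape (all values invented):

```python
tools_metadata = [
    {
        "name": "get_elements",                         # illustrative tool name
        "description": "Get elements from a web page",  # illustrative
        "node": "research_supervisor.researcher",       # parent.child qualified name
        "args_schema": {"properties": {"selector": {"type": "string"}}},
    },
    # A top-level tool: no node context, empty schema fallback
    {"name": "extract_text", "description": "Extract text", "args_schema": {}},
]
```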
@@ -979,24 +1267,131 @@ class CognateProxy:
                 return None

             # Try to extract tools from the compiled graph
-
-
-
-
-
-
-
-
+            # CRITICAL: For multi-agent, scan ALL nodes (not just 'tools' node)
+            tools = []
+            # CRITICAL: Use agent.get_graph().nodes (same as planning node discovery)
+            # NOT agent.nodes which returns different objects without .data attribute
+            if hasattr(agent, 'get_graph'):
+                graph = agent.get_graph()
+                nodes = graph.nodes
+                print(f" [DASEIN][EXTRACT] Scanning {len(nodes)} LangGraph nodes for tools...")
+                for node_name, node_obj in nodes.items():
+                    if node_name.startswith('__'):  # Skip __start__, __end__
+                        continue
+
+                    print(f" [DASEIN][EXTRACT] Checking node: {node_name}")
+
+                    # Check if this is a subgraph with child nodes (like research_supervisor)
+                    # CRITICAL: Use node_obj.data (compiled graph) not node_obj.node (implementation)
+                    if hasattr(node_obj, 'data') and hasattr(node_obj.data, 'nodes') and 'Compiled' in type(node_obj.data).__name__:
+                        try:
+                            subgraph = node_obj.data.get_graph()
+                            print(f" [DASEIN][EXTRACT] {node_name} is a subgraph with {len(subgraph.nodes)} child nodes")
+                            for sub_node_name, sub_node_obj in subgraph.nodes.items():
+                                if sub_node_name.startswith('__'):
+                                    continue
+                                print(f" [DASEIN][EXTRACT] Checking subgraph child: {sub_node_name}")
+                                if hasattr(sub_node_obj, 'node'):
+                                    sub_actual = sub_node_obj.node
+
+                                    # Debug: print what attributes this node has
+                                    attrs = [a for a in dir(sub_actual) if not a.startswith('_')]
+                                    print(f" [DASEIN][EXTRACT] Node attributes: {', '.join(attrs[:10])}...")
+
+                                    # Check all tool patterns in subgraph children
+                                    if hasattr(sub_actual, 'tools_by_name'):
+                                        for tool_name, tool in sub_actual.tools_by_name.items():
+                                            if hasattr(tool, 'original_tool'):
+                                                tools.append(tool.original_tool)
+                                            else:
+                                                tools.append(tool)
+                                        print(f" [DASEIN][EXTRACT] Found {len(sub_actual.tools_by_name)} tools in {node_name}.{sub_node_name}.tools_by_name")
+                                    if hasattr(sub_actual, 'runnable') and hasattr(sub_actual.runnable, 'tools'):
+                                        sub_tools = sub_actual.runnable.tools
+                                        if callable(sub_tools):
+                                            try:
+                                                sub_tools = sub_tools()
+                                            except:
+                                                pass
+                                        if isinstance(sub_tools, list):
+                                            tools.extend(sub_tools)
+                                            print(f" [DASEIN][EXTRACT] Found {len(sub_tools)} tools in {node_name}.{sub_node_name}.runnable.tools")
+                                        else:
+                                            tools.append(sub_tools)
+                                            print(f" [DASEIN][EXTRACT] Found 1 tool in {node_name}.{sub_node_name}.runnable.tools")
+
+                                    # Also check if sub_actual IS a callable with tools (another pattern)
+                                    if callable(sub_actual) and hasattr(sub_actual, 'tools'):
+                                        direct_tools = sub_actual.tools
+                                        if callable(direct_tools):
+                                            try:
+                                                direct_tools = direct_tools()
+                                            except:
+                                                pass
+                                        if isinstance(direct_tools, list):
+                                            tools.extend(direct_tools)
+                                            print(f" [DASEIN][EXTRACT] Found {len(direct_tools)} tools in {node_name}.{sub_node_name} (direct)")
+                                        elif direct_tools:
+                                            tools.append(direct_tools)
+                                            print(f" [DASEIN][EXTRACT] Found 1 tool in {node_name}.{sub_node_name} (direct)")
+                        except Exception as e:
+                            print(f" [DASEIN][EXTRACT] Failed to analyze subgraph: {e}")
+
+                    # Check if node has tools
+                    if hasattr(node_obj, 'node'):
+                        actual_node = node_obj.node
+
+                        # Check for tools_by_name (common in agent nodes)
+                        if hasattr(actual_node, 'tools_by_name'):
+                            for tool_name, tool in actual_node.tools_by_name.items():
                                 # If it's our wrapped tool, get the original
                                 if hasattr(tool, 'original_tool'):
                                     tools.append(tool.original_tool)
                                 else:
                                     tools.append(tool)
-
-
-
-
-
+                            print(f" [DASEIN][EXTRACT] Found {len(actual_node.tools_by_name)} tools in {node_name}.tools_by_name")
+
+                        # Check for runnable.tools (dynamic tools like ConductResearch)
+                        if hasattr(actual_node, 'runnable') and hasattr(actual_node.runnable, 'tools'):
+                            runnable_tools = actual_node.runnable.tools
+                            if callable(runnable_tools):
+                                try:
+                                    runnable_tools = runnable_tools()
+                                except:
+                                    pass
+                            if isinstance(runnable_tools, list):
+                                tools.extend(runnable_tools)
+                                print(f" [DASEIN][EXTRACT] Found {len(runnable_tools)} tools in {node_name}.runnable.tools")
+                            else:
+                                tools.append(runnable_tools)
+                                print(f" [DASEIN][EXTRACT] Found 1 tool in {node_name}.runnable.tools")
+
+                        # Check for bound.tools (another common pattern)
+                        if hasattr(actual_node, 'bound') and hasattr(actual_node.bound, 'tools'):
+                            bound_tools = actual_node.bound.tools
+                            if isinstance(bound_tools, list):
+                                tools.extend(bound_tools)
+                                print(f" [DASEIN][EXTRACT] Found {len(bound_tools)} tools in {node_name}.bound.tools")
+                            else:
+                                tools.append(bound_tools)
+                                print(f" [DASEIN][EXTRACT] Found 1 tool in {node_name}.bound.tools")
+
+                        # Check for steps (legacy pattern)
+                        if hasattr(actual_node, 'steps'):
+                            for step in actual_node.steps:
+                                if hasattr(step, 'tools_by_name'):
+                                    for tool_name, tool in step.tools_by_name.items():
+                                        if hasattr(tool, 'original_tool'):
+                                            tools.append(tool.original_tool)
+                                        else:
+                                            tools.append(tool)
+                                    print(f" [DASEIN][EXTRACT] Found {len(step.tools_by_name)} tools in {node_name}.steps")
+                                    break
+
+            if tools:
+                params['tools'] = tools
+                print(f" [DASEIN][EXTRACT] Total: {len(tools)} tools extracted")
+            else:
                 print(f" [DASEIN][EXTRACT] No tools found in agent")
                 return None

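Note: the caveat flagged repeatedly in the comments above, in isolation: traversal must go through `get_graph()`, whose node wrappers expose `.data` for compiled subgraphs. A minimal sketch, assuming a compiled LangGraph `agent`:

```python
graph = agent.get_graph()
for name, node in graph.nodes.items():
    if name.startswith("__"):  # skip __start__ / __end__
        continue
    # Compiled subgraphs (e.g., research_supervisor) are detected via .data,
    # which agent.nodes would not expose on its node objects
    if hasattr(node, "data") and "Compiled" in type(node.data).__name__:
        subgraph = node.data.get_graph()  # recurse into parent.child nodes
```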
@@ -1189,23 +1584,663 @@ Follow these rules when planning your actions."""
|
|
1189
1584
|
traceback.print_exc()
|
1190
1585
|
return False
|
1191
1586
|
|
1587
|
+
@staticmethod
|
1588
|
+
def _create_deadletter_tool():
|
1589
|
+
"""Create the universal dead-letter tool for blocked calls.
|
1590
|
+
|
1591
|
+
This tool acts as a sink for calls blocked by anti-fanout rules.
|
1592
|
+
It returns instantly with structured metadata, allowing nodes to complete normally.
|
1593
|
+
"""
|
1594
|
+
def dasein_deadletter(
|
1595
|
+
original_tool: str,
|
1596
|
+
original_args_fingerprint: str,
|
1597
|
+
reason_code: str,
|
1598
|
+
policy_trace_id: str,
|
1599
|
+
tokens_saved_estimate: int = 0,
|
1600
|
+
cached_result: Any = None
|
1601
|
+
) -> Any:
|
1602
|
+
"""Universal dead-letter tool for blocked policy calls.
|
1603
|
+
|
1604
|
+
**INTERNAL USE ONLY - DO NOT CALL DIRECTLY**
|
1605
|
+
|
1606
|
+
This tool is automatically invoked when Dasein blocks a call for policy reasons
|
1607
|
+
(e.g., anti-fanout rules). Supports transparent deduplication by returning
|
1608
|
+
cached results from previous identical calls.
|
1609
|
+
|
1610
|
+
Args:
|
1611
|
+
original_tool: Name of the tool that was blocked
|
1612
|
+
original_args_fingerprint: Hash/summary of original arguments
|
1613
|
+
reason_code: Why the call was blocked (e.g., "duplicate_detected")
|
1614
|
+
policy_trace_id: Trace ID for the rule that caused the block
|
1615
|
+
tokens_saved_estimate: Estimated tokens saved by blocking this call
|
1616
|
+
cached_result: If provided, return this (transparent deduplication)
|
1617
|
+
|
1618
|
+
Returns:
|
1619
|
+
Either cached_result (transparent) or structured error dict (explicit block)
|
1620
|
+
"""
|
1621
|
+
import time
|
1622
|
+
|
1623
|
+
if cached_result is not None:
|
1624
|
+
# Transparent deduplication - return the original result seamlessly
|
1625
|
+
print(f"[DASEIN][DEADLETTER] 🔄 Transparent dedup: {original_tool} (returning cached result, {tokens_saved_estimate} tokens saved)")
|
1626
|
+
return cached_result
|
1627
|
+
else:
|
1628
|
+
# Explicit block - return error structure
|
1629
|
+
result = {
|
1630
|
+
"blocked_by_policy": True,
|
1631
|
+
"original_tool": original_tool,
|
1632
|
+
"original_args_fingerprint": original_args_fingerprint,
|
1633
|
+
"reason_code": reason_code,
|
1634
|
+
"policy_trace_id": policy_trace_id,
|
1635
|
+
"tokens_saved_estimate": tokens_saved_estimate,
|
1636
|
+
"timestamp": time.time(),
|
1637
|
+
"message": f"Call to {original_tool} was blocked by Dasein policy: {reason_code}"
|
1638
|
+
}
|
1639
|
+
print(f"[DASEIN][DEADLETTER] 🚫 Blocked {original_tool}: {reason_code} (est. {tokens_saved_estimate} tokens saved)")
|
1640
|
+
return result
|
1641
|
+
|
1642
|
+
return dasein_deadletter
|
1643
|
+
|
1644
|
+
def _inject_deadletter_tool(self):
|
1645
|
+
"""Inject the dead-letter tool into the agent's tool registry.
|
1646
|
+
|
1647
|
+
The tool is added to the executor but hidden from the LLM's view by marking it internal.
|
1648
|
+
"""
|
1649
|
+
try:
|
1650
|
+
deadletter_fn = self._create_deadletter_tool()
|
1651
|
+
|
1652
|
+
# Convert to LangChain Tool
|
1653
|
+
from langchain.tools import Tool
|
1654
|
+
deadletter_tool = Tool(
|
1655
|
+
name="dasein_deadletter",
|
1656
|
+
description="**INTERNAL USE ONLY - DO NOT CALL DIRECTLY**\nThis tool is automatically invoked when Dasein blocks a call for policy reasons.",
|
1657
|
+
func=deadletter_fn
|
1658
|
+
)
|
1659
|
+
|
1660
|
+
# For LangGraph agents: Add to tools list in langgraph_params
|
1661
|
+
if self._is_langgraph and self._langgraph_params and 'tools' in self._langgraph_params:
|
1662
|
+
self._langgraph_params['tools'].append(deadletter_tool)
|
1663
|
+
print(f"[DASEIN][DEADLETTER] Injected dead-letter tool into LangGraph params")
|
1664
|
+
|
1665
|
+
# For LangChain agents: Add to agent's tools attribute if accessible
|
1666
|
+
elif hasattr(self._agent, 'tools'):
|
1667
|
+
if isinstance(self._agent.tools, list):
|
1668
|
+
self._agent.tools.append(deadletter_tool)
|
1669
|
+
print(f"[DASEIN][DEADLETTER] Injected dead-letter tool into LangChain agent")
|
1670
|
+
|
1671
|
+
# Store reference for later use
|
1672
|
+
self._deadletter_tool = deadletter_tool
|
1673
|
+
self._deadletter_fn = deadletter_fn
|
1674
|
+
|
1675
|
+
except Exception as e:
|
1676
|
+
print(f"[DASEIN][DEADLETTER] Failed to inject dead-letter tool: {e}")
|
1677
|
+
import traceback
|
1678
|
+
traceback.print_exc()
|
1679
|
+
self._deadletter_tool = None
|
1680
|
+
self._deadletter_fn = None
|
1681
|
+
|
1192
1682
|
def _wrap_agent_llm(self):
|
1193
|
-
"""
|
1683
|
+
"""Monkey-patch ALL LLM classes found in agent + tools."""
|
1194
1684
|
try:
|
1195
|
-
#
|
1196
|
-
|
1197
|
-
|
1198
|
-
wrapped_llm = DaseinLLMWrapper(llm, self._callback_handler)
|
1199
|
-
# Replace the original LLM with our wrapped version
|
1200
|
-
self._replace_llm_in_structure(self._agent, llm, wrapped_llm, max_depth=5)
|
1201
|
-
self._wrapped_llm = wrapped_llm
|
1202
|
-
self._vprint(f"[DASEIN][WRAPPER] Successfully wrapped {type(llm).__name__} LLM")
|
1203
|
-
return
|
1685
|
+
# Find ALL LLMs in agent structure + tools
|
1686
|
+
print(f"[DASEIN][WRAPPER] Searching for ALL LLMs in agent+tools...")
|
1687
|
+
all_llms = []
|
1204
1688
|
|
1205
|
-
|
1206
|
-
self.
|
1689
|
+
# 1. Search in agent
|
1690
|
+
agent_llm = self._find_llm_recursively(self._agent, max_depth=5)
|
1691
|
+
if agent_llm:
|
1692
|
+
all_llms.append(('agent', agent_llm))
|
1693
|
+
|
1694
|
+
# 2. Search in tools (where Summary LLM lives!)
|
1695
|
+
if hasattr(self._agent, 'tools'):
|
1696
|
+
for i, tool in enumerate(self._agent.tools or []):
|
1697
|
+
tool_llm = self._find_llm_recursively(tool, max_depth=3, path=f"tools[{i}]")
|
1698
|
+
if tool_llm:
|
1699
|
+
all_llms.append((f'tool_{i}_{getattr(tool, "name", "unknown")}', tool_llm))
|
1700
|
+
|
1701
|
+
print(f"[DASEIN][WRAPPER] Found {len(all_llms)} LLM(s)")
|
1702
|
+
for location, llm in all_llms:
|
1703
|
+
print(f"[DASEIN][WRAPPER] - {location}: {type(llm).__name__}")
|
1704
|
+
|
1705
|
+
# Patch all unique LLM classes
|
1706
|
+
patched_classes = set()
|
1707
|
+
for location, llm in all_llms:
|
1708
|
+
llm_class = type(llm)
|
1709
|
+
if llm_class in patched_classes:
|
1710
|
+
print(f"[DASEIN][WRAPPER] {llm_class.__name__} already patched for {location}, skipping")
|
1711
|
+
continue
|
1712
|
+
|
1713
|
+
print(f"[DASEIN][WRAPPER] Patching {llm_class.__name__} (found in {location})...")
|
1714
|
+
|
1715
|
+
# Check what methods the LLM class has
|
1716
|
+
# Only patch TOP-LEVEL methods to avoid double-deduplication from internal calls
|
1717
|
+
print(f"[DASEIN][WRAPPER] Checking LLM methods...")
|
1718
|
+
methods_to_patch = []
|
1719
|
+
for method in ['invoke', 'ainvoke']: # Only patch user-facing methods, not internal _generate
|
1720
|
+
if hasattr(llm_class, method):
|
1721
|
+
print(f"[DASEIN][WRAPPER] - Has {method}")
|
1722
|
+
methods_to_patch.append(method)
|
1723
|
+
|
1724
|
+
if not methods_to_patch:
|
1725
|
+
print(f"[DASEIN][WRAPPER] No methods to patch found!")
|
1726
|
+
return
|
1727
|
+
|
1728
|
+
# Check if we already patched this class
|
1729
|
+
first_method = getattr(llm_class, methods_to_patch[0])
|
1730
|
+
if hasattr(first_method, '_dasein_patched'):
|
1731
|
+
print(f"[DASEIN][WRAPPER] {llm_class.__name__} already patched, skipping")
|
1732
|
+
return
|
1733
|
+
|
1734
|
+
callback_handler = self._callback_handler
|
1735
|
+
|
1736
|
+
# Thread-local to track depth and max depth reached
|
1737
|
+
import threading
|
1738
|
+
_patch_depth = threading.local()
|
1739
|
+
|
1740
|
+
def get_max_depth():
|
1741
|
+
return getattr(_patch_depth, 'max_depth', 0)
|
1742
|
+
|
1743
|
+
def set_max_depth(val):
|
1744
|
+
_patch_depth.max_depth = val
|
1745
|
+
|
1746
|
+
def is_in_microturn():
|
1747
|
+
return getattr(_patch_depth, 'in_microturn', False)
|
1748
|
+
|
1749
|
+
def set_in_microturn(val):
|
1750
|
+
_patch_depth.in_microturn = val
|
1751
|
+
|
1752
|
+
# Thread-local state tracking for Summary calls (mirrors callback pattern)
|
1753
|
+
def get_summary_calls_made():
|
1754
|
+
"""Get count of Summary calls made in this run."""
|
1755
|
+
return getattr(_patch_depth, 'summary_calls_made', 0)
|
1756
|
+
|
1757
|
+
def increment_summary_calls():
|
1758
|
+
"""Increment Summary call counter."""
|
1759
|
+
current = getattr(_patch_depth, 'summary_calls_made', 0)
|
1760
|
+
_patch_depth.summary_calls_made = current + 1
|
1761
|
+
return _patch_depth.summary_calls_made
|
1762
|
+
|
1763
|
+
# Patch ALL methods (silent)
|
1764
|
+
for method_name in methods_to_patch:
|
1765
|
+
original_method = getattr(llm_class, method_name)
|
1766
|
+
is_async = 'a' in method_name and (method_name.startswith('a') or method_name.startswith('_a'))
|
1767
|
+
|
1768
|
+
# Use a factory function to properly capture the closure variables
|
1769
|
+
def make_patched_method(orig_method, meth_name, is_async_method, depth_tracker, max_depth_getter, max_depth_setter, in_microturn_getter, in_microturn_setter, get_summary_calls, increment_summary):
|
1770
|
+
if is_async_method:
|
1771
|
+
async def patched_method(self_llm, *args, **kwargs):
|
1772
|
+
# Track depth to find the leaf method
|
1773
|
+
depth = getattr(depth_tracker, 'value', 0)
|
1774
|
+
is_entry_point = (depth == 0)
|
1775
|
+
depth_tracker.value = depth + 1
|
1776
|
+
current_depth = depth_tracker.value
|
1777
|
+
|
1778
|
+
# Track max depth reached (silent)
|
1779
|
+
if is_entry_point:
|
1780
|
+
max_depth_setter(current_depth)
|
1781
|
+
else:
|
1782
|
+
if current_depth > max_depth_getter():
|
1783
|
+
max_depth_setter(current_depth)
|
1784
|
+
|
1785
|
+
# 🔥 PIPECLEANER DEDUPLICATION (only patching top-level methods, always apply)
|
1786
|
+
# Skip depth checks - they don't work with async/parallel execution
|
1787
|
+
if callback_handler:
|
1788
|
+
try:
|
1789
|
+
# Extract messages from args based on method signature
|
1790
|
+
messages_to_dedupe = None
|
1791
|
+
arg_index = 0
|
1792
|
+
|
1793
|
+
if meth_name in ['invoke', 'ainvoke']:
|
1794
|
+
# First arg is 'input' (can be string, list, or PromptValue)
|
1795
|
+
messages_to_dedupe = args[0] if args else kwargs.get('input', kwargs.get('messages'))
|
1796
|
+
arg_index = 0
|
1797
|
+
elif meth_name in ['_generate', '_agenerate']:
|
1798
|
+
# First arg is 'messages' (list of BaseMessage)
|
1799
|
+
messages_to_dedupe = args[0] if args else kwargs.get('messages')
|
1800
|
+
arg_index = 0
|
1801
|
+
elif meth_name in ['generate', 'agenerate']:
|
1802
|
+
# First arg is 'prompts' (list of message lists)
|
1803
|
+
messages_to_dedupe = args[0] if args else kwargs.get('prompts')
|
1804
|
+
arg_index = 0
|
1805
|
+
|
1806
|
+
# Convert to strings for deduplication
|
1807
|
+
if messages_to_dedupe:
|
1808
|
+
prompt_strings = []
|
1809
|
+
for msg in (messages_to_dedupe if isinstance(messages_to_dedupe, list) else [messages_to_dedupe]):
|
1810
|
+
if hasattr(msg, 'content'):
|
1811
|
+
prompt_strings.append(msg.content)
|
1812
|
+
elif isinstance(msg, str):
|
1813
|
+
prompt_strings.append(msg)
|
1814
|
+
else:
|
1815
|
+
prompt_strings.append(str(msg))
|
1816
|
+
|
1817
|
+
# =============================================================
|
1818
|
+
# HOTPATH DEBUGGING (commented out for production)
|
1819
|
+
# =============================================================
|
1820
|
+
# print(f"\n{'='*70}")
|
1821
|
+
# print(f"[🔥 HOTPATH FULL DEBUG] {meth_name}() call")
|
1822
|
+
# print(f"{'='*70}")
|
1823
|
+
#
|
1824
|
+
# # 1. Callback state
|
1825
|
+
# current_node = getattr(callback_handler, '_current_chain_node', None)
|
1826
|
+
# current_tool = getattr(callback_handler, '_current_tool_name', None)
|
1827
|
+
# print(f"[🔥] Current node: {current_node}")
|
1828
|
+
# print(f"[🔥] Current tool: {current_tool}")
|
1829
|
+
#
|
1830
|
+
# # 2. Tools in this call
|
1831
|
+
# tools_in_call = []
|
1832
|
+
# if 'invocation_params' in kwargs:
|
1833
|
+
# tools = kwargs['invocation_params'].get('tools') or kwargs['invocation_params'].get('functions') or []
|
1834
|
+
# tools_in_call = [t.get('name', t.get('function', {}).get('name', '?')) for t in tools]
|
1835
|
+
# elif 'tools' in kwargs:
|
1836
|
+
# tools_in_call = [t.get('name', '?') for t in kwargs.get('tools', [])]
|
1837
|
+
# elif 'functions' in kwargs:
|
1838
|
+
# tools_in_call = [t.get('name', '?') for t in kwargs.get('functions', [])]
|
1839
|
+
# print(f"[🔥] Tools in call: {tools_in_call if tools_in_call else 'NONE'}")
|
1840
|
+
#
|
1841
|
+
# # 3. Prompt characteristics
|
1842
|
+
# prompt_lens = [len(s) for s in prompt_strings]
|
1843
|
+
# print(f"[🔥] Prompt count: {len(prompt_strings)}")
|
1844
|
+
# print(f"[🔥] Prompt lengths: {prompt_lens}")
|
1845
|
+
#
|
1846
|
+
# # 4. Kwargs keys (for debugging)
|
1847
|
+
# print(f"[🔥] Kwargs keys: {list(kwargs.keys())}")
|
1848
|
+
#
|
1849
|
+
# # 5. Messages structure
|
1850
|
+
# if messages_to_dedupe:
|
1851
|
+
# if isinstance(messages_to_dedupe, list):
|
1852
|
+
# msg_types = [type(m).__name__ for m in messages_to_dedupe[:3]]
|
1853
|
+
# print(f"[🔥] Message types (first 3): {msg_types}")
|
1854
|
+
# else:
|
1855
|
+
# print(f"[🔥] Messages type: {type(messages_to_dedupe).__name__}")
|
1856
|
+
#
|
1857
|
+
# print(f"{'='*70}\n")
|
1858
|
+
#
|
1859
|
+
# # Show first 200 chars to see the fingerprint
|
1860
|
+
# if prompt_strings:
|
1861
|
+
# first_200 = prompt_strings[0][:200] if len(prompt_strings[0]) > 200 else prompt_strings[0]
|
1862
|
+
# print(f"[🔥] Prompt start (200 chars): {first_200}")
|
1863
|
+
|
1864
|
+
# =============================================================
|
1865
|
+
# Extract tools from LLM call kwargs (for filter_search rules)
|
1866
|
+
# =============================================================
|
1867
|
+
tools_in_this_call = []
|
1868
|
+
|
1869
|
+
# Extract tool names from kwargs (handles multiple LLM providers' formats)
|
1870
|
+
# Pattern 1: invocation_params (some providers)
|
1871
|
+
if 'invocation_params' in kwargs:
|
1872
|
+
inv_params = kwargs['invocation_params']
|
1873
|
+
tools_param = inv_params.get('tools') or inv_params.get('functions') or []
|
1874
|
+
for t in tools_param:
|
1875
|
+
if isinstance(t, dict):
|
1876
|
+
# Try: t['name'] or t['function']['name']
|
1877
|
+
name = t.get('name') or (t.get('function', {}).get('name') if isinstance(t.get('function'), dict) else None)
|
1878
|
+
if name:
|
1879
|
+
tools_in_this_call.append(name)
|
1880
|
+
# Pattern 2: Direct 'tools' key (common)
|
1881
|
+
elif 'tools' in kwargs:
|
1882
|
+
tools_param = kwargs.get('tools', [])
|
1883
|
+
for t in tools_param:
|
1884
|
+
if isinstance(t, dict):
|
1885
|
+
# Try: t['name'] or t['function']['name']
|
1886
|
+
name = t.get('name') or (t.get('function', {}).get('name') if isinstance(t.get('function'), dict) else None)
|
1887
|
+
if name:
|
1888
|
+
tools_in_this_call.append(name)
|
1889
|
+
# Pattern 3: 'functions' key (OpenAI function calling)
|
1890
|
+
elif 'functions' in kwargs:
|
1891
|
+
funcs_param = kwargs.get('functions', [])
|
1892
|
+
for t in funcs_param:
|
1893
|
+
if isinstance(t, dict):
|
1894
|
+
name = t.get('name')
|
1895
|
+
if name:
|
1896
|
+
tools_in_this_call.append(name)
|
1897
|
+
|
1898
|
+
# Check if any filter_search rules match the tools in this LLM call
|
1899
|
+
from .pipecleaner import _find_filter_search_rules
|
1900
|
+
filter_rules = None
|
1901
|
+
should_dedupe = False
|
1902
|
+
|
1903
|
+
if hasattr(callback_handler, '_selected_rules') and prompt_strings and tools_in_this_call:
|
1904
|
+
# Get all filter_search rules (they specify which tools to target via references.tools)
|
1905
|
+
filter_rules = _find_filter_search_rules('*', callback_handler._selected_rules)
|
1906
|
+
|
1907
|
+
# Check if any tool in this call matches rule's target tools
|
1908
|
+
if filter_rules:
|
1909
|
+
for rule in filter_rules:
|
1910
|
+
# Handle both dict and object formats
|
1911
|
+
if isinstance(rule, dict):
|
1912
|
+
references = rule.get('references', {})
|
1913
|
+
rule_tools = references.get('tools', []) if isinstance(references, dict) else []
|
1914
|
+
else:
|
1915
|
+
references = getattr(rule, 'references', None)
|
1916
|
+
# references might be a dict or object, handle both
|
1917
|
+
if isinstance(references, dict):
|
1918
|
+
rule_tools = references.get('tools', [])
|
1919
|
+
elif references:
|
1920
|
+
rule_tools = getattr(references, 'tools', [])
|
1921
|
+
else:
|
1922
|
+
rule_tools = []
|
1923
|
+
|
1924
|
+
for tool_in_call in tools_in_this_call:
|
1925
|
+
if tool_in_call.lower() in [rt.lower() for rt in rule_tools]:
|
1926
|
+
should_dedupe = True
|
1927
|
+
break
|
1928
|
+
if should_dedupe:
|
1929
|
+
break
|
1930
|
+
|
1931
|
+
if should_dedupe:
|
1932
|
+
# Deduplicate each prompt
|
1933
|
+
from .pipecleaner import get_or_create_corpus
|
1934
|
+
import hashlib
|
1935
|
+
corpus = get_or_create_corpus(callback_handler.run_id, verbose=callback_handler._verbose)
|
1936
|
+
|
1937
|
+
deduplicated_strings = []
|
1938
|
+
for i, prompt_str in enumerate(prompt_strings):
|
1939
|
+
if len(prompt_str) < 2500:
|
1940
|
+
deduplicated_strings.append(prompt_str)
|
1941
|
+
continue
|
1942
|
+
|
1943
|
+
# Split system/content like in callback
|
1944
|
+
system_part = prompt_str[:2000]
|
1945
|
+
content_part = prompt_str[2000:]
|
1946
|
+
prompt_id = f"p{i}_{hashlib.md5(content_part[:100].encode()).hexdigest()[:8]}"
|
1947
|
+
|
1948
|
+
# Deduplicate (ASYNC - allows parallel Summary calls to batch together)
|
1949
|
+
deduplicated_content = await corpus.enqueue_prompt(prompt_id, content_part)
|
1950
|
+
deduplicated_str = system_part + deduplicated_content
|
1951
|
+
deduplicated_strings.append(deduplicated_str)
|
1952
|
+
|
1953
|
+
# Convert back to original format
|
1954
|
+
if isinstance(messages_to_dedupe, list):
|
1955
|
+
for i, msg in enumerate(messages_to_dedupe):
|
1956
|
+
if i < len(deduplicated_strings) and hasattr(msg, 'content'):
|
1957
|
+
msg.content = deduplicated_strings[i]
|
1958
|
+
elif isinstance(messages_to_dedupe, str):
|
1959
|
+
messages_to_dedupe = deduplicated_strings[0] if deduplicated_strings else messages_to_dedupe
|
1960
|
+
|
1961
|
+
# Replace in args/kwargs
|
1962
|
+
if args and arg_index < len(args):
|
1963
|
+
args = list(args)
|
1964
|
+
args[arg_index] = messages_to_dedupe
|
1965
|
+
args = tuple(args)
|
1966
|
+
elif 'input' in kwargs:
|
1967
|
+
kwargs['input'] = messages_to_dedupe
|
1968
|
+
elif 'messages' in kwargs:
|
1969
|
+
kwargs['messages'] = messages_to_dedupe
|
1970
|
+
elif 'prompts' in kwargs:
|
1971
|
+
kwargs['prompts'] = messages_to_dedupe
|
1972
|
+
except Exception as e:
|
1973
|
+
print(f"[🔥 HOTPATH] ⚠️ Deduplication error: {e}")
|
1974
|
+
import traceback
|
1975
|
+
traceback.print_exc()
|
1976
|
+
|
1977
|
+
try:
|
1978
|
+
result = await orig_method(self_llm, *args, **kwargs)
|
1979
|
+
|
1980
|
+
# 🚨 MICROTURN ENFORCEMENT - DISABLED
|
1981
|
+
# Microturn can interfere with tool execution, so it's disabled
|
1982
|
+
# TODO: Re-enable with proper gating if needed for specific use cases
|
1983
|
+
|
1984
|
+
return result
|
1985
|
+
finally:
|
1986
|
+
depth_tracker.value = depth # Restore depth on exit
|
1987
|
+
# Clear processed tool calls set when returning to entry point (prevents memory leak)
|
1988
|
+
if depth == 0:
|
1989
|
+
if hasattr(_patch_depth, 'processed_tool_calls'):
|
1990
|
+
_patch_depth.processed_tool_calls.clear()
|
1991
|
+
if hasattr(_patch_depth, 'seen_tool_signatures'):
|
1992
|
+
_patch_depth.seen_tool_signatures.clear()
|
1993
|
+
if hasattr(_patch_depth, 'tool_result_cache'):
|
1994
|
+
_patch_depth.tool_result_cache.clear()
|
1995
|
+
else:
|
1996
|
+
def patched_method(self_llm, *args, **kwargs):
|
1997
|
+
# Track depth to find the leaf method
|
1998
|
+
depth = getattr(depth_tracker, 'value', 0)
|
1999
|
+
is_entry_point = (depth == 0)
|
2000
|
+
depth_tracker.value = depth + 1
|
2001
|
+
current_depth = depth_tracker.value
|
2002
|
+
|
2003
|
+
# Track max depth reached
|
2004
|
+
if is_entry_point:
|
2005
|
+
max_depth_setter(current_depth) # Reset for new entry
|
2006
|
+
else:
|
2007
|
+
# Update max if we went deeper
|
2008
|
+
if current_depth > max_depth_getter():
|
2009
|
+
max_depth_setter(current_depth)
|
2010
|
+
|
2011
|
+
# 🔥 PIPECLEANER DEDUPLICATION (only patching top-level methods, always apply)
|
2012
|
+
# Skip depth checks - they don't work with async/parallel execution
|
2013
|
+
if callback_handler:
|
2014
|
+
try:
|
2015
|
+
# Extract messages from args based on method signature
|
2016
|
+
messages_to_dedupe = None
|
2017
|
+
arg_index = 0
|
2018
|
+
|
2019
|
+
if meth_name in ['invoke', 'ainvoke']:
|
2020
|
+
messages_to_dedupe = args[0] if args else kwargs.get('input', kwargs.get('messages'))
|
2021
|
+
arg_index = 0
|
2022
|
+
elif meth_name in ['_generate', '_agenerate']:
|
2023
|
+
messages_to_dedupe = args[0] if args else kwargs.get('messages')
|
2024
|
+
arg_index = 0
|
2025
|
+
elif meth_name in ['generate', 'agenerate']:
|
2026
|
+
messages_to_dedupe = args[0] if args else kwargs.get('prompts')
|
2027
|
+
arg_index = 0
|
2028
|
+
|
2029
|
+
# Convert to strings for deduplication
|
2030
|
+
if messages_to_dedupe:
|
2031
|
+
prompt_strings = []
|
2032
|
+
for msg in (messages_to_dedupe if isinstance(messages_to_dedupe, list) else [messages_to_dedupe]):
|
2033
|
+
if hasattr(msg, 'content'):
|
2034
|
+
prompt_strings.append(msg.content)
|
2035
|
+
elif isinstance(msg, str):
|
2036
|
+
prompt_strings.append(msg)
|
2037
|
+
else:
|
2038
|
+
prompt_strings.append(str(msg))
|
2039
|
+
|
2040
|
+
# =============================================================
|
2041
|
+
# HOTPATH DEBUGGING (commented out for production)
|
2042
|
+
# =============================================================
|
2043
|
+
# print(f"\n{'='*70}")
|
2044
|
+
# print(f"[🔥 HOTPATH FULL DEBUG] {meth_name}() call")
|
2045
|
+
# print(f"{'='*70}")
|
2046
|
+
#
|
2047
|
+
# # 1. Callback state
|
2048
|
+
# current_node = getattr(callback_handler, '_current_chain_node', None)
|
2049
|
+
# current_tool = getattr(callback_handler, '_current_tool_name', None)
|
2050
|
+
# print(f"[🔥] Current node: {current_node}")
|
2051
|
+
# print(f"[🔥] Current tool: {current_tool}")
|
2052
|
+
#
|
2053
|
+
# # 2. Tools in this call
|
2054
|
+
# tools_in_call = []
|
2055
|
+
# if 'invocation_params' in kwargs:
|
2056
|
+
# tools = kwargs['invocation_params'].get('tools') or kwargs['invocation_params'].get('functions') or []
|
2057
|
+
# tools_in_call = [t.get('name', t.get('function', {}).get('name', '?')) for t in tools]
|
2058
|
+
# elif 'tools' in kwargs:
|
2059
|
+
# tools_in_call = [t.get('name', '?') for t in kwargs.get('tools', [])]
|
2060
|
+
# elif 'functions' in kwargs:
|
2061
|
+
# tools_in_call = [t.get('name', '?') for t in kwargs.get('functions', [])]
|
2062
|
+
# print(f"[🔥] Tools in call: {tools_in_call if tools_in_call else 'NONE'}")
|
2063
|
+
#
|
2064
|
+
# # 3. Prompt characteristics
|
2065
|
+
# prompt_lens = [len(s) for s in prompt_strings]
|
2066
|
+
# print(f"[🔥] Prompt count: {len(prompt_strings)}")
|
2067
|
+
# print(f"[🔥] Prompt lengths: {prompt_lens}")
|
2068
|
+
#
|
2069
|
+
# # 4. Kwargs keys (for debugging)
|
2070
|
+
# print(f"[🔥] Kwargs keys: {list(kwargs.keys())}")
|
2071
|
+
#
|
2072
|
+
# # 5. Messages structure
|
2073
|
+
# if messages_to_dedupe:
|
2074
|
+
# if isinstance(messages_to_dedupe, list):
|
2075
|
+
# msg_types = [type(m).__name__ for m in messages_to_dedupe[:3]]
|
2076
|
+
# print(f"[🔥] Message types (first 3): {msg_types}")
|
2077
|
+
# else:
|
2078
|
+
# print(f"[🔥] Messages type: {type(messages_to_dedupe).__name__}")
|
2079
|
+
#
|
2080
|
+
# print(f"{'='*70}\n")
|
2081
|
+
#
|
2082
|
+
# # Show first 200 chars to see the fingerprint
|
2083
|
+
# if prompt_strings:
|
2084
|
+
# first_200 = prompt_strings[0][:200] if len(prompt_strings[0]) > 200 else prompt_strings[0]
|
2085
|
+
# print(f"[🔥] Prompt start (200 chars): {first_200}")
|
2086
|
+
|
2087
|
+
# =============================================================
|
2088
|
+
# Extract tools from LLM call kwargs (for filter_search rules)
|
2089
|
+
# =============================================================
|
2090
|
+
tools_in_this_call = []
|
2091
|
+
|
2092
|
+
# Extract tool names from kwargs (handles multiple LLM providers' formats)
|
2093
|
+
# Pattern 1: invocation_params (some providers)
|
2094
|
+
if 'invocation_params' in kwargs:
|
2095
|
+
inv_params = kwargs['invocation_params']
|
2096
|
+
tools_param = inv_params.get('tools') or inv_params.get('functions') or []
|
2097
|
+
for t in tools_param:
|
2098
|
+
if isinstance(t, dict):
|
2099
|
+
# Try: t['name'] or t['function']['name']
|
2100
|
+
name = t.get('name') or (t.get('function', {}).get('name') if isinstance(t.get('function'), dict) else None)
|
2101
|
+
if name:
|
2102
|
+
tools_in_this_call.append(name)
|
2103
|
+
# Pattern 2: Direct 'tools' key (common)
|
2104
|
+
elif 'tools' in kwargs:
|
2105
|
+
tools_param = kwargs.get('tools', [])
|
2106
|
+
for t in tools_param:
|
2107
|
+
if isinstance(t, dict):
|
2108
|
+
# Try: t['name'] or t['function']['name']
|
2109
|
+
name = t.get('name') or (t.get('function', {}).get('name') if isinstance(t.get('function'), dict) else None)
|
2110
|
+
if name:
|
2111
|
+
tools_in_this_call.append(name)
|
2112
|
+
# Pattern 3: 'functions' key (OpenAI function calling)
|
2113
|
+
elif 'functions' in kwargs:
|
2114
|
+
funcs_param = kwargs.get('functions', [])
|
2115
|
+
for t in funcs_param:
|
2116
|
+
if isinstance(t, dict):
|
2117
|
+
name = t.get('name')
|
2118
|
+
if name:
|
2119
|
+
tools_in_this_call.append(name)
|
2120
|
+
|
2121
|
+
# Check if any filter_search rules match the tools in this LLM call
|
2122
|
+
from .pipecleaner import _find_filter_search_rules
|
2123
|
+
filter_rules = None
|
2124
|
+
should_dedupe = False
|
2125
|
+
|
2126
|
+
if hasattr(callback_handler, '_selected_rules') and prompt_strings and tools_in_this_call:
|
2127
|
+
# Get all filter_search rules (they specify which tools to target via references.tools)
|
2128
|
+
filter_rules = _find_filter_search_rules('*', callback_handler._selected_rules)
|
2129
|
+
|
2130
|
+
# Check if any tool in this call matches rule's target tools
|
2131
|
+
if filter_rules:
|
2132
|
+
for rule in filter_rules:
|
2133
|
+
# Handle both dict and object formats
|
2134
|
+
if isinstance(rule, dict):
|
2135
|
+
references = rule.get('references', {})
|
2136
|
+
rule_tools = references.get('tools', []) if isinstance(references, dict) else []
|
2137
|
+
else:
|
2138
|
+
references = getattr(rule, 'references', None)
|
2139
|
+
# references might be a dict or object, handle both
|
2140
|
+
if isinstance(references, dict):
|
2141
|
+
rule_tools = references.get('tools', [])
|
2142
|
+
elif references:
|
2143
|
+
rule_tools = getattr(references, 'tools', [])
|
2144
|
+
else:
|
2145
|
+
rule_tools = []
|
2146
|
+
|
2147
|
+
for tool_in_call in tools_in_this_call:
|
2148
|
+
if tool_in_call.lower() in [rt.lower() for rt in rule_tools]:
|
2149
|
+
should_dedupe = True
|
2150
|
+
break
|
2151
|
+
if should_dedupe:
|
2152
|
+
break
|
2153
|
+
|
2154
|
+
if should_dedupe:
|
2155
|
+
# Deduplicate each prompt
|
2156
|
+
from .pipecleaner import get_or_create_corpus
|
2157
|
+
import hashlib
|
2158
|
+
corpus = get_or_create_corpus(callback_handler.run_id, verbose=callback_handler._verbose)
|
2159
|
+
|
2160
|
+
deduplicated_strings = []
|
2161
|
+
for i, prompt_str in enumerate(prompt_strings):
|
2162
|
+
if len(prompt_str) < 2500:
|
2163
|
+
deduplicated_strings.append(prompt_str)
|
2164
|
+
continue
|
2165
|
+
|
2166
|
+
# Split system/content like in callback
|
2167
|
+
system_part = prompt_str[:2000]
|
2168
|
+
content_part = prompt_str[2000:]
|
2169
|
+
prompt_id = f"p{i}_{hashlib.md5(content_part[:100].encode()).hexdigest()[:8]}"
|
2170
|
+
|
2171
|
+
# Deduplicate (wrap async in sync context)
|
2172
|
+
import asyncio
|
2173
|
+
try:
|
2174
|
+
loop = asyncio.get_event_loop()
|
2175
|
+
except RuntimeError:
|
2176
|
+
loop = asyncio.new_event_loop()
|
2177
|
+
asyncio.set_event_loop(loop)
|
2178
|
+
+                                        deduplicated_content = loop.run_until_complete(corpus.enqueue_prompt(prompt_id, content_part))
+                                        deduplicated_str = system_part + deduplicated_content
+                                        deduplicated_strings.append(deduplicated_str)
+
+                                    # Convert back to original format
+                                    if isinstance(messages_to_dedupe, list):
+                                        for i, msg in enumerate(messages_to_dedupe):
+                                            if i < len(deduplicated_strings) and hasattr(msg, 'content'):
+                                                msg.content = deduplicated_strings[i]
+                                    elif isinstance(messages_to_dedupe, str):
+                                        messages_to_dedupe = deduplicated_strings[0] if deduplicated_strings else messages_to_dedupe
+
+                                    # Replace in args/kwargs
+                                    if args and arg_index < len(args):
+                                        args = list(args)
+                                        args[arg_index] = messages_to_dedupe
+                                        args = tuple(args)
+                                    elif 'input' in kwargs:
+                                        kwargs['input'] = messages_to_dedupe
+                                    elif 'messages' in kwargs:
+                                        kwargs['messages'] = messages_to_dedupe
+                                    elif 'prompts' in kwargs:
+                                        kwargs['prompts'] = messages_to_dedupe
+                            except Exception as e:
+                                print(f"[🔥 HOTPATH] ⚠️ Deduplication error: {e}")
+                                import traceback
+                                traceback.print_exc()
+
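Putting the deduplicated value back is the generic "replace an argument that may be positional or keyword" problem: tuples are immutable, so the positional path rebuilds `args`, while the keyword path probes the known parameter names in order. A standalone sketch of the same move:

    def replace_argument(args, kwargs, arg_index, new_value,
                         keys=('input', 'messages', 'prompts')):
        # Positional: rebuild the tuple with the new value in place.
        if args and arg_index < len(args):
            args = list(args)
            args[arg_index] = new_value
            return tuple(args), kwargs
        # Keyword: first matching known name wins.
        for key in keys:
            if key in kwargs:
                return args, {**kwargs, key: new_value}
        return args, kwargs

    args, kwargs = replace_argument(('old prompt',), {}, 0, 'new prompt')
    print(args)  # ('new prompt',)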
+                            try:
+                                result = orig_method(self_llm, *args, **kwargs)
+
+                                # 🚨 MICROTURN ENFORCEMENT - DISABLED (can interfere with tool execution)
+                                # TODO: Re-enable with proper gating if needed
+
+                                return result
+                            finally:
+                                depth_tracker.value = depth  # Restore depth on exit
+                                # Clear processed tool calls set when returning to entry point (prevents memory leak)
+                                if depth == 0:
+                                    if hasattr(_patch_depth, 'processed_tool_calls'):
+                                        _patch_depth.processed_tool_calls.clear()
+                                    if hasattr(_patch_depth, 'seen_tool_signatures'):
+                                        _patch_depth.seen_tool_signatures.clear()
+                                    if hasattr(_patch_depth, 'tool_result_cache'):
+                                        _patch_depth.tool_result_cache.clear()
+                        return patched_method
+
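The `finally` block restores the recursion depth even when the wrapped call raises, and clears per-run caches only when the outermost frame (depth 0) unwinds. The same bookkeeping in isolation, with illustrative names rather than the package's:

    import threading

    _state = threading.local()

    def tracked_call(fn, *args, **kwargs):
        depth = getattr(_state, 'depth', 0)
        _state.depth = depth + 1
        try:
            return fn(*args, **kwargs)
        finally:
            _state.depth = depth            # restore on every exit path
            if depth == 0:                  # outermost call: free run caches
                for name in ('processed_tool_calls', 'seen_tool_signatures'):
                    cache = getattr(_state, name, None)
                    if cache is not None:
                        cache.clear()

    _state.processed_tool_calls = {'sig1'}
    tracked_call(lambda: None)
    print(_state.processed_tool_calls)  # set() - cleared at depth 0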
+                    patched_method = make_patched_method(original_method, method_name, is_async, _patch_depth, get_max_depth, set_max_depth, is_in_microturn, set_in_microturn, get_summary_calls_made, increment_summary_calls)
+
+                    # Mark and apply the patch
+                    patched_method._dasein_patched = True
+                    setattr(llm_class, method_name, patched_method)
+                    print(f"[DASEIN][WRAPPER] Patched {method_name}")
+
+                # Mark this class as patched
+                patched_classes.add(llm_class)
+                self._wrapped_llm = llm
+                print(f"[DASEIN][WRAPPER] Successfully patched {len(methods_to_patch)} methods in {llm_class.__name__}")
+
+            print(f"[DASEIN][WRAPPER] Finished patching {len(patched_classes)} unique LLM class(es)")
+            return
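Tagging the replacement with `_dasein_patched` before `setattr` is what makes the patching idempotent: a later wrap call can detect the marker and skip classes it already touched. The pattern in miniature (names here are illustrative):

    import functools

    def patch_method(cls, name, make_wrapper):
        original = getattr(cls, name)
        if getattr(original, '_patched', False):
            return  # already wrapped: never stack wrappers
        wrapper = functools.wraps(original)(make_wrapper(original))
        wrapper._patched = True
        setattr(cls, name, wrapper)

    class LLM:
        def invoke(self, prompt):
            return f"echo:{prompt}"

    patch_method(LLM, 'invoke', lambda orig: lambda self, p: orig(self, p.upper()))
    patch_method(LLM, 'invoke', lambda orig: lambda self, p: orig(self, p))  # no-op
    print(LLM().invoke('hi'))  # echo:HI (wrapped exactly once)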
         except Exception as e:
-
+            print(f"[DASEIN][WRAPPER] Failed to wrap agent LLM: {e}")
+            import traceback
+            traceback.print_exc()
             self._wrapped_llm = None
 
     def _set_callback_handler_llm(self):
@@ -1312,7 +2347,7 @@ Follow these rules when planning your actions."""
 
         return False
 
-    def _replace_llm_in_structure(self, obj, original_llm, wrapped_llm, max_depth=5, path=""):
+    def _replace_llm_in_structure(self, obj, original_llm, wrapped_llm, max_depth=5, path="", count=[0]):
         """Replace the original LLM with wrapped LLM in the structure."""
         if max_depth <= 0:
             return
@@ -1321,16 +2356,16 @@ Follow these rules when planning your actions."""
         if hasattr(obj, 'steps') and hasattr(obj, '__iter__'):
             for i, step in enumerate(obj.steps):
                 if step is original_llm:
-
+                    count[0] += 1
+                    print(f"[DASEIN][WRAPPER] Replacing LLM #{count[0]} at {path}.steps[{i}]")
                     obj.steps[i] = wrapped_llm
-                    return
                 # Check if step has bound attribute (RunnableBinding)
                 if hasattr(step, 'bound') and step.bound is original_llm:
-
+                    count[0] += 1
+                    print(f"[DASEIN][WRAPPER] Replacing LLM #{count[0]} at {path}.steps[{i}].bound")
                     step.bound = wrapped_llm
-                    return
                 # Recursively search in the step
-                self._replace_llm_in_structure(step, original_llm, wrapped_llm, max_depth - 1, f"{path}.steps[{i}]")
+                self._replace_llm_in_structure(step, original_llm, wrapped_llm, max_depth - 1, f"{path}.steps[{i}]", count)
 
         # Search in attributes
         for attr_name in dir(obj):
@@ -1339,8 +2374,12 @@ Follow these rules when planning your actions."""
             try:
                 attr_value = getattr(obj, attr_name)
                 if attr_value is original_llm:
-
+                    print(f"[DASEIN][WRAPPER] Replacing LLM at {path}.{attr_name}")
                     setattr(obj, attr_name, wrapped_llm)
+                    # Verify replacement
+                    new_value = getattr(obj, attr_name)
+                    print(f"[DASEIN][WRAPPER] After replacement, {path}.{attr_name} is now: {type(new_value).__name__}")
+                    print(f"[DASEIN][WRAPPER] Is it our wrapper? {isinstance(new_value, DaseinLLMWrapper)}")
                     return
                 # Recursively search in the attribute
                 if hasattr(attr_value, '__dict__') or hasattr(attr_value, '__iter__'):
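The new `count=[0]` parameter uses a mutable default as a by-reference counter shared by every recursive call, which is why the replacement prints can number the LLMs they find; the diff also drops the early `return`s so all occurrences are replaced, not just the first. One caveat of the idiom: the default list is created once at definition time, so it persists across top-level calls unless the caller passes a fresh `[0]`. A minimal illustration:

    def walk(node, count=[0], path="root"):
        count[0] += 1  # the same list object is shared down the recursion
        print(f"visit #{count[0]} at {path}")
        for i, child in enumerate(node.get('children', [])):
            walk(child, count, f"{path}[{i}]")
        return count[0]

    tree = {'children': [{}, {'children': [{}]}]}
    print(walk(tree, count=[0]))  # 4 - pass a fresh [0] to reset between runs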
@@ -1725,6 +2764,12 @@ Follow these rules when planning your actions."""
         # Run the agent
         result = self._agent.invoke(*args, **kwargs)
 
+        # Print tools summary if available
+        if hasattr(self._callback_handler, 'get_compiled_tools_summary'):
+            summary = self._callback_handler.get_compiled_tools_summary()
+            if summary:
+                print(f"[DASEIN] {summary}")
+
         # FIXED: Extract trace for display but never calculate KPIs locally
         # Service-first architecture: All KPI calculation done by distributed services
         self._vprint(f"[DASEIN][SERVICE_FIRST] Extracting trace for display - KPIs handled by post-run API service")
@@ -1749,6 +2794,11 @@ Follow these rules when planning your actions."""
         # Clear tool rules from system prompt after triggering async post-run
         self._clear_tool_rules_from_system()
 
+        # Cleanup run-scoped corpus (print telemetry and free memory)
+        if hasattr(self, '_callback_handler') and hasattr(self._callback_handler, 'run_id'):
+            from .pipecleaner import cleanup_corpus
+            cleanup_corpus(self._callback_handler.run_id)
+
         return result
 
     async def _ainvoke_single(self, *args, **kwargs):
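`get_or_create_corpus` and `cleanup_corpus` imply a registry of per-run state keyed by `run_id`: created lazily on first use in the hot path, then explicitly dropped after the run so telemetry can be printed and memory reclaimed. A hypothetical sketch of that lifecycle (not pipecleaner's actual implementation):

    class Corpus:  # stand-in for pipecleaner's run-scoped corpus
        def __init__(self, run_id):
            self.run_id, self.prompts_seen = run_id, 0

    _corpora = {}

    def get_or_create_corpus(run_id):
        return _corpora.setdefault(run_id, Corpus(run_id))

    def cleanup_corpus(run_id):
        corpus = _corpora.pop(run_id, None)  # drop the reference so it can be GC'd
        if corpus is not None:
            print(f"[corpus {run_id}] processed {corpus.prompts_seen} prompts")

    get_or_create_corpus('run-1').prompts_seen += 1
    cleanup_corpus('run-1')  # prints telemetry
    cleanup_corpus('run-1')  # second call is a safe no-op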
@@ -1796,6 +2846,12 @@ Follow these rules when planning your actions."""
         # Run the agent asynchronously
         result = await self._agent.ainvoke(*args, **kwargs)
 
+        # Print tools summary if available
+        if hasattr(self._callback_handler, 'get_compiled_tools_summary'):
+            summary = self._callback_handler.get_compiled_tools_summary()
+            if summary:
+                print(f"[DASEIN] {summary}")
+
         # FIXED: Extract trace for display but never calculate KPIs locally
         # Service-first architecture: All KPI calculation done by distributed services
         self._vprint(f"[DASEIN][SERVICE_FIRST] Extracting trace for display - KPIs handled by post-run API service")
@@ -1820,6 +2876,11 @@ Follow these rules when planning your actions."""
         # Clear tool rules from system prompt after triggering async post-run
         self._clear_tool_rules_from_system()
 
+        # Cleanup run-scoped corpus (print telemetry and free memory)
+        if hasattr(self, '_callback_handler') and hasattr(self._callback_handler, 'run_id'):
+            from .pipecleaner import cleanup_corpus
+            cleanup_corpus(self._callback_handler.run_id)
+
         return result
 
     def _invoke_with_retry(self, *args, **kwargs):
@@ -2231,11 +3292,8 @@ Follow these rules when planning your actions."""
         print(f" Wall Time (ms): {metrics['wall_time_ms']}")
         print(f" Success Rate: {metrics['success_rate']:.1f}% ({metrics['total_turns']}/{metrics['total_turns']})")
         print(f" Overall Success: {'✅' if metrics['overall_success'] else '❌'}")
-        # Format final outcome
-
-            final_outcome_formatted = self._wrapped_llm._format_final_outcome(metrics.get('final_outcome', 'unknown'))
-        else:
-            final_outcome_formatted = f"❓ {metrics.get('final_outcome', 'unknown')}"
+        # Format final outcome
+        final_outcome_formatted = self._format_final_outcome(metrics.get('final_outcome', 'unknown'))
         print(f" Final Outcome: {final_outcome_formatted}")
         print(f" Result: {str(metrics['result'])[:100]}...")
 
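This hunk (and the ones that follow) replace every `self._wrapped_llm._format_final_outcome(...)` call and its `if`/`else` fallback for the unwrapped case with a `_format_final_outcome` method on the object itself, so call sites no longer need the guard. The formatter's implementation is not shown in this diff; judging by the ✅/❌/❓ strings around it, it plausibly maps outcome labels to an icon plus text, along the lines of this hypothetical sketch:

    def _format_final_outcome(outcome: str) -> str:
        icons = {'success': '✅', 'failure': '❌'}  # assumed mapping
        return f"{icons.get(outcome, '❓')} {outcome}"

    print(_format_final_outcome('success'))  # ✅ success
    print(_format_final_outcome('unknown'))  # ❓ unknown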
@@ -2317,12 +3375,8 @@ Follow these rules when planning your actions."""
             first_outcome = first_metrics.get('final_outcome', 'unknown')
             last_outcome = last_metrics.get('final_outcome', 'unknown')
             # Format final outcomes using the wrapped LLM's method
-
-
-                last_formatted = self._wrapped_llm._format_final_outcome(last_outcome)
-            else:
-                first_formatted = f"❓ {first_outcome}"
-                last_formatted = f"❓ {last_outcome}"
+            first_formatted = self._format_final_outcome(first_outcome)
+            last_formatted = self._format_final_outcome(last_outcome)
             outcome_improvement = f"{first_formatted} → {last_formatted}"
             print(f"🎯 Final Outcome: {outcome_improvement}")
 
@@ -2344,13 +3398,8 @@ Follow these rules when planning your actions."""
 
             # Prioritize final outcome improvement
             if outcome_improved:
-
-
-                    first_formatted = self._wrapped_llm._format_final_outcome(first_outcome)
-                    last_formatted = self._wrapped_llm._format_final_outcome(last_outcome)
-                else:
-                    first_formatted = f"❓ {first_outcome}"
-                    last_formatted = f"❓ {last_outcome}"
+                first_formatted = self._format_final_outcome(first_outcome)
+                last_formatted = self._format_final_outcome(last_outcome)
                 print(f"🎉 BREAKTHROUGH: Agent went from {first_formatted} to {last_formatted}!")
             elif turns_improved or tokens_improved or time_improved or success_improved:
                 improvements = []
@@ -2625,6 +3674,38 @@ Follow these rules when planning your actions."""
 
         print(f"[DASEIN] Pre-run service returned {len(selected_rules)} rules")
 
+        # Pre-load embedding model if we have filter_search rules (avoid timeout on first batch)
+        if selected_rules:
+            # Check for any llm_start rules with "filter search" keywords
+            has_filter_search_rules = False
+            for rule_meta in selected_rules:
+                # Unwrap tuple if needed
+                rule_obj = rule_meta[0] if isinstance(rule_meta, tuple) and len(rule_meta) == 2 else rule_meta
+
+                # Check if this is an llm_start rule with filter/search keywords
+                # Handle both dict and object formats
+                if isinstance(rule_obj, dict):
+                    target_step_type = rule_obj.get('target_step_type')
+                    advice = rule_obj.get('advice_text') or rule_obj.get('advice', '')
+                else:
+                    target_step_type = getattr(rule_obj, 'target_step_type', None)
+                    advice = getattr(rule_obj, 'advice_text', None) or getattr(rule_obj, 'advice', None) or ''
+
+                advice_lower = advice.lower() if advice else ''
+
+                if target_step_type == 'llm_start' and 'filter' in advice_lower and 'search' in advice_lower:
+                    has_filter_search_rules = True
+                    break
+
+            if has_filter_search_rules:
+                print(f"[DASEIN] 🔧 Pre-loading embedding model for pipecleaner (found filter search rules)...")
+                from .pipecleaner import _get_embedding_model
+                try:
+                    _get_embedding_model()  # Warm up the model
+                    print(f"[DASEIN] ✅ Embedding model pre-loaded successfully")
+                except Exception as e:
+                    print(f"[DASEIN] ⚠️ Failed to pre-load embedding model: {e}")
+
         # CRITICAL: For LangGraph agents, recreate with injected prompt
         if self._is_langgraph and selected_rules:
             print(f" [DASEIN][PRERUN] LangGraph agent detected with {len(selected_rules)} rules")
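Warming up at pre-run time only pays off because `_get_embedding_model` is presumably a lazy module-level singleton: the first call loads the model and every later call reuses it, so triggering the load before the hot path keeps the first real batch from hitting the load latency. The shape of that pattern, with a stand-in loader:

    _model = None

    def load_model():
        import time
        time.sleep(0.1)  # stand-in for an expensive embedding-model load
        return object()

    def get_embedding_model():
        global _model
        if _model is None:
            _model = load_model()  # only the first caller pays the cost
        return _model

    get_embedding_model()  # warm-up call at startup
    assert get_embedding_model() is get_embedding_model()  # reused thereafter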
@@ -2826,75 +3907,13 @@ Follow these rules when planning your actions."""
 
         agent_fingerprint = _minimal_agent_fingerprint(self._agent)
 
-        #
-
-
-
-
-
-
-            the trace won't show the correct tool. Stage 3.5 needs to see all options
-            to suggest better alternatives.
-            """
-            tools_metadata = []
-            tools_to_process = []
-
-            # Get ALL tools from agent (LangChain or LangGraph) - not filtered by trace usage
-            tools_attr = getattr(agent, 'tools', None)
-            if tools_attr:
-                try:
-                    tools_to_process = list(tools_attr)
-                except Exception:
-                    pass
-            elif getattr(agent, 'toolkit', None):
-                tk = getattr(agent, 'toolkit')
-                tk_tools = getattr(tk, 'tools', None) or getattr(tk, 'get_tools', None)
-                try:
-                    tools_to_process = list(tk_tools() if callable(tk_tools) else tk_tools or [])
-                except Exception:
-                    pass
-
-            # Also try LangGraph tools from compiled graph
-            if hasattr(agent, 'nodes') and 'tools' in agent.nodes:
-                tools_node = agent.nodes['tools']
-                if hasattr(tools_node, 'node') and hasattr(tools_node.node, 'steps'):
-                    for step in tools_node.node.steps:
-                        if hasattr(step, 'tools_by_name'):
-                            tools_to_process.extend(step.tools_by_name.values())
-                            break
-
-            # Extract metadata from each tool
-            for tool in tools_to_process:
-                try:
-                    tool_meta = {
-                        'name': getattr(tool, 'name', str(tool.__class__.__name__)),
-                        'description': getattr(tool, 'description', ''),
-                    }
-
-                    # Extract args_schema if available
-                    if hasattr(tool, 'args_schema') and tool.args_schema:
-                        try:
-                            # Try Pydantic v2 method
-                            if hasattr(tool.args_schema, 'model_json_schema'):
-                                tool_meta['args_schema'] = tool.args_schema.model_json_schema()
-                            # Fallback to Pydantic v1 method
-                            elif hasattr(tool.args_schema, 'schema'):
-                                tool_meta['args_schema'] = tool.args_schema.schema()
-                            else:
-                                tool_meta['args_schema'] = {}
-                        except Exception:
-                            tool_meta['args_schema'] = {}
-                    else:
-                        tool_meta['args_schema'] = {}
-
-                    tools_metadata.append(tool_meta)
-                except Exception as e:
-                    # Skip tools that fail to extract
-                    pass
-
-            return tools_metadata
-
-        tools_metadata = _extract_tool_metadata(self._agent)
+        # Get tool metadata from callback handler (extracted during runtime)
+        tools_metadata = []
+        if hasattr(self._callback_handler, '_compiled_tools_metadata'):
+            tools_metadata = self._callback_handler._compiled_tools_metadata
+        # Fallback: try extracting now (may not work if tools unbound)
+        if not tools_metadata:
+            tools_metadata = self._extract_tool_metadata(self._agent)
 
         # Reuse existing graph analysis (already extracted in __init__)
         graph_metadata = None
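The deleted helper's most reusable detail is the Pydantic version dance for `args_schema`: prefer v2's `model_json_schema()` and fall back to v1's `schema()`. Extracted into a standalone sketch (the fake tool and schema classes are for demonstration only):

    def extract_args_schema(tool) -> dict:
        schema_cls = getattr(tool, 'args_schema', None)
        if not schema_cls:
            return {}
        try:
            if hasattr(schema_cls, 'model_json_schema'):  # Pydantic v2
                return schema_cls.model_json_schema()
            if hasattr(schema_cls, 'schema'):             # Pydantic v1
                return schema_cls.schema()
        except Exception:
            pass
        return {}

    class FakeSchema:
        @staticmethod
        def model_json_schema():
            return {'properties': {'query': {'type': 'string'}}}

    class FakeTool:
        args_schema = FakeSchema

    print(extract_args_schema(FakeTool()))  # {'properties': {'query': {'type': 'string'}}}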
@@ -3512,11 +4531,8 @@ Follow these rules when planning your actions."""
 
         # Step-by-step comparison
         print(f"\n STEP-BY-STEP COMPARISON:")
-        # Format step 1 outcome
-
-            step1_outcome = self._wrapped_llm._format_final_outcome(learning_metrics[0].get('final_outcome', 'unknown'))
-        else:
-            step1_outcome = f"❓ {learning_metrics[0].get('final_outcome', 'unknown')}"
+        # Format step 1 outcome
+        step1_outcome = self._format_final_outcome(learning_metrics[0].get('final_outcome', 'unknown'))
         print(f" Step 1: {learning_metrics[0]['total_tokens']} tokens, {learning_metrics[0]['total_turns']} turns, {learning_metrics[0]['trace_time_ms']}ms, {learning_metrics[0]['success_rate']:.1f}%, {step1_outcome} (naive baseline)")
 
         # Compare steps 2→N (learning vs baseline)
@@ -3541,13 +4557,8 @@ Follow these rules when planning your actions."""
             failure_direction = "" if failure_change > 0 else "" if failure_change < 0 else "="
 
             # Get final outcomes for this step
-
-
-                base_outcome = self._wrapped_llm._format_final_outcome(base.get('final_outcome', 'unknown'))
-                learn_outcome = self._wrapped_llm._format_final_outcome(learn.get('final_outcome', 'unknown'))
-            else:
-                base_outcome = f"❓ {base.get('final_outcome', 'unknown')}"
-                learn_outcome = f"❓ {learn.get('final_outcome', 'unknown')}"
+            base_outcome = self._format_final_outcome(base.get('final_outcome', 'unknown'))
+            learn_outcome = self._format_final_outcome(learn.get('final_outcome', 'unknown'))
 
             print(f" Step {i}: {base['total_tokens']} → {learn['total_tokens']} tokens ({token_direction}{abs(token_improvement)}, {token_pct:+.1f}%)")
             print(f" {base['total_turns']} → {learn['total_turns']} turns ({turn_direction}{abs(turn_improvement)}, {turn_pct:+.1f}%)")