dasein-core 0.2.6__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dasein/api.py +1219 -133
- dasein/capture.py +2379 -1803
- dasein/microturn.py +475 -0
- dasein/models/en_core_web_sm/en_core_web_sm/__init__.py +10 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/LICENSE +19 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/LICENSES_SOURCES +66 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/README.md +47 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/accuracy.json +330 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/attribute_ruler/patterns +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/config.cfg +269 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/lemmatizer/lookups/lookups.bin +1 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/meta.json +521 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/cfg +13 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/model +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/moves +1 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/cfg +13 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/model +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/moves +1 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/senter/cfg +3 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/senter/model +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tagger/cfg +57 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tagger/model +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tok2vec/cfg +3 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tok2vec/model +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tokenizer +3 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/key2row +1 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/lookups.bin +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/strings.json +84782 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/vectors +0 -0
- dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/vectors.cfg +3 -0
- dasein/models/en_core_web_sm/en_core_web_sm/meta.json +521 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/LICENSE +19 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/LICENSES_SOURCES +66 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/METADATA +59 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/RECORD +35 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/WHEEL +5 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/entry_points.txt +2 -0
- dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/top_level.txt +1 -0
- dasein/pipecleaner.py +1917 -0
- dasein/services/post_run_client.py +4 -2
- dasein/services/service_adapter.py +4 -2
- dasein/wrappers.py +314 -0
- {dasein_core-0.2.6.dist-info → dasein_core-0.2.9.dist-info}/METADATA +4 -1
- dasein_core-0.2.9.dist-info/RECORD +59 -0
- dasein_core-0.2.6.dist-info/RECORD +0 -21
- {dasein_core-0.2.6.dist-info → dasein_core-0.2.9.dist-info}/WHEEL +0 -0
- {dasein_core-0.2.6.dist-info → dasein_core-0.2.9.dist-info}/licenses/LICENSE +0 -0
- {dasein_core-0.2.6.dist-info → dasein_core-0.2.9.dist-info}/top_level.txt +0 -0
dasein/microturn.py
ADDED
@@ -0,0 +1,475 @@
```python
"""
Microturn enforcement system for anti-fanout rules.

This module provides real-time LLM call interception and modification
to enforce fanout prevention policies (e.g., "only 1 Summary per search").
"""

from typing import List, Dict, Tuple, Any, Optional, Set
import json
import hashlib


def has_tool_end_rules(callback_handler: Any) -> bool:
    """
    Check if any tool_end rules exist in the selected rules.

    Args:
        callback_handler: Callback handler with _selected_rules

    Returns:
        True if at least one tool_end rule exists, False otherwise
    """
    if not callback_handler or not hasattr(callback_handler, '_selected_rules'):
        return False

    rules = callback_handler._selected_rules or []
    for rule in rules:
        # Handle tuple format (rule, metadata)
        if isinstance(rule, tuple) and len(rule) >= 1:
            rule = rule[0]

        # Check target_step_type
        if isinstance(rule, dict):
            target = rule.get('target_step_type', '')
        else:
            target = getattr(rule, 'target_step_type', '')

        if target == 'tool_end':
            return True

    return False


def extract_tool_call_signatures(msg: Any) -> Dict[int, str]:
    """
    Extract tool call signatures (name + argument hash) from a message.

    Args:
        msg: Message object with tool_calls

    Returns:
        Dict mapping tool call index to signature string (e.g., "Summary_abc123")
    """
    signatures = {}

    if not msg or not hasattr(msg, 'tool_calls') or not msg.tool_calls:
        return signatures

    for idx, tc in enumerate(msg.tool_calls):
        tc_name = tc.name if hasattr(tc, 'name') else tc.get('name', '')

        # Extract arguments
        if hasattr(tc, 'args'):
            args = tc.args
        elif isinstance(tc, dict) and 'args' in tc:
            args = tc['args']
        else:
            args = {}

        # Create content hash from arguments
        try:
            # Serialize args to a stable JSON string
            args_str = json.dumps(args, sort_keys=True, default=str)
            # Create short hash (first 8 chars of SHA256)
            content_hash = hashlib.sha256(args_str.encode()).hexdigest()[:8]
            signature = f"{tc_name}_{content_hash}"
        except Exception:
            # Fallback if serialization fails
            signature = f"{tc_name}_unknown"

        signatures[idx] = signature

    return signatures


def extract_proposed_function_calls(result: Any) -> Tuple[List[str], Optional[Any]]:
    """
    Extract proposed function calls from LLM response.

    Args:
        result: LLM response (AIMessage, ChatResult, or LLMResult)

    Returns:
        Tuple of (list of function names, message object)
    """
    proposed_func_names = []
    msg = None

    # Case 1: Result is AIMessage directly
    if hasattr(result, 'tool_calls') or hasattr(result, 'additional_kwargs'):
        msg = result
    # Case 2: Result has generations
    elif hasattr(result, 'generations') and result.generations:
        first_gen = result.generations[0]
        # Case 2a: Generation has message
        if hasattr(first_gen, 'message'):
            msg = first_gen.message
        # Case 2b: Generation is a list (contains message)
        elif isinstance(first_gen, list) and len(first_gen) > 0:
            if hasattr(first_gen[0], 'message'):
                msg = first_gen[0].message

    # Extract tool calls from message
    if msg:
        # CRITICAL: Extract from tool_calls OR function_call, NOT BOTH (prevents duplicates)
        # Prefer tool_calls (modern) if available
        if hasattr(msg, 'tool_calls') and msg.tool_calls:
            for tc in msg.tool_calls:
                if hasattr(tc, 'name'):
                    proposed_func_names.append(tc.name)
                elif isinstance(tc, dict) and 'name' in tc:
                    proposed_func_names.append(tc['name'])
        elif hasattr(msg, 'additional_kwargs'):
            # Only check function_call if tool_calls is empty (fallback)
            func_call = msg.additional_kwargs.get('function_call')
            if func_call and isinstance(func_call, dict) and 'name' in func_call:
                proposed_func_names.append(func_call['name'])

    return proposed_func_names, msg


def build_execution_state_summary(callback_handler: Any) -> str:
    """
    Build a human-readable summary of all function calls made so far.

    Args:
        callback_handler: DaseinCallbackHandler with _function_calls_made

    Returns:
        Formatted string of execution state
    """
    state_summary = []
    if hasattr(callback_handler, '_function_calls_made') and callback_handler._function_calls_made:
        for fname in sorted(callback_handler._function_calls_made.keys()):
            count = len(callback_handler._function_calls_made[fname])
            if count > 0:
                state_summary.append(f" • {fname}: {count}x")

    return "EXECUTION STATE (all calls made so far):\n" + "\n".join(state_summary) if state_summary else "EXECUTION STATE: No calls yet"


def create_microturn_prompt(test_rule: str, full_state: str, proposed_func_names: List[str]) -> str:
    """
    Create the prompt for the microturn LLM to decide on compliant calls.

    Args:
        test_rule: The rule to enforce (e.g., "max 1 Summary per search")
        full_state: Execution state summary
        proposed_func_names: List of proposed function names

    Returns:
        Formatted prompt string
    """
    proposed_calls_str = "\n".join([f" {i+1}. {name}" for i, name in enumerate(proposed_func_names)])

    return f"""You are an anti-fanout rule enforcement system for AI agents.

RULE TO ENFORCE:
{test_rule}

{full_state}

PROPOSED TOOL CALLS (from LLM):
{proposed_calls_str}

TASK: Check if the proposed calls violate the rule.
- The rule is ONLY about Summary calls relative to tavily_search calls
- ALL other tool calls (ResearchQuestion, think_tool, ConductResearch, etc.) should PASS THROUGH unchanged
- Only remove Summary calls if they violate the ratio

EXAMPLES:
- State: tavily_search: 1x | Proposed: [ResearchQuestion] → COMPLIANT, return: ResearchQuestion
- State: tavily_search: 1x | Proposed: [Summary, Summary] → VIOLATION, return: Summary (keep only 1)
- State: tavily_search: 1x, Summary: 1x | Proposed: [Summary] → VIOLATION, return: (empty, already have 1)
- State: No calls yet | Proposed: [ResearchQuestion, think_tool] → COMPLIANT, return: ResearchQuestion, think_tool

OUTPUT FORMAT: List the compliant function names, one per line. If all are compliant, return ALL of them.

Compliant list:"""


def parse_microturn_response(microturn_response: Any) -> List[str]:
    """
    Parse the microturn LLM response to extract compliant function names.

    Args:
        microturn_response: LLM response object

    Returns:
        List of compliant function names (may be empty for total block)
    """
    compliant_response = ""

    if hasattr(microturn_response, 'generations') and microturn_response.generations:
        # LLMResult with generations
        first_gen = microturn_response.generations[0]

        # generations[0] can be a list OR a ChatGeneration
        if isinstance(first_gen, list) and len(first_gen) > 0:
            # It's a list - get first item
            msg_resp = first_gen[0].message if hasattr(first_gen[0], 'message') else first_gen[0]
        elif hasattr(first_gen, 'message'):
            # It's a ChatGeneration
            msg_resp = first_gen.message
        else:
            msg_resp = first_gen

        compliant_response = msg_resp.content.strip() if hasattr(msg_resp, 'content') else str(msg_resp).strip()
    elif hasattr(microturn_response, 'content'):
        # AIMessage directly
        compliant_response = microturn_response.content.strip()
    else:
        compliant_response = str(microturn_response).strip()

    # Parse compliant list - be lenient with parsing
    compliant_names = []
    for line in compliant_response.split('\n'):
        line = line.strip()
        # Skip empty lines, comments, and prompt echoes
        if not line or line.startswith('#') or line.startswith('Your') or line.startswith('Compliant') or line.startswith('-') or line.startswith('OUTPUT'):
            continue
        # Remove markdown backticks if present
        line = line.strip('`').strip()
        # If line looks like a function name, add it
        if any(c.isalnum() or c == '_' for c in line):
            compliant_names.append(line)

    return compliant_names


def modify_tool_calls_with_deadletter(
    msg: Any,
    compliant_names: List[str],
    callback_handler: Any,
    tool_result_cache: Optional[Dict[str, Any]] = None,
    tool_sigs: Optional[Dict[int, str]] = None
) -> Tuple[int, List[str]]:
    """
    Modify msg.tool_calls to redirect blocked calls to dasein_deadletter.

    Supports transparent deduplication: if tool_result_cache contains a result
    for a blocked call's signature, pass that cached result to dasein_deadletter
    so it can return it seamlessly (the agent never knows it was blocked).

    Args:
        msg: Message object with tool_calls attribute
        compliant_names: List of function names that should be allowed
        callback_handler: Callback handler for state tracking
        tool_result_cache: Optional dict mapping signatures to cached results
        tool_sigs: Optional dict mapping tool call index to signature

    Returns:
        Tuple of (blocked_count, blocked_call_names)
    """
    if not msg or not hasattr(msg, 'tool_calls'):
        return 0, []

    original_tool_calls = msg.tool_calls if msg.tool_calls else []
    modified_tool_calls = []
    compliant_names_set = set(compliant_names)
    blocked_calls = []
    blocked_count = 0
    tool_result_cache = tool_result_cache or {}
    tool_sigs = tool_sigs or {}

    # Process each tool call: keep compliant, rewrite blocked to dead-letter
    for idx, tc in enumerate(original_tool_calls):
        tc_name = tc.name if hasattr(tc, 'name') else tc.get('name', '')

        # Idempotency: never rewrite dasein_deadletter itself
        if tc_name == 'dasein_deadletter':
            modified_tool_calls.append(tc)
            continue

        if tc_name in compliant_names_set:
            # PASS THROUGH: compliant call
            modified_tool_calls.append(tc)
            compliant_names_set.remove(tc_name)  # Use each name once
        else:
            # REDIRECT: blocked call → rewrite to dasein_deadletter

            # Create fingerprint of original args
            try:
                args_str = json.dumps(tc.get('args', {}) if isinstance(tc, dict) else getattr(tc, 'args', {}), sort_keys=True)
                args_fingerprint = hashlib.sha256(args_str.encode()).hexdigest()[:16]
            except Exception:
                args_fingerprint = "unknown"

            # Estimate tokens saved (rough: 100-500 for Summary)
            tokens_saved_est = 300 if 'summary' in tc_name.lower() else 50

            # Check if we have a cached result for transparent deduplication
            sig = tool_sigs.get(idx)
            cached_result = tool_result_cache.get(sig) if sig else None

            # Create new tool call for dasein_deadletter
            deadletter_args = {
                'original_tool': tc_name,
                'original_args_fingerprint': args_fingerprint,
                'reason_code': 'duplicate_detected' if cached_result else f"{tc_name}_blocked_by_policy",
                'policy_trace_id': getattr(callback_handler, '_run_id', 'unknown'),
                'tokens_saved_estimate': tokens_saved_est
            }

            # Add cached result for transparent deduplication
            if cached_result is not None:
                deadletter_args['cached_result'] = cached_result

            deadletter_call = {
                'name': 'dasein_deadletter',
                'args': deadletter_args,
                'id': tc.get('id') if isinstance(tc, dict) else getattr(tc, 'id', f"deadletter_{blocked_count}"),
                'type': 'tool_call'
            }

            # Convert dict to ToolCall object if needed
            if hasattr(tc, '__class__') and not isinstance(tc, dict):
                # Try to create same type as original
                try:
                    from langchain_core.messages import ToolCall
                    deadletter_call = ToolCall(**deadletter_call)
                except Exception:
                    pass  # Keep as dict if conversion fails

            modified_tool_calls.append(deadletter_call)
            blocked_calls.append(tc_name)
            blocked_count += 1

    # Update with modified list (compliant + redirected)
    msg.tool_calls = modified_tool_calls

    return blocked_count, blocked_calls


def update_callback_state(callback_handler: Any, blocked_calls: List[str]) -> None:
    """
    Update callback handler state to reflect redirected calls.

    Args:
        callback_handler: DaseinCallbackHandler with _function_calls_made
        blocked_calls: List of function names that were blocked/redirected
    """
    if not callback_handler or not hasattr(callback_handler, '_function_calls_made') or not blocked_calls:
        return

    if 'dasein_deadletter' not in callback_handler._function_calls_made:
        callback_handler._function_calls_made['dasein_deadletter'] = []

    for blocked_name in blocked_calls:
        callback_handler._function_calls_made['dasein_deadletter'].append({
            'original_tool': blocked_name,
            'blocked_by': 'microturn'
        })


async def run_microturn_enforcement(
    result: Any,
    callback_handler: Any,
    self_llm: Any,
    patch_depth: Any,
    use_llm_microturn: bool = False
) -> bool:
    """
    Main microturn enforcement logic - extracted from api.py for cleaner organization.

    Args:
        result: LLM result to potentially modify
        callback_handler: DaseinCallbackHandler with state
        self_llm: The LLM instance (for microturn LLM call if needed)
        patch_depth: Thread-local object with seen_tool_signatures, tool_result_cache
        use_llm_microturn: Whether to use LLM-based microturn (default False, uses deterministic only)

    Returns:
        True if enforcement was applied, False if skipped
    """
    try:
        # Extract proposed function calls
        proposed_func_names, msg = extract_proposed_function_calls(result)

        if not proposed_func_names:
            return False

        if all(name == 'dasein_deadletter' for name in proposed_func_names):
            return False  # Already processed

        # Extract tool call signatures for duplicate detection
        tool_sigs = {}
        duplicates = []
        if msg:
            tool_sigs = extract_tool_call_signatures(msg)

            # Initialize signature tracking and result cache
            if not hasattr(patch_depth, 'seen_tool_signatures'):
                patch_depth.seen_tool_signatures = set()
            if not hasattr(patch_depth, 'tool_result_cache'):
                patch_depth.tool_result_cache = {}

            # Detect duplicates (immediate fanout within this response OR across turns)
            seen_in_response = set()
            for idx, sig in tool_sigs.items():
                if sig in seen_in_response or sig in patch_depth.seen_tool_signatures:
                    # Duplicate detected
                    duplicates.append((idx, sig))
                    print(f"[DASEIN][MICROTURN] 🔄 Duplicate detected: {sig}")
                else:
                    # First occurrence
                    seen_in_response.add(sig)

        # DETERMINISTIC DUPLICATE BLOCKING (always on)
        if duplicates and msg:
            print(f"[DASEIN][MICROTURN] Blocking {len(duplicates)} duplicate call(s)")
            # Compliant = every proposed call except the duplicate indices; the
            # name-consuming set logic in modify_tool_calls_with_deadletter then
            # redirects exactly the duplicate occurrences.
            duplicate_indices = {idx for idx, _ in duplicates}
            compliant_names = [
                tc.name if hasattr(tc, 'name') else tc.get('name', '')
                for idx, tc in enumerate(msg.tool_calls)
                if idx not in duplicate_indices
            ]
            blocked_count, blocked_calls = modify_tool_calls_with_deadletter(
                msg,
                compliant_names,
                callback_handler,
                tool_result_cache=patch_depth.tool_result_cache,
                tool_sigs=tool_sigs
            )

            if blocked_count > 0:
                update_callback_state(callback_handler, blocked_calls)
                print(f"[DASEIN][MICROTURN] ✅ Blocked {blocked_count} duplicate(s)")
            return True

        # LLM-BASED MICROTURN (behind flag)
        if use_llm_microturn:
            # Build prompt for microturn LLM
            full_state = build_execution_state_summary(callback_handler)
            rule = "ANTI-FANOUT RULE: Only allow 1 Summary call per search. Other tools should pass through."
            microturn_prompt = create_microturn_prompt(rule, full_state, proposed_func_names)

            # Call microturn LLM (stripped kwargs to force text response)
            from langchain_core.messages import HumanMessage
            if hasattr(self_llm, 'ainvoke'):
                microturn_response = await self_llm.ainvoke([HumanMessage(content=microturn_prompt)])
            else:
                microturn_response = self_llm.invoke([HumanMessage(content=microturn_prompt)])

            # Parse response
            compliant_names = parse_microturn_response(microturn_response)

            # Modify tool calls if needed (duplicates were already handled above)
            if msg:
                blocked_count, blocked_calls = modify_tool_calls_with_deadletter(
                    msg,
                    compliant_names,
                    callback_handler,
                    tool_result_cache=patch_depth.tool_result_cache,
                    tool_sigs=tool_sigs
                )

                if blocked_count > 0:
                    update_callback_state(callback_handler, blocked_calls)
                    print(f"[DASEIN][MICROTURN] ✅ LLM blocked {blocked_count} call(s): {blocked_calls}")
                    return True

        # No enforcement applied
        return False

    except Exception as e:
        print(f"[DASEIN][MICROTURN] ⚠️ Error during enforcement: {e}")
        import traceback
        traceback.print_exc()
        return False
```
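To make the new module's flow concrete, here is a minimal sketch of the deterministic dedup path. It assumes the wheel is installed so `dasein.microturn` is importable; the `SimpleNamespace` objects are hypothetical stand-ins that mirror only the attributes the module actually touches (`tool_calls`, `_function_calls_made`, `_run_id`), not the real LangChain or Dasein classes.

```python
# Illustrative only: exercises the deterministic duplicate-blocking path
# of dasein.microturn with duck-typed stand-ins.
import asyncio
from types import SimpleNamespace

from dasein.microturn import extract_tool_call_signatures, run_microturn_enforcement

# An AIMessage-like object: dict-style tool calls, two with identical args.
msg = SimpleNamespace(tool_calls=[
    {'name': 'Summary', 'args': {'topic': 'solar'}, 'id': 'call_1', 'type': 'tool_call'},
    {'name': 'Summary', 'args': {'topic': 'solar'}, 'id': 'call_2', 'type': 'tool_call'},
    {'name': 'think_tool', 'args': {'note': 'ok'}, 'id': 'call_3', 'type': 'tool_call'},
])

# Signatures are "<name>_<8-hex-char SHA256 of sorted-JSON args>", so the two
# Summary calls collide, e.g. {0: 'Summary_ab12cd34', 1: 'Summary_ab12cd34', ...}.
print(extract_tool_call_signatures(msg))

handler = SimpleNamespace(_function_calls_made={}, _run_id='demo-run')
patch_depth = SimpleNamespace()  # gains seen_tool_signatures / tool_result_cache lazily

# self_llm can be None because use_llm_microturn defaults to False.
applied = asyncio.run(run_microturn_enforcement(msg, handler, None, patch_depth))
print(applied)                                # True: enforcement fired
print([tc['name'] for tc in msg.tool_calls])  # ['Summary', 'dasein_deadletter', 'think_tool']
```

After the call, the second `Summary` has been rewritten into a `dasein_deadletter` call carrying the original tool name, an args fingerprint, and a `duplicate_detected`/`blocked_by_policy` reason code, and the block is recorded on the handler's `_function_calls_made`.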
dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/LICENSE
ADDED
@@ -0,0 +1,19 @@
Copyright 2021 ExplosionAI GmbH

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/LICENSES_SOURCES
ADDED
@@ -0,0 +1,66 @@
# OntoNotes 5

* Author: Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston
* URL: https://catalog.ldc.upenn.edu/LDC2013T19
* License: commercial (licensed by Explosion)

```
```

# ClearNLP Constituent-to-Dependency Conversion

* Author: Emory University
* URL: https://github.com/clir/clearnlp-guidelines/blob/master/md/components/dependency_conversion.md
* License: Citation provided for reference, no code packaged with model

```
```

# WordNet 3.0

* Author: Princeton University
* URL: https://wordnet.princeton.edu/
* License: WordNet 3.0 License

```
WordNet Release 3.0

This software and database is being provided to you, the LICENSEE, by
Princeton University under the following license. By obtaining, using
and/or copying this software and database, you agree that you have
read, understood, and will comply with these terms and conditions.:

Permission to use, copy, modify and distribute this software and
database and its documentation for any purpose and without fee or
royalty is hereby granted, provided that you agree to comply with
the following copyright notice and statements, including the disclaimer,
and that the same appear on ALL copies of the software, database and
documentation, including modifications that you make for internal
use or for distribution.

WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved.

THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON
UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON
UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT-
ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE
OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT
INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR
OTHER RIGHTS.

The name of Princeton University or Princeton may not be used in
advertising or publicity pertaining to distribution of the software
and/or database. Title to copyright in this software, database and
any associated documentation shall at all times remain with
Princeton University and LICENSEE agrees to preserve same.
```
dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/README.md
ADDED
@@ -0,0 +1,47 @@
### Details: https://spacy.io/models/en#en_core_web_sm

English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.

| Feature | Description |
| --- | --- |
| **Name** | `en_core_web_sm` |
| **Version** | `3.7.1` |
| **spaCy** | `>=3.7.2,<3.8.0` |
| **Default Pipeline** | `tok2vec`, `tagger`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
| **Components** | `tok2vec`, `tagger`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
| **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
| **Sources** | [OntoNotes 5](https://catalog.ldc.upenn.edu/LDC2013T19) (Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston)<br />[ClearNLP Constituent-to-Dependency Conversion](https://github.com/clir/clearnlp-guidelines/blob/master/md/components/dependency_conversion.md) (Emory University)<br />[WordNet 3.0](https://wordnet.princeton.edu/) (Princeton University) |
| **License** | `MIT` |
| **Author** | [Explosion](https://explosion.ai) |

### Label Scheme

<details>

<summary>View label scheme (113 labels for 3 components)</summary>

| Component | Labels |
| --- | --- |
| **`tagger`** | `$`, `''`, `,`, `-LRB-`, `-RRB-`, `.`, `:`, `ADD`, `AFX`, `CC`, `CD`, `DT`, `EX`, `FW`, `HYPH`, `IN`, `JJ`, `JJR`, `JJS`, `LS`, `MD`, `NFP`, `NN`, `NNP`, `NNPS`, `NNS`, `PDT`, `POS`, `PRP`, `PRP$`, `RB`, `RBR`, `RBS`, `RP`, `SYM`, `TO`, `UH`, `VB`, `VBD`, `VBG`, `VBN`, `VBP`, `VBZ`, `WDT`, `WP`, `WP$`, `WRB`, `XX`, `_SP`, ```` |
| **`parser`** | `ROOT`, `acl`, `acomp`, `advcl`, `advmod`, `agent`, `amod`, `appos`, `attr`, `aux`, `auxpass`, `case`, `cc`, `ccomp`, `compound`, `conj`, `csubj`, `csubjpass`, `dative`, `dep`, `det`, `dobj`, `expl`, `intj`, `mark`, `meta`, `neg`, `nmod`, `npadvmod`, `nsubj`, `nsubjpass`, `nummod`, `oprd`, `parataxis`, `pcomp`, `pobj`, `poss`, `preconj`, `predet`, `prep`, `prt`, `punct`, `quantmod`, `relcl`, `xcomp` |
| **`ner`** | `CARDINAL`, `DATE`, `EVENT`, `FAC`, `GPE`, `LANGUAGE`, `LAW`, `LOC`, `MONEY`, `NORP`, `ORDINAL`, `ORG`, `PERCENT`, `PERSON`, `PRODUCT`, `QUANTITY`, `TIME`, `WORK_OF_ART` |

</details>

### Accuracy

| Type | Score |
| --- | --- |
| `TOKEN_ACC` | 99.86 |
| `TOKEN_P` | 99.57 |
| `TOKEN_R` | 99.58 |
| `TOKEN_F` | 99.57 |
| `TAG_ACC` | 97.25 |
| `SENTS_P` | 92.02 |
| `SENTS_R` | 89.21 |
| `SENTS_F` | 90.59 |
| `DEP_UAS` | 91.75 |
| `DEP_LAS` | 89.87 |
| `ENTS_P` | 84.55 |
| `ENTS_R` | 84.57 |
| `ENTS_F` | 84.56 |