dasein-core 0.2.7__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. dasein/api.py +1144 -133
  2. dasein/capture.py +2325 -1803
  3. dasein/microturn.py +475 -0
  4. dasein/models/en_core_web_sm/en_core_web_sm/__init__.py +10 -0
  5. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/LICENSE +19 -0
  6. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/LICENSES_SOURCES +66 -0
  7. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/README.md +47 -0
  8. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/accuracy.json +330 -0
  9. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/attribute_ruler/patterns +0 -0
  10. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/config.cfg +269 -0
  11. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/lemmatizer/lookups/lookups.bin +1 -0
  12. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/meta.json +521 -0
  13. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/cfg +13 -0
  14. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/model +0 -0
  15. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/ner/moves +1 -0
  16. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/cfg +13 -0
  17. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/model +0 -0
  18. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/parser/moves +1 -0
  19. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/senter/cfg +3 -0
  20. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/senter/model +0 -0
  21. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tagger/cfg +57 -0
  22. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tagger/model +0 -0
  23. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tok2vec/cfg +3 -0
  24. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tok2vec/model +0 -0
  25. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/tokenizer +3 -0
  26. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/key2row +1 -0
  27. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/lookups.bin +0 -0
  28. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/strings.json +84782 -0
  29. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/vectors +0 -0
  30. dasein/models/en_core_web_sm/en_core_web_sm/en_core_web_sm-3.7.1/vocab/vectors.cfg +3 -0
  31. dasein/models/en_core_web_sm/en_core_web_sm/meta.json +521 -0
  32. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/LICENSE +19 -0
  33. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/LICENSES_SOURCES +66 -0
  34. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/METADATA +59 -0
  35. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/RECORD +35 -0
  36. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/WHEEL +5 -0
  37. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/entry_points.txt +2 -0
  38. dasein/models/en_core_web_sm/en_core_web_sm-3.7.1.dist-info/top_level.txt +1 -0
  39. dasein/pipecleaner.py +1917 -0
  40. dasein/wrappers.py +315 -0
  41. {dasein_core-0.2.7.dist-info → dasein_core-0.2.10.dist-info}/METADATA +4 -1
  42. dasein_core-0.2.10.dist-info/RECORD +59 -0
  43. dasein_core-0.2.7.dist-info/RECORD +0 -21
  44. {dasein_core-0.2.7.dist-info → dasein_core-0.2.10.dist-info}/WHEEL +0 -0
  45. {dasein_core-0.2.7.dist-info → dasein_core-0.2.10.dist-info}/licenses/LICENSE +0 -0
  46. {dasein_core-0.2.7.dist-info → dasein_core-0.2.10.dist-info}/top_level.txt +0 -0
dasein/microturn.py ADDED
@@ -0,0 +1,475 @@
1
+ """
2
+ Microturn enforcement system for anti-fanout rules.
3
+
4
+ This module provides real-time LLM call interception and modification
5
+ to enforce fanout prevention policies (e.g., "only 1 Summary per search").
6
+ """
7
+
8
+ from typing import List, Dict, Tuple, Any, Optional, Set
9
+ import json
10
+ import hashlib
11
+
12
+
13
def has_tool_end_rules(callback_handler: Any) -> bool:
    """
    Report whether any selected rule targets the ``tool_end`` step.

    Args:
        callback_handler: Callback handler carrying ``_selected_rules``.

    Returns:
        True if at least one tool_end rule exists, False otherwise.
    """
    if not callback_handler or not hasattr(callback_handler, '_selected_rules'):
        return False

    for entry in (callback_handler._selected_rules or []):
        # Entries may be bare rules or (rule, metadata) tuples.
        rule = entry[0] if isinstance(entry, tuple) and len(entry) >= 1 else entry

        # Rules may be dicts or objects; read target_step_type either way.
        if isinstance(rule, dict):
            target = rule.get('target_step_type', '')
        else:
            target = getattr(rule, 'target_step_type', '')

        if target == 'tool_end':
            return True

    return False
42
+
43
+
44
def extract_tool_call_signatures(msg: Any) -> Dict[str, str]:
    """
    Extract tool call signatures (name + argument hash) from a message.

    Args:
        msg: Message object with tool_calls

    Returns:
        Dict mapping tool call index to signature string (e.g., "Summary_abc123")
    """
    signatures = {}

    if not msg or not hasattr(msg, 'tool_calls') or not msg.tool_calls:
        return signatures

    for idx, tc in enumerate(msg.tool_calls):
        # Tool calls may be objects (attribute access) or plain dicts.
        tc_name = tc.name if hasattr(tc, 'name') else tc.get('name', '')

        # Extract arguments
        if hasattr(tc, 'args'):
            args = tc.args
        elif isinstance(tc, dict) and 'args' in tc:
            args = tc['args']
        else:
            args = {}

        # Create content hash from arguments
        try:
            # Serialize args to stable JSON string (sort_keys keeps the
            # hash independent of dict insertion order).
            args_str = json.dumps(args, sort_keys=True, default=str)
            # Create short hash (first 8 chars of SHA256)
            content_hash = hashlib.sha256(args_str.encode()).hexdigest()[:8]
            signature = f"{tc_name}_{content_hash}"
        except Exception:
            # BUGFIX: was a bare `except:` which would also swallow
            # KeyboardInterrupt/SystemExit. Fallback if serialization fails.
            signature = f"{tc_name}_unknown"

        signatures[idx] = signature

    return signatures
84
+
85
+
86
def extract_proposed_function_calls(result: Any) -> Tuple[List[str], Optional[Any]]:
    """
    Pull the proposed function-call names out of an LLM response.

    Args:
        result: LLM response (AIMessage, ChatResult, or LLMResult)

    Returns:
        Tuple of (list of function names, message object)
    """
    names: List[str] = []
    msg = None

    # Case 1: the result is already a message-like object.
    if hasattr(result, 'tool_calls') or hasattr(result, 'additional_kwargs'):
        msg = result
    # Case 2: the result wraps generations (ChatResult / LLMResult).
    elif hasattr(result, 'generations') and result.generations:
        head = result.generations[0]
        if hasattr(head, 'message'):
            # Generation carries the message directly.
            msg = head.message
        elif isinstance(head, list) and len(head) > 0 and hasattr(head[0], 'message'):
            # Generation is a list whose first item carries the message.
            msg = head[0].message

    if msg:
        # CRITICAL: read tool_calls OR function_call, never both, to avoid
        # counting the same call twice. Modern tool_calls wins when present.
        tool_calls = getattr(msg, 'tool_calls', None)
        if tool_calls:
            for call in tool_calls:
                if hasattr(call, 'name'):
                    names.append(call.name)
                elif isinstance(call, dict) and 'name' in call:
                    names.append(call['name'])
        elif hasattr(msg, 'additional_kwargs'):
            # Legacy function_call fallback (only when tool_calls is empty).
            func_call = msg.additional_kwargs.get('function_call')
            if func_call and isinstance(func_call, dict) and 'name' in func_call:
                names.append(func_call['name'])

    return names, msg
130
+
131
+
132
def build_execution_state_summary(callback_handler: Any) -> str:
    """
    Render a human-readable summary of every function call made so far.

    Args:
        callback_handler: DaseinCallbackHandler with _function_calls_made

    Returns:
        Formatted string of execution state
    """
    calls_made = getattr(callback_handler, '_function_calls_made', None)
    lines = []
    if calls_made:
        # Sorted for a stable, readable ordering; zero-count entries omitted.
        for fname in sorted(calls_made):
            n_calls = len(calls_made[fname])
            if n_calls > 0:
                lines.append(f"  • {fname}: {n_calls}x")

    if not lines:
        return "EXECUTION STATE: No calls yet"
    return "EXECUTION STATE (all calls made so far):\n" + "\n".join(lines)
150
+
151
+
152
def create_microturn_prompt(test_rule: str, full_state: str, proposed_func_names: List[str]) -> str:
    """
    Build the prompt sent to the microturn LLM to select compliant calls.

    Args:
        test_rule: The rule to enforce (e.g., "max 1 Summary per search")
        full_state: Execution state summary
        proposed_func_names: List of proposed function names

    Returns:
        Formatted prompt string
    """
    # Numbered, one-per-line listing of the proposed calls.
    proposed_calls_str = "\n".join(
        f"  {i+1}. {name}" for i, name in enumerate(proposed_func_names)
    )

    return f"""You are an anti-fanout rule enforcement system for AI agents.

RULE TO ENFORCE:
{test_rule}

{full_state}

PROPOSED TOOL CALLS (from LLM):
{proposed_calls_str}

TASK: Check if the proposed calls violate the rule.
- The rule is ONLY about Summary calls relative to tavily_search calls
- ALL other tool calls (ResearchQuestion, think_tool, ConductResearch, etc.) should PASS THROUGH unchanged
- Only remove Summary calls if they violate the ratio

EXAMPLES:
- State: tavily_search: 1x | Proposed: [ResearchQuestion] → COMPLIANT, return: ResearchQuestion
- State: tavily_search: 1x | Proposed: [Summary, Summary] → VIOLATION, return: Summary (keep only 1)
- State: tavily_search: 1x, Summary: 1x | Proposed: [Summary] → VIOLATION, return: (empty, already have 1)
- State: No calls yet | Proposed: [ResearchQuestion, think_tool] → COMPLIANT, return: ResearchQuestion, think_tool

OUTPUT FORMAT: List the compliant function names, one per line. If all are compliant, return ALL of them.

Compliant list:"""
190
+
191
+
192
def parse_microturn_response(microturn_response: Any) -> List[str]:
    """
    Extract the compliant function names from the microturn LLM's reply.

    Args:
        microturn_response: LLM response object

    Returns:
        List of compliant function names (may be empty for total block)
    """
    # --- Step 1: pull the raw text out of whatever shape the response has ---
    generations = getattr(microturn_response, 'generations', None)
    if generations:
        # LLMResult: generations[0] may be a list OR a ChatGeneration.
        head = generations[0]
        if isinstance(head, list) and len(head) > 0:
            payload = head[0].message if hasattr(head[0], 'message') else head[0]
        elif hasattr(head, 'message'):
            payload = head.message
        else:
            payload = head
        text = payload.content.strip() if hasattr(payload, 'content') else str(payload).strip()
    elif hasattr(microturn_response, 'content'):
        # Bare AIMessage.
        text = microturn_response.content.strip()
    else:
        text = str(microturn_response).strip()

    # --- Step 2: lenient line-by-line parse of the compliant list ---
    # Skip blanks, comments, and lines that merely echo the prompt.
    skip_prefixes = ('#', 'Your', 'Compliant', '-', 'OUTPUT')
    names: List[str] = []
    for raw_line in text.split('\n'):
        candidate = raw_line.strip()
        if not candidate or candidate.startswith(skip_prefixes):
            continue
        # Drop surrounding markdown backticks.
        candidate = candidate.strip('`').strip()
        # Keep anything that still looks like an identifier.
        if any(ch.isalnum() or ch == '_' for ch in candidate):
            names.append(candidate)

    return names
239
+
240
+
241
def modify_tool_calls_with_deadletter(
    msg: Any,
    compliant_names: List[str],
    callback_handler: Any,
    tool_result_cache: Optional[Dict[str, Any]] = None,
    tool_sigs: Optional[Dict[int, str]] = None
) -> Tuple[int, List[str]]:
    """
    Modify msg.tool_calls to redirect blocked calls to dasein_deadletter.

    Supports transparent deduplication: if tool_result_cache contains a result
    for a blocked call's signature, pass that cached result to dasein_deadletter
    so it can return it seamlessly (agent never knows it was blocked).

    Args:
        msg: Message object with tool_calls attribute
        compliant_names: List of function names that should be allowed.
            Each occurrence of a name authorizes exactly one call.
        callback_handler: Callback handler for state tracking (its _run_id
            is recorded as the policy trace id)
        tool_result_cache: Optional dict mapping signatures to cached results
        tool_sigs: Optional dict mapping tool call index to signature

    Returns:
        Tuple of (blocked_count, blocked_call_names)
    """
    if not msg or not hasattr(msg, 'tool_calls'):
        return 0, []

    original_tool_calls = msg.tool_calls if msg.tool_calls else []
    modified_tool_calls = []
    compliant_names_set = set(compliant_names)
    blocked_calls = []
    blocked_count = 0
    tool_result_cache = tool_result_cache or {}
    tool_sigs = tool_sigs or {}

    # Process each tool call: keep compliant, rewrite blocked to dead-letter
    for idx, tc in enumerate(original_tool_calls):
        tc_name = tc.name if hasattr(tc, 'name') else tc.get('name', '')

        # Idempotency: Never rewrite dasein_deadletter itself
        if tc_name == 'dasein_deadletter':
            modified_tool_calls.append(tc)
            continue

        if tc_name in compliant_names_set:
            # PASS THROUGH: Compliant call
            modified_tool_calls.append(tc)
            compliant_names_set.remove(tc_name)  # Use each name once
        else:
            # REDIRECT: Blocked call → rewrite to dasein_deadletter

            # Create fingerprint of original args
            try:
                args_obj = tc.get('args', {}) if isinstance(tc, dict) else getattr(tc, 'args', {})
                args_str = json.dumps(args_obj, sort_keys=True)
                args_fingerprint = hashlib.sha256(args_str.encode()).hexdigest()[:16]
            except Exception:
                # BUGFIX: was bare `except:` (also caught KeyboardInterrupt).
                args_fingerprint = "unknown"

            # Estimate tokens saved (rough: 100-500 for Summary)
            tokens_saved_est = 300 if 'summary' in tc_name.lower() else 50

            # Check if we have a cached result for transparent deduplication
            sig = tool_sigs.get(idx)
            cached_result = tool_result_cache.get(sig) if sig else None

            # Create new tool call for dasein_deadletter
            deadletter_args = {
                'original_tool': tc_name,
                'original_args_fingerprint': args_fingerprint,
                'reason_code': 'duplicate_detected' if cached_result else f"{tc_name}_blocked_by_policy",
                'policy_trace_id': getattr(callback_handler, '_run_id', 'unknown'),
                'tokens_saved_estimate': tokens_saved_est
            }

            # Add cached result for transparent deduplication
            if cached_result is not None:
                deadletter_args['cached_result'] = cached_result

            deadletter_call = {
                'name': 'dasein_deadletter',
                'args': deadletter_args,
                'id': tc.get('id') if isinstance(tc, dict) else getattr(tc, 'id', f"deadletter_{blocked_count}"),
                'type': 'tool_call'
            }

            # Convert dict to ToolCall object if needed (match original type)
            if hasattr(tc, '__class__') and not isinstance(tc, dict):
                try:
                    from langchain_core.messages import tool_call
                    deadletter_call = tool_call.ToolCall(**deadletter_call)
                except Exception:
                    # BUGFIX: was bare `except:`. Keep as dict if conversion fails.
                    pass

            modified_tool_calls.append(deadletter_call)
            blocked_calls.append(tc_name)
            blocked_count += 1

    # Update with modified list (compliant + redirected)
    msg.tool_calls = modified_tool_calls

    return blocked_count, blocked_calls
343
+
344
+
345
def update_callback_state(callback_handler: Any, blocked_calls: List[str]) -> None:
    """
    Record redirected calls in the callback handler's state.

    Args:
        callback_handler: DaseinCallbackHandler with _function_calls_made
        blocked_calls: List of function names that were blocked/redirected
    """
    # Nothing to record without a handler, its state dict, or any blocks.
    if not callback_handler or not hasattr(callback_handler, '_function_calls_made') or not blocked_calls:
        return

    entries = callback_handler._function_calls_made.setdefault('dasein_deadletter', [])
    entries.extend(
        {'original_tool': name, 'blocked_by': 'microturn'}
        for name in blocked_calls
    )
364
+
365
+
366
async def run_microturn_enforcement(
    result: Any,
    callback_handler: Any,
    self_llm: Any,
    patch_depth: Any,
    use_llm_microturn: bool = False
) -> bool:
    """
    Main microturn enforcement logic - extracted from api.py for cleaner organization.

    Args:
        result: LLM result to potentially modify
        callback_handler: DaseinCallbackHandler with state
        self_llm: The LLM instance (for microturn LLM call if needed)
        patch_depth: Thread-local object with seen_tool_signatures, tool_result_cache
        use_llm_microturn: Whether to use LLM-based microturn (default False, uses deterministic only)

    Returns:
        True if enforcement was applied, False if skipped
    """
    try:
        # Extract proposed function calls
        proposed_func_names, msg = extract_proposed_function_calls(result)

        if not proposed_func_names:
            return False

        if all(name == 'dasein_deadletter' for name in proposed_func_names):
            return False  # Already processed

        # Extract tool call signatures for duplicate detection
        tool_sigs = {}
        duplicates = []
        if msg:
            tool_sigs = extract_tool_call_signatures(msg)

        # Initialize signature tracking and result cache
        if not hasattr(patch_depth, 'seen_tool_signatures'):
            patch_depth.seen_tool_signatures = set()
        if not hasattr(patch_depth, 'tool_result_cache'):
            patch_depth.tool_result_cache = {}

        # Detect duplicates (immediate fanout within this response OR across turns)
        # NOTE(review): seen_tool_signatures is only read here, never updated —
        # presumably it is populated after tool execution elsewhere; confirm.
        seen_in_response = set()
        for idx, sig in tool_sigs.items():
            if sig in seen_in_response or sig in patch_depth.seen_tool_signatures:
                # Duplicate detected
                duplicates.append((idx, sig))
                print(f"[DASEIN][MICROTURN] 🔄 Duplicate detected: {sig}")
            else:
                # First occurrence
                seen_in_response.add(sig)

        # DETERMINISTIC DUPLICATE BLOCKING (always on)
        if duplicates and msg:
            print(f"[DASEIN][MICROTURN] Blocking {len(duplicates)} duplicate call(s)")
            # BUGFIX: the previous call passed positional arguments in the
            # wrong order (the duplicates list where callback_handler belongs,
            # tool_sigs as tool_result_cache and the cache as tool_sigs) AND
            # an empty compliant list — which dead-lettered EVERY proposed
            # call, not just the duplicates. Allow all non-duplicate calls.
            duplicate_indices = {idx for idx, _ in duplicates}
            compliant = []
            for idx, tc in enumerate(msg.tool_calls or []):
                if idx in duplicate_indices:
                    continue
                compliant.append(tc.name if hasattr(tc, 'name') else tc.get('name', ''))
            # NOTE(review): compliance is matched by name inside
            # modify_tool_calls_with_deadletter, so a duplicate sharing a name
            # with a distinct non-duplicate call may survive in its place.
            blocked_count, blocked_calls = modify_tool_calls_with_deadletter(
                msg,
                compliant,
                callback_handler,
                tool_result_cache=patch_depth.tool_result_cache,
                tool_sigs=tool_sigs,
            )

            if blocked_count > 0:
                update_callback_state(callback_handler, blocked_calls)
                print(f"[DASEIN][MICROTURN] ✅ Blocked {blocked_count} duplicate(s)")
                return True

        # LLM-BASED MICROTURN (behind flag)
        if use_llm_microturn:
            # Build prompt for microturn LLM
            full_state = build_execution_state_summary(callback_handler)
            rule = "ANTI-FANOUT RULE: Only allow 1 Summary call per search. Other tools should pass through."
            microturn_prompt = create_microturn_prompt(rule, full_state, proposed_func_names)

            # Call microturn LLM (stripped kwargs to force text response)
            from langchain_core.messages import HumanMessage
            if hasattr(self_llm, 'ainvoke'):
                microturn_response = await self_llm.ainvoke([HumanMessage(content=microturn_prompt)])
            else:
                microturn_response = self_llm.invoke([HumanMessage(content=microturn_prompt)])

            # Parse response
            compliant_names = parse_microturn_response(microturn_response)

            # Modify tool calls if needed
            if msg:
                # BUGFIX: same positional-argument mixup as the deterministic
                # path above; pass callback_handler/cache/sigs correctly.
                blocked_count, blocked_calls = modify_tool_calls_with_deadletter(
                    msg,
                    compliant_names,
                    callback_handler,
                    tool_result_cache=patch_depth.tool_result_cache,
                    tool_sigs=tool_sigs,
                )

                if blocked_count > 0:
                    update_callback_state(callback_handler, blocked_calls)
                    print(f"[DASEIN][MICROTURN] ✅ LLM blocked {blocked_count} call(s): {blocked_calls}")
                    return True

        # No enforcement applied
        return False

    except Exception as e:
        print(f"[DASEIN][MICROTURN] ⚠️ Error during enforcement: {e}")
        import traceback
        traceback.print_exc()
        return False
475
+
@@ -0,0 +1,10 @@
1
from pathlib import Path
from spacy.util import load_model_from_init_py, get_model_meta


# Read the pipeline version from the meta.json packaged next to this file.
__version__ = get_model_meta(Path(__file__).parent)['version']


def load(**overrides):
    """Load the bundled spaCy pipeline; overrides pass through to the loader."""
    return load_model_from_init_py(__file__, **overrides)
@@ -0,0 +1,19 @@
1
+ Copyright 2021 ExplosionAI GmbH
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
4
+ this software and associated documentation files (the "Software"), to deal in
5
+ the Software without restriction, including without limitation the rights to
6
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7
+ of the Software, and to permit persons to whom the Software is furnished to do
8
+ so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
@@ -0,0 +1,66 @@
1
+ # OntoNotes 5
2
+
3
+ * Author: Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston
4
+ * URL: https://catalog.ldc.upenn.edu/LDC2013T19
5
+ * License: commercial (licensed by Explosion)
6
+
7
+ ```
8
+ ```
9
+
10
+
11
+
12
+
13
+ # ClearNLP Constituent-to-Dependency Conversion
14
+
15
+ * Author: Emory University
16
+ * URL: https://github.com/clir/clearnlp-guidelines/blob/master/md/components/dependency_conversion.md
17
+ * License: Citation provided for reference, no code packaged with model
18
+
19
+ ```
20
+ ```
21
+
22
+
23
+
24
+
25
+ # WordNet 3.0
26
+
27
+ * Author: Princeton University
28
+ * URL: https://wordnet.princeton.edu/
29
+ * License: WordNet 3.0 License
30
+
31
+ ```
32
+ WordNet Release 3.0
33
+
34
+ This software and database is being provided to you, the LICENSEE, by
35
+ Princeton University under the following license. By obtaining, using
36
+ and/or copying this software and database, you agree that you have
37
+ read, understood, and will comply with these terms and conditions.:
38
+
39
+ Permission to use, copy, modify and distribute this software and
40
+ database and its documentation for any purpose and without fee or
41
+ royalty is hereby granted, provided that you agree to comply with
42
+ the following copyright notice and statements, including the disclaimer,
43
+ and that the same appear on ALL copies of the software, database and
44
+ documentation, including modifications that you make for internal
45
+ use or for distribution.
46
+
47
+ WordNet 3.0 Copyright 2006 by Princeton University. All rights reserved.
48
+
49
+ THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND PRINCETON
50
+ UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
51
+ IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON
52
+ UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT-
53
+ ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE
54
+ OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT
55
+ INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR
56
+ OTHER RIGHTS.
57
+
58
+ The name of Princeton University or Princeton may not be used in
59
+ advertising or publicity pertaining to distribution of the software
60
+ and/or database. Title to copyright in this software, database and
61
+ any associated documentation shall at all times remain with
62
+ Princeton University and LICENSEE agrees to preserve same.```
63
+
64
+
65
+
66
+
@@ -0,0 +1,47 @@
1
+ ### Details: https://spacy.io/models/en#en_core_web_sm
2
+
3
+ English pipeline optimized for CPU. Components: tok2vec, tagger, parser, senter, ner, attribute_ruler, lemmatizer.
4
+
5
+ | Feature | Description |
6
+ | --- | --- |
7
+ | **Name** | `en_core_web_sm` |
8
+ | **Version** | `3.7.1` |
9
+ | **spaCy** | `>=3.7.2,<3.8.0` |
10
+ | **Default Pipeline** | `tok2vec`, `tagger`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
11
+ | **Components** | `tok2vec`, `tagger`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
12
+ | **Vectors** | 0 keys, 0 unique vectors (0 dimensions) |
13
+ | **Sources** | [OntoNotes 5](https://catalog.ldc.upenn.edu/LDC2013T19) (Ralph Weischedel, Martha Palmer, Mitchell Marcus, Eduard Hovy, Sameer Pradhan, Lance Ramshaw, Nianwen Xue, Ann Taylor, Jeff Kaufman, Michelle Franchini, Mohammed El-Bachouti, Robert Belvin, Ann Houston)<br />[ClearNLP Constituent-to-Dependency Conversion](https://github.com/clir/clearnlp-guidelines/blob/master/md/components/dependency_conversion.md) (Emory University)<br />[WordNet 3.0](https://wordnet.princeton.edu/) (Princeton University) |
14
+ | **License** | `MIT` |
15
+ | **Author** | [Explosion](https://explosion.ai) |
16
+
17
+ ### Label Scheme
18
+
19
+ <details>
20
+
21
+ <summary>View label scheme (113 labels for 3 components)</summary>
22
+
23
+ | Component | Labels |
24
+ | --- | --- |
25
+ | **`tagger`** | `$`, `''`, `,`, `-LRB-`, `-RRB-`, `.`, `:`, `ADD`, `AFX`, `CC`, `CD`, `DT`, `EX`, `FW`, `HYPH`, `IN`, `JJ`, `JJR`, `JJS`, `LS`, `MD`, `NFP`, `NN`, `NNP`, `NNPS`, `NNS`, `PDT`, `POS`, `PRP`, `PRP$`, `RB`, `RBR`, `RBS`, `RP`, `SYM`, `TO`, `UH`, `VB`, `VBD`, `VBG`, `VBN`, `VBP`, `VBZ`, `WDT`, `WP`, `WP$`, `WRB`, `XX`, `_SP`, ```` |
26
+ | **`parser`** | `ROOT`, `acl`, `acomp`, `advcl`, `advmod`, `agent`, `amod`, `appos`, `attr`, `aux`, `auxpass`, `case`, `cc`, `ccomp`, `compound`, `conj`, `csubj`, `csubjpass`, `dative`, `dep`, `det`, `dobj`, `expl`, `intj`, `mark`, `meta`, `neg`, `nmod`, `npadvmod`, `nsubj`, `nsubjpass`, `nummod`, `oprd`, `parataxis`, `pcomp`, `pobj`, `poss`, `preconj`, `predet`, `prep`, `prt`, `punct`, `quantmod`, `relcl`, `xcomp` |
27
+ | **`ner`** | `CARDINAL`, `DATE`, `EVENT`, `FAC`, `GPE`, `LANGUAGE`, `LAW`, `LOC`, `MONEY`, `NORP`, `ORDINAL`, `ORG`, `PERCENT`, `PERSON`, `PRODUCT`, `QUANTITY`, `TIME`, `WORK_OF_ART` |
28
+
29
+ </details>
30
+
31
+ ### Accuracy
32
+
33
+ | Type | Score |
34
+ | --- | --- |
35
+ | `TOKEN_ACC` | 99.86 |
36
+ | `TOKEN_P` | 99.57 |
37
+ | `TOKEN_R` | 99.58 |
38
+ | `TOKEN_F` | 99.57 |
39
+ | `TAG_ACC` | 97.25 |
40
+ | `SENTS_P` | 92.02 |
41
+ | `SENTS_R` | 89.21 |
42
+ | `SENTS_F` | 90.59 |
43
+ | `DEP_UAS` | 91.75 |
44
+ | `DEP_LAS` | 89.87 |
45
+ | `ENTS_P` | 84.55 |
46
+ | `ENTS_R` | 84.57 |
47
+ | `ENTS_F` | 84.56 |