proxilion 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. proxilion/__init__.py +136 -0
  2. proxilion/audit/__init__.py +133 -0
  3. proxilion/audit/base_exporters.py +527 -0
  4. proxilion/audit/compliance/__init__.py +130 -0
  5. proxilion/audit/compliance/base.py +457 -0
  6. proxilion/audit/compliance/eu_ai_act.py +603 -0
  7. proxilion/audit/compliance/iso27001.py +544 -0
  8. proxilion/audit/compliance/soc2.py +491 -0
  9. proxilion/audit/events.py +493 -0
  10. proxilion/audit/explainability.py +1173 -0
  11. proxilion/audit/exporters/__init__.py +58 -0
  12. proxilion/audit/exporters/aws_s3.py +636 -0
  13. proxilion/audit/exporters/azure_storage.py +608 -0
  14. proxilion/audit/exporters/cloud_base.py +468 -0
  15. proxilion/audit/exporters/gcp_storage.py +570 -0
  16. proxilion/audit/exporters/multi_exporter.py +498 -0
  17. proxilion/audit/hash_chain.py +652 -0
  18. proxilion/audit/logger.py +543 -0
  19. proxilion/caching/__init__.py +49 -0
  20. proxilion/caching/tool_cache.py +633 -0
  21. proxilion/context/__init__.py +73 -0
  22. proxilion/context/context_window.py +556 -0
  23. proxilion/context/message_history.py +505 -0
  24. proxilion/context/session.py +735 -0
  25. proxilion/contrib/__init__.py +51 -0
  26. proxilion/contrib/anthropic.py +609 -0
  27. proxilion/contrib/google.py +1012 -0
  28. proxilion/contrib/langchain.py +641 -0
  29. proxilion/contrib/mcp.py +893 -0
  30. proxilion/contrib/openai.py +646 -0
  31. proxilion/core.py +3058 -0
  32. proxilion/decorators.py +966 -0
  33. proxilion/engines/__init__.py +287 -0
  34. proxilion/engines/base.py +266 -0
  35. proxilion/engines/casbin_engine.py +412 -0
  36. proxilion/engines/opa_engine.py +493 -0
  37. proxilion/engines/simple.py +437 -0
  38. proxilion/exceptions.py +887 -0
  39. proxilion/guards/__init__.py +54 -0
  40. proxilion/guards/input_guard.py +522 -0
  41. proxilion/guards/output_guard.py +634 -0
  42. proxilion/observability/__init__.py +198 -0
  43. proxilion/observability/cost_tracker.py +866 -0
  44. proxilion/observability/hooks.py +683 -0
  45. proxilion/observability/metrics.py +798 -0
  46. proxilion/observability/session_cost_tracker.py +1063 -0
  47. proxilion/policies/__init__.py +67 -0
  48. proxilion/policies/base.py +304 -0
  49. proxilion/policies/builtin.py +486 -0
  50. proxilion/policies/registry.py +376 -0
  51. proxilion/providers/__init__.py +201 -0
  52. proxilion/providers/adapter.py +468 -0
  53. proxilion/providers/anthropic_adapter.py +330 -0
  54. proxilion/providers/gemini_adapter.py +391 -0
  55. proxilion/providers/openai_adapter.py +294 -0
  56. proxilion/py.typed +0 -0
  57. proxilion/resilience/__init__.py +81 -0
  58. proxilion/resilience/degradation.py +615 -0
  59. proxilion/resilience/fallback.py +555 -0
  60. proxilion/resilience/retry.py +554 -0
  61. proxilion/scheduling/__init__.py +57 -0
  62. proxilion/scheduling/priority_queue.py +419 -0
  63. proxilion/scheduling/scheduler.py +459 -0
  64. proxilion/security/__init__.py +244 -0
  65. proxilion/security/agent_trust.py +968 -0
  66. proxilion/security/behavioral_drift.py +794 -0
  67. proxilion/security/cascade_protection.py +869 -0
  68. proxilion/security/circuit_breaker.py +428 -0
  69. proxilion/security/cost_limiter.py +690 -0
  70. proxilion/security/idor_protection.py +460 -0
  71. proxilion/security/intent_capsule.py +849 -0
  72. proxilion/security/intent_validator.py +495 -0
  73. proxilion/security/memory_integrity.py +767 -0
  74. proxilion/security/rate_limiter.py +509 -0
  75. proxilion/security/scope_enforcer.py +680 -0
  76. proxilion/security/sequence_validator.py +636 -0
  77. proxilion/security/trust_boundaries.py +784 -0
  78. proxilion/streaming/__init__.py +70 -0
  79. proxilion/streaming/detector.py +761 -0
  80. proxilion/streaming/transformer.py +674 -0
  81. proxilion/timeouts/__init__.py +55 -0
  82. proxilion/timeouts/decorators.py +477 -0
  83. proxilion/timeouts/manager.py +545 -0
  84. proxilion/tools/__init__.py +69 -0
  85. proxilion/tools/decorators.py +493 -0
  86. proxilion/tools/registry.py +732 -0
  87. proxilion/types.py +339 -0
  88. proxilion/validation/__init__.py +93 -0
  89. proxilion/validation/pydantic_schema.py +351 -0
  90. proxilion/validation/schema.py +651 -0
  91. proxilion-0.0.1.dist-info/METADATA +872 -0
  92. proxilion-0.0.1.dist-info/RECORD +94 -0
  93. proxilion-0.0.1.dist-info/WHEEL +4 -0
  94. proxilion-0.0.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,849 @@
1
+ """
2
+ Intent Capsule for Proxilion.
3
+
4
+ Addresses OWASP ASI01: Agent Goal Hijack.
5
+
6
+ This module provides cryptographic binding of the original user intent
7
+ to every execution cycle, making goal hijacking detectable.
8
+
9
+ The Intent Capsule pattern ensures that:
10
+ - The original user request is signed at creation
11
+ - Every tool call is validated against the original intent
12
+ - Mid-execution hijacking attempts are detected
13
+ - The agent cannot deviate from its mandate
14
+
15
+ Example:
16
+ >>> from proxilion.security.intent_capsule import (
17
+ ... IntentCapsule,
18
+ ... IntentGuard,
19
+ ... IntentValidator,
20
+ ... )
21
+ >>>
22
+ >>> # Create capsule from user request
23
+ >>> capsule = IntentCapsule.create(
24
+ ... user_id="alice",
25
+ ... intent="Help me find documents about Python",
26
+ ... allowed_tools=["search_documents", "read_document"],
27
+ ... secret_key="your-secret",
28
+ ... )
29
+ >>>
30
+ >>> # Validate each tool call against intent
31
+ >>> guard = IntentGuard(capsule)
32
+ >>>
33
+ >>> if guard.validate_tool_call("search_documents", {"query": "Python"}):
34
+ ... result = search_documents(query="Python")
35
+ ... else:
36
+ ... raise IntentHijackError("Tool call not aligned with intent")
37
+ >>>
38
+ >>> # Detect hijacking attempts
39
+ >>> validator = IntentValidator()
40
+ >>> detection = validator.detect_hijack(
41
+ ... original_intent="Find documents",
42
+ ... current_action="Delete all files",
43
+ ... )
44
+ """
45
+
46
+ from __future__ import annotations
47
+
48
+ import hashlib
49
+ import hmac
50
+ import json
51
+ import logging
52
+ import re
53
+ import threading
54
+ import time
55
+ import uuid
56
+ from dataclasses import dataclass, field
57
+ from datetime import datetime, timedelta, timezone
58
+ from enum import Enum
59
+ from typing import Any
60
+
61
+ from proxilion.exceptions import IntentHijackError
62
+
63
# Module-level logger; IntentGuard and IntentCapsuleManager log through it.
logger = logging.getLogger(__name__)
64
+
65
+
66
class IntentCategory(Enum):
    """
    Categories of user intent.

    Each member's value is the lowercase string used when a capsule is
    serialized (``IntentCapsule.to_dict``) and when it is signed.
    """

    QUERY = "query"
    """Information retrieval (read-only)."""

    CREATE = "create"
    """Creating new resources."""

    UPDATE = "update"
    """Modifying existing resources."""

    DELETE = "delete"
    """Removing resources."""

    EXECUTE = "execute"
    """Running code or processes."""

    COMMUNICATE = "communicate"
    """Sending messages or notifications."""

    ANALYZE = "analyze"
    """Processing or analyzing data."""

    UNKNOWN = "unknown"
    """Unable to categorize."""
92
+
93
+
94
@dataclass
class IntentCapsule:
    """
    Signed container for a user's intent.

    ``create`` computes an HMAC-SHA256 signature over the capsule id, user id,
    intent text, intent category, allowed tools, allowed actions, constraints
    and creation time. ``verify`` recomputes that signature, so a change to
    any of those fields is detectable by a holder of the secret key.

    NOTE(review): ``expires_at`` and ``metadata`` are not part of the signed
    payload, so changing them does not invalidate the signature. Extending
    the payload would change the signature format; confirm with the owners
    before doing so.
    """

    capsule_id: str
    user_id: str
    intent: str
    intent_category: IntentCategory
    allowed_tools: set[str]
    allowed_actions: set[str]
    constraints: dict[str, Any]
    created_at: datetime
    expires_at: datetime
    signature: str
    metadata: dict[str, Any] = field(default_factory=dict)

    # Execution tracking: a bounded history of recorded tool calls.
    tool_calls: list[dict[str, Any]] = field(default_factory=list)
    _max_tool_calls: int = 100

    def is_expired(self) -> bool:
        """Return True once the current UTC time is past ``expires_at``."""
        return datetime.now(timezone.utc) > self.expires_at

    def is_tool_allowed(self, tool_name: str) -> bool:
        """
        Check if a tool is allowed by this capsule.

        A tool is allowed when ``allowed_tools`` contains ``"*"``, contains
        the exact name, or contains a glob such as ``"read_*"`` that matches
        the whole name. Only ``*`` is a wildcard; every other character in a
        pattern is matched literally.

        Args:
            tool_name: Name of the tool being requested.

        Returns:
            True if the tool is permitted.
        """
        if "*" in self.allowed_tools or tool_name in self.allowed_tools:
            return True

        for pattern in self.allowed_tools:
            if "*" not in pattern:
                continue
            # Escape the literal pieces so characters such as "." or "(" in a
            # tool name are not interpreted as regex syntax.
            regex = ".*".join(re.escape(piece) for piece in pattern.split("*"))
            if re.fullmatch(regex, tool_name):
                return True
        return False

    def is_action_allowed(self, action: str) -> bool:
        """Check if an action is allowed (``"*"`` allows every action)."""
        if "*" in self.allowed_actions:
            return True
        return action in self.allowed_actions

    def record_tool_call(
        self,
        tool_name: str,
        arguments: dict[str, Any],
        result: Any = None,
    ) -> None:
        """
        Append a tool call to the bounded call history.

        Args:
            tool_name: Name of the tool that was called.
            arguments: Arguments the tool was called with.
            result: Optional result; only its type name is stored.
        """
        overflow = len(self.tool_calls) - self._max_tool_calls + 1
        if overflow > 0:
            # Drop the oldest entries so that, after the append below, at most
            # ``_max_tool_calls`` entries remain.
            self.tool_calls = self.tool_calls[overflow:]

        self.tool_calls.append({
            "tool_name": tool_name,
            "arguments": arguments,
            # ``is not None`` so falsy results (0, "", [], False) keep a type.
            "result_type": type(result).__name__ if result is not None else None,
            "timestamp": time.time(),
        })

    def to_dict(self) -> dict[str, Any]:
        """
        Serialize the capsule to a plain dict.

        Tool and action sets are emitted as sorted lists so the output is
        deterministic. The call history is summarized as ``tool_call_count``.
        """
        return {
            "capsule_id": self.capsule_id,
            "user_id": self.user_id,
            "intent": self.intent,
            "intent_category": self.intent_category.value,
            "allowed_tools": sorted(self.allowed_tools),
            "allowed_actions": sorted(self.allowed_actions),
            "constraints": self.constraints,
            "created_at": self.created_at.isoformat(),
            "expires_at": self.expires_at.isoformat(),
            "signature": self.signature,
            "metadata": self.metadata,
            "tool_call_count": len(self.tool_calls),
        }

    @staticmethod
    def _as_set(values: set[str] | list[str] | str | None) -> set[str]:
        """Normalize an optional collection of names into a new set."""
        if values is None:
            return set()
        if isinstance(values, str):
            # A bare string is one name, not an iterable of characters.
            return {values}
        return set(values)

    @staticmethod
    def _signature_payload(
        capsule_id: str,
        user_id: str,
        intent: str,
        intent_category: IntentCategory,
        allowed_tools: set[str],
        allowed_actions: set[str],
        constraints: dict[str, Any],
        created_at: datetime,
    ) -> bytes:
        """Build the byte string that is signed; shared by create and verify."""
        text = (
            f"{capsule_id}|{user_id}|{intent}|{intent_category.value}|"
            f"{sorted(allowed_tools)}|{sorted(allowed_actions)}|"
            f"{json.dumps(constraints, sort_keys=True)}|{created_at.isoformat()}"
        )
        return text.encode()

    @staticmethod
    def _sign(secret_key: str | bytes, payload: bytes) -> str:
        """Return the hex HMAC-SHA256 of ``payload`` under ``secret_key``."""
        if isinstance(secret_key, str):
            secret_key = secret_key.encode()
        return hmac.new(secret_key, payload, hashlib.sha256).hexdigest()

    @classmethod
    def create(
        cls,
        user_id: str,
        intent: str,
        secret_key: str | bytes,
        allowed_tools: set[str] | list[str] | None = None,
        allowed_actions: set[str] | list[str] | None = None,
        constraints: dict[str, Any] | None = None,
        ttl_seconds: int = 3600,
        metadata: dict[str, Any] | None = None,
        intent_category: IntentCategory | None = None,
    ) -> IntentCapsule:
        """
        Create a new intent capsule.

        Args:
            user_id: ID of the user making the request.
            intent: Natural language description of user intent.
            secret_key: Secret key for signing.
            allowed_tools: Tools allowed for this intent.
            allowed_actions: Actions allowed (read, write, delete, etc.).
            constraints: Additional constraints (max_results, allowed_paths, etc.).
            ttl_seconds: Time-to-live for the capsule.
            metadata: Optional metadata.
            intent_category: Category of intent (auto-detected if not provided).

        Returns:
            Signed IntentCapsule.
        """
        capsule_id = str(uuid.uuid4())
        now = datetime.now(timezone.utc)
        expires_at = now + timedelta(seconds=ttl_seconds)

        tools = cls._as_set(allowed_tools)
        actions = cls._as_set(allowed_actions)
        constraints = constraints or {}

        # Auto-detect category if not provided
        if intent_category is None:
            intent_category = cls._detect_intent_category(intent)

        signature = cls._sign(
            secret_key,
            cls._signature_payload(
                capsule_id,
                user_id,
                intent,
                intent_category,
                tools,
                actions,
                constraints,
                now,
            ),
        )

        return cls(
            capsule_id=capsule_id,
            user_id=user_id,
            intent=intent,
            intent_category=intent_category,
            allowed_tools=tools,
            allowed_actions=actions,
            constraints=constraints,
            created_at=now,
            expires_at=expires_at,
            signature=signature,
            metadata=metadata or {},
        )

    @staticmethod
    def _detect_intent_category(intent: str) -> IntentCategory:
        """
        Auto-detect intent category from natural language.

        The lowercased text is scanned for keywords; the first category (in
        the order below) with any keyword present wins.

        NOTE(review): this is substring matching, not word matching, so
        "news" contains "new" (CREATE) and "address" contains "add".
        """
        intent_lower = intent.lower()

        # Ordered from most to least destructive; QUERY is checked last
        # because its keywords are the most common.
        keyword_table = (
            (IntentCategory.DELETE,
             ("delete", "remove", "destroy", "erase", "drop", "clear")),
            (IntentCategory.CREATE,
             ("create", "make", "generate", "build", "add", "new", "write")),
            (IntentCategory.UPDATE,
             ("update", "modify", "change", "edit", "fix", "correct")),
            (IntentCategory.EXECUTE,
             ("run", "execute", "start", "launch", "deploy", "install")),
            (IntentCategory.COMMUNICATE,
             ("send", "email", "message", "notify", "alert", "share")),
            (IntentCategory.ANALYZE,
             ("analyze", "process", "calculate", "compute", "summarize")),
            (IntentCategory.QUERY,
             ("find", "search", "get", "show", "list", "display", "fetch",
              "what", "where", "when", "who", "how", "help", "tell")),
        )
        for category, keywords in keyword_table:
            if any(word in intent_lower for word in keywords):
                return category

        return IntentCategory.UNKNOWN

    def verify(self, secret_key: str | bytes) -> bool:
        """
        Verify the capsule signature.

        Args:
            secret_key: Key the capsule is expected to have been signed with.

        Returns:
            True if the recomputed signature equals ``self.signature``.
        """
        expected_sig = self._sign(
            secret_key,
            self._signature_payload(
                self.capsule_id,
                self.user_id,
                self.intent,
                self.intent_category,
                self.allowed_tools,
                self.allowed_actions,
                self.constraints,
                self.created_at,
            ),
        )
        # Constant-time comparison to avoid leaking how many characters match.
        return hmac.compare_digest(expected_sig, self.signature)
324
+
325
+
326
# Hijacking detection patterns.
#
# Each entry is (regex, description, severity). IntentValidator compiles every
# regex, searches the described action text with it, and reports the highest
# severity among the entries that matched. All patterns carry the inline
# ``(?i)`` flag, so matching is case-insensitive.
HIJACK_PATTERNS: list[tuple[str, str, float]] = [
    # (pattern, description, severity)
    (r"(?i)ignore\s+(all\s+)?(previous|prior|original)\s+(intent|instructions?|goals?)",
     "Intent override attempt", 0.95),
    (r"(?i)new\s+(goal|objective|task|mission)\s*:",
     "Goal replacement attempt", 0.9),
    (r"(?i)forget\s+(your|the)\s+(original|primary|main)\s+(purpose|goal|task)",
     "Purpose erasure attempt", 0.9),
    (r"(?i)your\s+(real|true|actual)\s+(purpose|goal|mission)\s+is",
     "False purpose injection", 0.95),
    (r"(?i)override\s+(priority|directive|command)",
     "Priority override attempt", 0.85),
    (r"(?i)emergency\s+(override|protocol|mode)",
     "Emergency bypass attempt", 0.8),
    (r"(?i)admin(istrator)?\s+(mode|override|access)",
     "Admin escalation attempt", 0.85),
    (r"(?i)disregard\s+(user|original)\s+(request|intent)",
     "Disregard user intent", 0.9),
]
346
+
347
+
348
@dataclass
class HijackDetection:
    """Result of hijack detection analysis, as returned by IntentValidator.detect_hijack."""

    # True when the highest matched severity reached the validator's threshold.
    is_hijack: bool
    # Highest severity among the matched patterns (0.0 when nothing matched).
    confidence: float
    # The original user intent that was passed in for comparison.
    original_intent: str
    # The action text that was analyzed.
    detected_action: str
    # Descriptions of every pattern or category escalation that matched.
    matched_patterns: list[str] = field(default_factory=list)
    # Human-readable summary of the findings.
    reasoning: str = ""
358
+
359
+
360
class IntentValidator:
    """
    Checks whether a described action still matches the original intent.

    Detection is heuristic: the action text is searched with a list of
    hijack regexes, and the keyword-derived categories of the intent and the
    action are compared for dangerous escalations.
    """

    def __init__(
        self,
        custom_patterns: list[tuple[str, str, float]] | None = None,
        semantic_threshold: float = 0.5,
    ) -> None:
        """
        Initialize the validator.

        Args:
            custom_patterns: Extra ``(regex, description, severity)`` entries,
                evaluated after the built-in HIJACK_PATTERNS.
            semantic_threshold: Minimum severity (0-1) at which a finding is
                reported as a hijack.
        """
        sources = list(HIJACK_PATTERNS) + list(custom_patterns or [])
        self._patterns: list[tuple[re.Pattern[str], str, float]] = [
            (re.compile(expression), label, weight)
            for expression, label, weight in sources
        ]
        self._semantic_threshold = semantic_threshold

    def detect_hijack(
        self,
        original_intent: str,
        current_action: str,
        tool_name: str | None = None,
        context: dict[str, Any] | None = None,
    ) -> HijackDetection:
        """
        Detect if current action represents a goal hijack.

        Args:
            original_intent: The original user intent.
            current_action: What the agent is currently doing/planning.
            tool_name: Optional tool being called (forwarded to the category
                check, which does not currently use it).
            context: Optional additional context (currently unused).

        Returns:
            HijackDetection with analysis results.
        """
        # Explicit hijack phrases found in the action text.
        hits = [
            (label, weight)
            for regex, label, weight in self._patterns
            if regex.search(current_action)
        ]
        findings = [label for label, _ in hits]
        severity = max([0.0, *(weight for _, weight in hits)])

        # A dangerous category escalation counts as a 0.7 finding.
        escalation = self._check_category_mismatch(
            original_intent, current_action, tool_name
        )
        if escalation:
            findings.append(escalation)
            severity = max(severity, 0.7)

        reasoning = (
            f"Detected patterns: {', '.join(findings)}"
            if findings
            else "No hijacking patterns detected"
        )

        return HijackDetection(
            is_hijack=severity >= self._semantic_threshold,
            confidence=severity,
            original_intent=original_intent,
            detected_action=current_action,
            matched_patterns=findings,
            reasoning=reasoning,
        )

    def _check_category_mismatch(
        self,
        original_intent: str,
        current_action: str,
        tool_name: str | None,
    ) -> str | None:
        """Return a description if the action's category is a dangerous escalation."""
        source = IntentCapsule._detect_intent_category(original_intent)
        target = IntentCapsule._detect_intent_category(current_action)

        # Read-only or analysis intents must not turn into delete or execute.
        escalations = {
            (IntentCategory.QUERY, IntentCategory.DELETE): "Read intent escalated to delete",
            (IntentCategory.QUERY, IntentCategory.EXECUTE): "Read intent escalated to execute",
            (IntentCategory.ANALYZE, IntentCategory.DELETE): "Analyze intent escalated to delete",
            (IntentCategory.ANALYZE, IntentCategory.EXECUTE): "Analyze intent escalated to execute",
        }
        return escalations.get((source, target))
463
+
464
+
465
class IntentGuard:
    """
    Guards agent execution against intent violations.

    Wraps an IntentCapsule and validates each action against
    the original intent before allowing execution.

    Example:
        >>> capsule = IntentCapsule.create(
        ...     user_id="alice",
        ...     intent="Search for documents",
        ...     secret_key="secret",
        ...     allowed_tools=["search"],
        ... )
        >>> guard = IntentGuard(capsule)
        >>>
        >>> # This will pass
        >>> guard.validate_tool_call("search", {"query": "python"})
        >>>
        >>> # This will fail
        >>> guard.validate_tool_call("delete_all", {})
    """

    def __init__(
        self,
        capsule: IntentCapsule,
        secret_key: str | bytes | None = None,
        validator: IntentValidator | None = None,
        strict_mode: bool = False,
    ) -> None:
        """
        Initialize the guard.

        Args:
            capsule: The intent capsule to guard.
            secret_key: Secret key for capsule verification.
            validator: Custom intent validator.
            strict_mode: If True, raise exceptions on violations.

        Raises:
            IntentHijackError: If a key is given and the capsule signature
                does not verify.
        """
        self._capsule = capsule
        self._secret_key = secret_key
        self._validator = validator or IntentValidator()
        self._strict_mode = strict_mode
        self._lock = threading.RLock()
        # Calls accepted by this guard. Kept separately because the capsule's
        # own history is bounded and therefore undercounts long sessions.
        self._accepted_calls = 0

        # Verify capsule if key provided
        if secret_key and not capsule.verify(secret_key):
            raise IntentHijackError(
                original_intent=capsule.intent,
                detected_intent="Capsule signature verification failed",
                confidence=1.0,
            )

    @property
    def capsule(self) -> IntentCapsule:
        """Get the protected capsule."""
        return self._capsule

    def validate_tool_call(
        self,
        tool_name: str,
        arguments: dict[str, Any],
        description: str | None = None,
    ) -> bool:
        """
        Validate a tool call against the intent.

        Checks run in order: capsule expiry, tool allow-list, hijack patterns
        (only when ``description`` is given), then constraints. An accepted
        call is recorded on the capsule.

        Args:
            tool_name: Name of the tool being called.
            arguments: Tool arguments.
            description: Optional description of what the call does.

        Returns:
            True if the call is allowed.

        Raises:
            IntentHijackError: If strict_mode and violation detected.
        """
        with self._lock:
            # Check expiration
            if self._capsule.is_expired():
                return self._handle_violation("Intent capsule has expired", 0.9)

            # Check tool is allowed
            if not self._capsule.is_tool_allowed(tool_name):
                return self._handle_violation(
                    f"Tool '{tool_name}' not allowed by intent",
                    0.8,
                )

            # Check for hijacking patterns if description provided
            if description:
                detection = self._validator.detect_hijack(
                    original_intent=self._capsule.intent,
                    current_action=description,
                    tool_name=tool_name,
                )
                if detection.is_hijack:
                    return self._handle_violation(
                        detection.reasoning,
                        detection.confidence,
                    )

            # Check constraints
            constraint_violation = self._check_constraints(tool_name, arguments)
            if constraint_violation:
                return self._handle_violation(constraint_violation, 0.7)

            # Record the call
            self._capsule.record_tool_call(tool_name, arguments)
            self._accepted_calls += 1

            return True

    @staticmethod
    def _path_is_within(candidate: Any, root: Any) -> bool:
        """Return True if ``candidate`` is ``root`` or lies beneath it."""
        import os

        try:
            target = os.fspath(candidate)
            base = os.fspath(root)
        except TypeError:
            return False
        if not (isinstance(target, str) and isinstance(base, str)):
            return False

        # Collapse "." and ".." so "/data/../etc" cannot pass as "/data".
        # NOTE: this is lexical only; symlinks are not resolved.
        target = os.path.normpath(target)
        base = os.path.normpath(base)
        if target == base:
            return True
        # Compare on a directory boundary so "/database" is not under "/data".
        prefix = base if base.endswith(os.sep) else base + os.sep
        return target.startswith(prefix)

    def _check_constraints(
        self,
        tool_name: str,
        arguments: dict[str, Any],
    ) -> str | None:
        """
        Check if tool call violates the capsule's constraints.

        Args:
            tool_name: Name of the tool being called (not used by the checks).
            arguments: Tool arguments to inspect.

        Returns:
            A description of the first violation found, or None.
        """
        constraints = self._capsule.constraints

        # Check max results: both argument spellings are checked so one
        # cannot be used to mask the other.
        if "max_results" in constraints:
            ceiling = constraints["max_results"]
            for key in ("limit", "max_results"):
                requested = arguments.get(key)
                if requested is None or requested == "":
                    continue
                try:
                    amount = float(requested)
                except (TypeError, ValueError):
                    return f"Result limit {requested!r} is not a number"
                # Written as "not <=" so NaN is rejected as well.
                if not amount <= ceiling:
                    return f"Result limit {requested} exceeds max {ceiling}"

        # Check allowed paths: every path-like argument must be inside one
        # of the allowed roots.
        if "allowed_paths" in constraints:
            roots = constraints["allowed_paths"]
            if isinstance(roots, str):
                roots = [roots]
            for key in ("path", "file_path"):
                candidate = arguments.get(key)
                if not candidate:
                    continue
                if not any(self._path_is_within(candidate, root) for root in roots):
                    return f"Path '{candidate}' not in allowed paths"

        # Check forbidden arguments
        if "forbidden_args" in constraints:
            forbidden = constraints["forbidden_args"]
            for key in arguments:
                if key in forbidden:
                    return f"Argument '{key}' is forbidden"

        # Check resource limits against whichever count is larger: the
        # capsule's bounded history or the calls this guard has accepted.
        if "max_tool_calls" in constraints:
            calls_made = max(len(self._capsule.tool_calls), self._accepted_calls)
            if calls_made >= constraints["max_tool_calls"]:
                return f"Exceeded max tool calls ({constraints['max_tool_calls']})"

        return None

    def _handle_violation(self, reason: str, confidence: float) -> bool:
        """
        Log a violation and either raise (strict mode) or return False.

        Raises:
            IntentHijackError: If the guard was created with strict_mode.
        """
        logger.warning(f"Intent violation: {reason} (confidence: {confidence:.1%})")

        if self._strict_mode:
            raise IntentHijackError(
                original_intent=self._capsule.intent,
                detected_intent=reason,
                confidence=confidence,
            )

        return False

    def get_allowed_tools(self) -> set[str]:
        """Get a copy of the set of allowed tools."""
        return self._capsule.allowed_tools.copy()

    def get_intent_summary(self) -> dict[str, Any]:
        """Get a summary of the guarded intent."""
        remaining = self._capsule.expires_at - datetime.now(timezone.utc)
        return {
            "intent": self._capsule.intent,
            "category": self._capsule.intent_category.value,
            "allowed_tools": list(self._capsule.allowed_tools),
            "tool_calls_made": len(self._capsule.tool_calls),
            # Clamped at zero for capsules that have already expired.
            "expires_in_seconds": max(0, remaining.total_seconds()),
        }
650
+
651
+
652
class IntentCapsuleManager:
    """
    Manages intent capsules for multiple sessions.

    Keeps capsules in an in-memory registry indexed by capsule id and by
    user id, drops expired capsules, and keeps the registry within
    ``max_capsules`` entries.
    """

    def __init__(
        self,
        secret_key: str | bytes,
        default_ttl: int = 3600,
        max_capsules: int = 10000,
    ) -> None:
        """
        Initialize the manager.

        Args:
            secret_key: Master secret key for signing capsules.
            default_ttl: Default TTL for capsules.
            max_capsules: Maximum capsules to track.
        """
        if isinstance(secret_key, str):
            secret_key = secret_key.encode()

        self._secret_key = secret_key
        self._default_ttl = default_ttl
        self._max_capsules = max_capsules

        self._capsules: dict[str, IntentCapsule] = {}
        self._user_capsules: dict[str, list[str]] = {}  # user_id -> capsule_ids
        self._lock = threading.RLock()

    def _discard(self, capsule_id: str) -> IntentCapsule | None:
        """Remove a capsule from both indexes; the caller must hold the lock."""
        capsule = self._capsules.pop(capsule_id, None)
        if capsule is None:
            return None

        owned = self._user_capsules.get(capsule.user_id)
        if owned is not None:
            remaining = [cid for cid in owned if cid != capsule_id]
            if remaining:
                self._user_capsules[capsule.user_id] = remaining
            else:
                # Drop empty entries so the user index does not grow forever.
                del self._user_capsules[capsule.user_id]
        return capsule

    def create_capsule(
        self,
        user_id: str,
        intent: str,
        allowed_tools: set[str] | list[str] | None = None,
        allowed_actions: set[str] | list[str] | None = None,
        constraints: dict[str, Any] | None = None,
        ttl_seconds: int | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> IntentCapsule:
        """
        Create and register a new intent capsule.

        When the registry is full, expired capsules are removed first; if it
        is still full, the oldest registered capsules are evicted so the
        ``max_capsules`` bound holds.

        Args:
            user_id: ID of the user.
            intent: User's intent.
            allowed_tools: Tools allowed for this intent.
            allowed_actions: Actions allowed.
            constraints: Additional constraints.
            ttl_seconds: Time-to-live; the manager default is used when None.
            metadata: Optional metadata.

        Returns:
            The created IntentCapsule.
        """
        capsule = IntentCapsule.create(
            user_id=user_id,
            intent=intent,
            secret_key=self._secret_key,
            allowed_tools=allowed_tools,
            allowed_actions=allowed_actions,
            constraints=constraints,
            # ``is None`` so an explicit 0 is not replaced by the default.
            ttl_seconds=self._default_ttl if ttl_seconds is None else ttl_seconds,
            metadata=metadata,
        )

        with self._lock:
            # Cleanup if at capacity
            if len(self._capsules) >= self._max_capsules:
                self._cleanup_expired()
            # Dicts preserve insertion order, so the first key is the oldest.
            while self._capsules and len(self._capsules) >= self._max_capsules:
                oldest_id = next(iter(self._capsules))
                self._discard(oldest_id)
                logger.warning(f"Evicted capsule at capacity: {oldest_id}")

            self._capsules[capsule.capsule_id] = capsule
            self._user_capsules.setdefault(user_id, []).append(capsule.capsule_id)

        logger.debug(f"Created intent capsule: {capsule.capsule_id} for user {user_id}")
        return capsule

    def get_capsule(self, capsule_id: str) -> IntentCapsule | None:
        """Get a capsule by ID; an expired capsule is removed and None is returned."""
        with self._lock:
            capsule = self._capsules.get(capsule_id)
            if capsule and capsule.is_expired():
                self._discard(capsule_id)
                return None
            return capsule

    def get_user_capsules(self, user_id: str) -> list[IntentCapsule]:
        """Get all active (registered and unexpired) capsules for a user."""
        with self._lock:
            candidates = (
                self._capsules.get(cid)
                for cid in self._user_capsules.get(user_id, [])
            )
            return [
                capsule for capsule in candidates
                if capsule and not capsule.is_expired()
            ]

    def revoke_capsule(self, capsule_id: str) -> bool:
        """
        Revoke a capsule.

        Returns:
            True if the capsule was registered and has been removed.
        """
        with self._lock:
            if self._discard(capsule_id) is None:
                return False
            logger.info(f"Revoked capsule: {capsule_id}")
            return True

    def verify_capsule(self, capsule_id: str) -> bool:
        """Verify a registered capsule's signature; False if it is unknown."""
        with self._lock:
            capsule = self._capsules.get(capsule_id)
            if not capsule:
                return False
            return capsule.verify(self._secret_key)

    def create_guard(
        self,
        capsule_id: str,
        strict_mode: bool = False,
    ) -> IntentGuard | None:
        """
        Create a guard for a capsule.

        Args:
            capsule_id: ID of the capsule.
            strict_mode: If True, guard raises exceptions on violations.

        Returns:
            IntentGuard for the capsule, or None if not found.
        """
        capsule = self.get_capsule(capsule_id)
        if not capsule:
            return None

        return IntentGuard(
            capsule=capsule,
            secret_key=self._secret_key,
            strict_mode=strict_mode,
        )

    def _cleanup_expired(self) -> int:
        """Remove every expired capsule and return how many were removed."""
        now = datetime.now(timezone.utc)
        expired = [
            cid for cid, capsule in self._capsules.items()
            if capsule.expires_at < now
        ]

        for cid in expired:
            self._discard(cid)

        return len(expired)

    def get_stats(self) -> dict[str, Any]:
        """Get manager statistics."""
        with self._lock:
            return {
                "total_capsules": len(self._capsules),
                "total_users": len(self._user_capsules),
                "max_capsules": self._max_capsules,
            }
835
+
836
+
837
# Convenience exports: the names re-exported by ``from ... import *``.
__all__ = [
    # Core classes
    "IntentCapsule",
    "IntentGuard",
    "IntentValidator",
    "IntentCapsuleManager",
    # Data classes
    "HijackDetection",
    "IntentCategory",
    # Patterns
    "HIJACK_PATTERNS",
]