agent_os_kernel 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. agent_control_plane/__init__.py +662 -0
  2. agent_control_plane/a2a_adapter.py +543 -0
  3. agent_control_plane/adapter.py +417 -0
  4. agent_control_plane/agent_hibernation.py +394 -0
  5. agent_control_plane/agent_kernel.py +470 -0
  6. agent_control_plane/compliance.py +720 -0
  7. agent_control_plane/constraint_graphs.py +478 -0
  8. agent_control_plane/control_plane.py +854 -0
  9. agent_control_plane/example_executors.py +195 -0
  10. agent_control_plane/execution_engine.py +231 -0
  11. agent_control_plane/flight_recorder.py +846 -0
  12. agent_control_plane/governance_layer.py +435 -0
  13. agent_control_plane/hf_utils.py +563 -0
  14. agent_control_plane/interfaces/__init__.py +55 -0
  15. agent_control_plane/interfaces/kernel_interface.py +361 -0
  16. agent_control_plane/interfaces/plugin_interface.py +497 -0
  17. agent_control_plane/interfaces/protocol_interfaces.py +387 -0
  18. agent_control_plane/kernel_space.py +1009 -0
  19. agent_control_plane/langchain_adapter.py +424 -0
  20. agent_control_plane/lifecycle.py +3113 -0
  21. agent_control_plane/mcp_adapter.py +653 -0
  22. agent_control_plane/ml_safety.py +563 -0
  23. agent_control_plane/multimodal.py +727 -0
  24. agent_control_plane/mute_agent.py +422 -0
  25. agent_control_plane/observability.py +787 -0
  26. agent_control_plane/orchestrator.py +482 -0
  27. agent_control_plane/plugin_registry.py +750 -0
  28. agent_control_plane/policy_engine.py +954 -0
  29. agent_control_plane/process_isolation.py +777 -0
  30. agent_control_plane/shadow_mode.py +310 -0
  31. agent_control_plane/signals.py +493 -0
  32. agent_control_plane/supervisor_agents.py +430 -0
  33. agent_control_plane/time_travel_debugger.py +557 -0
  34. agent_control_plane/tool_registry.py +452 -0
  35. agent_control_plane/vfs.py +697 -0
  36. agent_kernel/__init__.py +69 -0
  37. agent_kernel/analyzer.py +435 -0
  38. agent_kernel/auditor.py +36 -0
  39. agent_kernel/completeness_auditor.py +237 -0
  40. agent_kernel/detector.py +203 -0
  41. agent_kernel/kernel.py +744 -0
  42. agent_kernel/memory_manager.py +85 -0
  43. agent_kernel/models.py +374 -0
  44. agent_kernel/nudge_mechanism.py +263 -0
  45. agent_kernel/outcome_analyzer.py +338 -0
  46. agent_kernel/patcher.py +582 -0
  47. agent_kernel/semantic_analyzer.py +316 -0
  48. agent_kernel/semantic_purge.py +349 -0
  49. agent_kernel/simulator.py +449 -0
  50. agent_kernel/teacher.py +85 -0
  51. agent_kernel/triage.py +152 -0
  52. agent_os/__init__.py +409 -0
  53. agent_os/_adversarial_impl.py +200 -0
  54. agent_os/_circuit_breaker_impl.py +232 -0
  55. agent_os/_mcp_metrics.py +193 -0
  56. agent_os/adversarial.py +20 -0
  57. agent_os/agents_compat.py +490 -0
  58. agent_os/audit_logger.py +135 -0
  59. agent_os/base_agent.py +651 -0
  60. agent_os/circuit_breaker.py +34 -0
  61. agent_os/cli/__init__.py +659 -0
  62. agent_os/cli/cmd_audit.py +128 -0
  63. agent_os/cli/cmd_init.py +152 -0
  64. agent_os/cli/cmd_policy.py +41 -0
  65. agent_os/cli/cmd_policy_gen.py +180 -0
  66. agent_os/cli/cmd_validate.py +258 -0
  67. agent_os/cli/mcp_scan.py +265 -0
  68. agent_os/cli/output.py +192 -0
  69. agent_os/cli/policy_checker.py +330 -0
  70. agent_os/compat.py +74 -0
  71. agent_os/constraint_graph.py +234 -0
  72. agent_os/content_governance.py +140 -0
  73. agent_os/context_budget.py +305 -0
  74. agent_os/credential_redactor.py +224 -0
  75. agent_os/diff_policy.py +89 -0
  76. agent_os/egress_policy.py +159 -0
  77. agent_os/escalation.py +276 -0
  78. agent_os/event_bus.py +124 -0
  79. agent_os/exceptions.py +180 -0
  80. agent_os/execution_context_policy.py +141 -0
  81. agent_os/github_enterprise.py +96 -0
  82. agent_os/health.py +20 -0
  83. agent_os/integrations/__init__.py +279 -0
  84. agent_os/integrations/a2a_adapter.py +279 -0
  85. agent_os/integrations/agent_lightning/__init__.py +30 -0
  86. agent_os/integrations/anthropic_adapter.py +420 -0
  87. agent_os/integrations/autogen_adapter.py +620 -0
  88. agent_os/integrations/base.py +1137 -0
  89. agent_os/integrations/compat.py +229 -0
  90. agent_os/integrations/config.py +98 -0
  91. agent_os/integrations/conversation_guardian.py +957 -0
  92. agent_os/integrations/crewai_adapter.py +467 -0
  93. agent_os/integrations/drift_detector.py +425 -0
  94. agent_os/integrations/dry_run.py +124 -0
  95. agent_os/integrations/escalation.py +582 -0
  96. agent_os/integrations/gemini_adapter.py +364 -0
  97. agent_os/integrations/google_adk_adapter.py +633 -0
  98. agent_os/integrations/guardrails_adapter.py +394 -0
  99. agent_os/integrations/health.py +197 -0
  100. agent_os/integrations/langchain_adapter.py +654 -0
  101. agent_os/integrations/llamafirewall.py +343 -0
  102. agent_os/integrations/llamaindex_adapter.py +188 -0
  103. agent_os/integrations/logging.py +191 -0
  104. agent_os/integrations/maf_adapter.py +631 -0
  105. agent_os/integrations/mistral_adapter.py +365 -0
  106. agent_os/integrations/openai_adapter.py +816 -0
  107. agent_os/integrations/openai_agents_sdk.py +406 -0
  108. agent_os/integrations/policy_compose.py +171 -0
  109. agent_os/integrations/profiling.py +144 -0
  110. agent_os/integrations/pydantic_ai_adapter.py +420 -0
  111. agent_os/integrations/rate_limiter.py +130 -0
  112. agent_os/integrations/rbac.py +143 -0
  113. agent_os/integrations/registry.py +113 -0
  114. agent_os/integrations/scope_guard.py +303 -0
  115. agent_os/integrations/semantic_kernel_adapter.py +769 -0
  116. agent_os/integrations/smolagents_adapter.py +629 -0
  117. agent_os/integrations/templates.py +178 -0
  118. agent_os/integrations/token_budget.py +134 -0
  119. agent_os/integrations/tool_aliases.py +190 -0
  120. agent_os/integrations/webhooks.py +177 -0
  121. agent_os/lite.py +208 -0
  122. agent_os/mcp_gateway.py +385 -0
  123. agent_os/mcp_message_signer.py +273 -0
  124. agent_os/mcp_protocols.py +161 -0
  125. agent_os/mcp_response_scanner.py +232 -0
  126. agent_os/mcp_security.py +924 -0
  127. agent_os/mcp_session_auth.py +231 -0
  128. agent_os/mcp_sliding_rate_limiter.py +184 -0
  129. agent_os/memory_guard.py +409 -0
  130. agent_os/metrics.py +134 -0
  131. agent_os/mute.py +428 -0
  132. agent_os/mute_agent.py +209 -0
  133. agent_os/policies/__init__.py +77 -0
  134. agent_os/policies/async_evaluator.py +275 -0
  135. agent_os/policies/backends.py +670 -0
  136. agent_os/policies/bridge.py +169 -0
  137. agent_os/policies/budget.py +85 -0
  138. agent_os/policies/cli.py +294 -0
  139. agent_os/policies/conflict_resolution.py +270 -0
  140. agent_os/policies/data_classification.py +252 -0
  141. agent_os/policies/evaluator.py +239 -0
  142. agent_os/policies/policy_schema.json +228 -0
  143. agent_os/policies/rate_limiting.py +145 -0
  144. agent_os/policies/schema.py +115 -0
  145. agent_os/policies/shared.py +331 -0
  146. agent_os/prompt_injection.py +694 -0
  147. agent_os/providers.py +182 -0
  148. agent_os/py.typed +0 -0
  149. agent_os/retry.py +81 -0
  150. agent_os/reversibility.py +251 -0
  151. agent_os/sandbox.py +432 -0
  152. agent_os/sandbox_provider.py +140 -0
  153. agent_os/secure_codegen.py +525 -0
  154. agent_os/security_skills.py +538 -0
  155. agent_os/semantic_policy.py +422 -0
  156. agent_os/server/__init__.py +15 -0
  157. agent_os/server/__main__.py +25 -0
  158. agent_os/server/app.py +277 -0
  159. agent_os/server/models.py +104 -0
  160. agent_os/shift_left_metrics.py +130 -0
  161. agent_os/stateless.py +742 -0
  162. agent_os/supervisor.py +148 -0
  163. agent_os/task_outcome.py +148 -0
  164. agent_os/transparency.py +181 -0
  165. agent_os/trust_root.py +128 -0
  166. agent_os_kernel-3.1.0.dist-info/METADATA +1269 -0
  167. agent_os_kernel-3.1.0.dist-info/RECORD +337 -0
  168. agent_os_kernel-3.1.0.dist-info/WHEEL +4 -0
  169. agent_os_kernel-3.1.0.dist-info/entry_points.txt +2 -0
  170. agent_os_kernel-3.1.0.dist-info/licenses/LICENSE +21 -0
  171. agent_os_observability/__init__.py +27 -0
  172. agent_os_observability/dashboards.py +898 -0
  173. agent_os_observability/metrics.py +398 -0
  174. agent_os_observability/server.py +223 -0
  175. agent_os_observability/tracer.py +232 -0
  176. agent_primitives/__init__.py +24 -0
  177. agent_primitives/failures.py +84 -0
  178. agent_primitives/py.typed +0 -0
  179. amb_core/__init__.py +177 -0
  180. amb_core/adapters/__init__.py +57 -0
  181. amb_core/adapters/aws_sqs_broker.py +376 -0
  182. amb_core/adapters/azure_servicebus_broker.py +340 -0
  183. amb_core/adapters/kafka_broker.py +260 -0
  184. amb_core/adapters/nats_broker.py +285 -0
  185. amb_core/adapters/rabbitmq_broker.py +235 -0
  186. amb_core/adapters/redis_broker.py +262 -0
  187. amb_core/broker.py +145 -0
  188. amb_core/bus.py +481 -0
  189. amb_core/cloudevents.py +509 -0
  190. amb_core/dlq.py +345 -0
  191. amb_core/hf_utils.py +536 -0
  192. amb_core/memory_broker.py +410 -0
  193. amb_core/models.py +141 -0
  194. amb_core/persistence.py +529 -0
  195. amb_core/schema.py +294 -0
  196. amb_core/tracing.py +358 -0
  197. atr/__init__.py +640 -0
  198. atr/access.py +348 -0
  199. atr/composition.py +645 -0
  200. atr/decorator.py +357 -0
  201. atr/executor.py +384 -0
  202. atr/health.py +557 -0
  203. atr/hf_utils.py +449 -0
  204. atr/injection.py +422 -0
  205. atr/metrics.py +440 -0
  206. atr/policies.py +403 -0
  207. atr/py.typed +2 -0
  208. atr/registry.py +452 -0
  209. atr/schema.py +480 -0
  210. atr/tools/safe/__init__.py +75 -0
  211. atr/tools/safe/calculator.py +467 -0
  212. atr/tools/safe/datetime_tool.py +443 -0
  213. atr/tools/safe/file_reader.py +402 -0
  214. atr/tools/safe/http_client.py +316 -0
  215. atr/tools/safe/json_parser.py +374 -0
  216. atr/tools/safe/text_tool.py +537 -0
  217. atr/tools/safe/toolkit.py +175 -0
  218. caas/__init__.py +162 -0
  219. caas/api/__init__.py +7 -0
  220. caas/api/server.py +1328 -0
  221. caas/caching.py +834 -0
  222. caas/cli.py +210 -0
  223. caas/conversation.py +223 -0
  224. caas/decay.py +72 -0
  225. caas/detection/__init__.py +9 -0
  226. caas/detection/detector.py +238 -0
  227. caas/enrichment.py +130 -0
  228. caas/gateway/__init__.py +27 -0
  229. caas/gateway/trust_gateway.py +474 -0
  230. caas/hf_utils.py +479 -0
  231. caas/ingestion/__init__.py +23 -0
  232. caas/ingestion/processors.py +253 -0
  233. caas/ingestion/structure_parser.py +188 -0
  234. caas/models.py +356 -0
  235. caas/pragmatic_truth.py +444 -0
  236. caas/routing/__init__.py +10 -0
  237. caas/routing/heuristic_router.py +58 -0
  238. caas/storage/__init__.py +9 -0
  239. caas/storage/store.py +389 -0
  240. caas/triad.py +213 -0
  241. caas/tuning/__init__.py +9 -0
  242. caas/tuning/tuner.py +329 -0
  243. caas/vfs/__init__.py +14 -0
  244. caas/vfs/filesystem.py +452 -0
  245. cmvk/__init__.py +218 -0
  246. cmvk/audit.py +402 -0
  247. cmvk/benchmarks.py +478 -0
  248. cmvk/constitutional.py +904 -0
  249. cmvk/hf_utils.py +301 -0
  250. cmvk/metrics.py +473 -0
  251. cmvk/profiles.py +300 -0
  252. cmvk/py.typed +0 -0
  253. cmvk/types.py +12 -0
  254. cmvk/verification.py +956 -0
  255. emk/__init__.py +89 -0
  256. emk/causal.py +352 -0
  257. emk/hf_utils.py +421 -0
  258. emk/indexer.py +83 -0
  259. emk/py.typed +0 -0
  260. emk/schema.py +204 -0
  261. emk/sleep_cycle.py +347 -0
  262. emk/store.py +281 -0
  263. iatp/__init__.py +166 -0
  264. iatp/attestation.py +461 -0
  265. iatp/cli.py +317 -0
  266. iatp/hf_utils.py +472 -0
  267. iatp/ipc_pipes.py +580 -0
  268. iatp/main.py +412 -0
  269. iatp/models/__init__.py +447 -0
  270. iatp/policy_engine.py +337 -0
  271. iatp/py.typed +2 -0
  272. iatp/recovery.py +321 -0
  273. iatp/security/__init__.py +270 -0
  274. iatp/sidecar/__init__.py +519 -0
  275. iatp/telemetry/__init__.py +164 -0
  276. iatp/tests/__init__.py +1 -0
  277. iatp/tests/test_attestation.py +370 -0
  278. iatp/tests/test_cli.py +131 -0
  279. iatp/tests/test_ed25519_attestation.py +211 -0
  280. iatp/tests/test_models.py +130 -0
  281. iatp/tests/test_policy_engine.py +347 -0
  282. iatp/tests/test_recovery.py +281 -0
  283. iatp/tests/test_security.py +222 -0
  284. iatp/tests/test_sidecar.py +167 -0
  285. iatp/tests/test_telemetry.py +175 -0
  286. mcp_kernel_server/__init__.py +28 -0
  287. mcp_kernel_server/cli.py +274 -0
  288. mcp_kernel_server/resources.py +217 -0
  289. mcp_kernel_server/server.py +564 -0
  290. mcp_kernel_server/tools.py +1174 -0
  291. mute_agent/__init__.py +68 -0
  292. mute_agent/core/__init__.py +1 -0
  293. mute_agent/core/execution_agent.py +166 -0
  294. mute_agent/core/handshake_protocol.py +201 -0
  295. mute_agent/core/reasoning_agent.py +238 -0
  296. mute_agent/knowledge_graph/__init__.py +1 -0
  297. mute_agent/knowledge_graph/graph_elements.py +65 -0
  298. mute_agent/knowledge_graph/multidimensional_graph.py +170 -0
  299. mute_agent/knowledge_graph/subgraph.py +224 -0
  300. mute_agent/listener/__init__.py +43 -0
  301. mute_agent/listener/adapters/__init__.py +31 -0
  302. mute_agent/listener/adapters/base_adapter.py +189 -0
  303. mute_agent/listener/adapters/caas_adapter.py +344 -0
  304. mute_agent/listener/adapters/control_plane_adapter.py +436 -0
  305. mute_agent/listener/adapters/iatp_adapter.py +332 -0
  306. mute_agent/listener/adapters/scak_adapter.py +251 -0
  307. mute_agent/listener/listener.py +610 -0
  308. mute_agent/listener/state_observer.py +436 -0
  309. mute_agent/listener/threshold_config.py +313 -0
  310. mute_agent/super_system/__init__.py +1 -0
  311. mute_agent/super_system/router.py +204 -0
  312. mute_agent/visualization/__init__.py +10 -0
  313. mute_agent/visualization/graph_debugger.py +502 -0
  314. nexus/README.md +60 -0
  315. nexus/__init__.py +51 -0
  316. nexus/arbiter.py +359 -0
  317. nexus/client.py +466 -0
  318. nexus/dmz.py +444 -0
  319. nexus/escrow.py +430 -0
  320. nexus/exceptions.py +286 -0
  321. nexus/pyproject.toml +36 -0
  322. nexus/registry.py +393 -0
  323. nexus/reputation.py +425 -0
  324. nexus/schemas/__init__.py +51 -0
  325. nexus/schemas/compliance.py +276 -0
  326. nexus/schemas/escrow.py +251 -0
  327. nexus/schemas/manifest.py +225 -0
  328. nexus/schemas/receipt.py +208 -0
  329. nexus/tests/__init__.py +0 -0
  330. nexus/tests/conftest.py +146 -0
  331. nexus/tests/test_arbiter.py +192 -0
  332. nexus/tests/test_dmz.py +194 -0
  333. nexus/tests/test_escrow.py +276 -0
  334. nexus/tests/test_exceptions.py +225 -0
  335. nexus/tests/test_registry.py +232 -0
  336. nexus/tests/test_reputation.py +328 -0
  337. nexus/tests/test_schemas.py +295 -0
cmvk/constitutional.py ADDED
@@ -0,0 +1,904 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+ """
4
+ Constitutional Validator for CMVK
5
+
6
+ This module provides a Constitutional Validator that checks AI outputs against
7
+ natural language safety rules (principles). Inspired by Anthropic's Constitutional AI,
8
+ this allows defining human-readable rules that are evaluated against outputs.
9
+
10
+ Key Features:
11
+ - Define principles in natural language (not regex or code)
12
+ - Evaluate outputs against multiple principles
13
+ - Support for custom principle sets (safety, ethics, brand, regulatory)
14
+ - Async and sync interfaces
15
+ - Pluggable LLM backends for evaluation
16
+ - Detailed violation reports with explanations
17
+
18
+ Example Usage:
19
+
20
+ from cmvk.constitutional import (
21
+ ConstitutionalValidator,
22
+ Principle,
23
+ PrincipleSet,
24
+ SAFETY_PRINCIPLES,
25
+ )
26
+
27
+ # Create validator with built-in safety principles
28
+ validator = ConstitutionalValidator(principles=SAFETY_PRINCIPLES)
29
+
30
+ # Check an output
31
+ result = validator.validate("Here's how to hack a computer...")
32
+
33
+ if not result.passed:
34
+ for violation in result.violations:
35
+ print(f"Violated: {violation.principle.name}")
36
+ print(f"Reason: {violation.explanation}")
37
+
38
+ # Define custom principles
39
+ brand_principles = PrincipleSet(
40
+ name="brand",
41
+ principles=[
42
+ Principle(
43
+ name="professional_tone",
44
+ description="Responses must maintain a professional tone",
45
+ severity="medium"
46
+ ),
47
+ Principle(
48
+ name="no_competitor_mentions",
49
+ description="Never mention competitor products by name",
50
+ severity="high"
51
+ ),
52
+ ]
53
+ )
54
+
55
+ validator = ConstitutionalValidator(principles=brand_principles)
56
+ """
57
+
58
+ from __future__ import annotations
59
+
60
+ import asyncio
61
+ from abc import ABC, abstractmethod
62
+ from dataclasses import dataclass, field
63
+ from enum import Enum
64
+ from typing import Any, Callable, Optional, Protocol, Sequence, Union
65
+ from datetime import datetime, timezone
66
+ import json
67
+ import re
68
+
69
+
70
class Severity(str, Enum):
    """Severity level for principle violations.

    Members are ordered ``LOW < MEDIUM < HIGH < CRITICAL``.

    The enum also subclasses ``str``, so any rich comparison that is not
    overridden here falls back to alphabetical string comparison (where
    ``"high" > "critical"``).  All four ordering operators are therefore
    defined explicitly so that ``<``, ``<=``, ``>``, ``>=``, ``min()`` and
    ``max()`` agree with each other.
    """
    CRITICAL = "critical"  # Must block output
    HIGH = "high"          # Should block unless overridden
    MEDIUM = "medium"      # Warning, may proceed
    LOW = "low"            # Informational

    @staticmethod
    def _rank(level: object) -> int:
        """Return the position of *level* on the LOW..CRITICAL scale.

        Args:
            level: A ``Severity`` member or its string value (e.g. ``"high"``).

        Raises:
            ValueError: If *level* is not a valid severity.
        """
        order = [Severity.LOW, Severity.MEDIUM, Severity.HIGH, Severity.CRITICAL]
        # Severity(level) is a no-op for members and validates plain strings.
        return order.index(Severity(level))

    def __lt__(self, other: "Severity") -> bool:
        return Severity._rank(self) < Severity._rank(other)

    def __le__(self, other: "Severity") -> bool:
        return Severity._rank(self) <= Severity._rank(other)

    def __gt__(self, other: "Severity") -> bool:
        return Severity._rank(self) > Severity._rank(other)

    def __ge__(self, other: "Severity") -> bool:
        return Severity._rank(self) >= Severity._rank(other)
80
+
81
+
82
@dataclass(frozen=True)
class Principle:
    """
    A single constitutional principle.

    Principles are natural language rules that outputs must comply with.
    They are evaluated by an LLM to determine if the output violates them.

    Attributes:
        name: Short identifier for the principle
        description: Natural language description of the rule
        severity: How serious a violation of this principle is. A plain
            string value such as ``"high"`` is accepted and converted to the
            matching ``Severity`` member.
        category: Optional category for grouping (e.g., "safety", "ethics")
        examples: Optional tuple of (input, is_violation, explanation) tuples

    Raises:
        ValueError: If ``severity`` is not a valid severity value.
    """
    name: str
    description: str
    severity: Severity = Severity.MEDIUM
    category: Optional[str] = None
    examples: tuple[tuple[str, bool, str], ...] = field(default_factory=tuple)

    def __post_init__(self) -> None:
        # Callers may pass severity="medium"; to_dict() relies on the
        # enum's ``.value``, so normalise to a Severity member up front.
        # The dataclass is frozen, hence object.__setattr__.
        if not isinstance(self.severity, Severity):
            object.__setattr__(self, "severity", Severity(self.severity))

    def __hash__(self) -> int:
        # Hash on the identifying text only; equality still compares all fields.
        return hash((self.name, self.description))

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary (severity is emitted as its string value)."""
        return {
            "name": self.name,
            "description": self.description,
            "severity": self.severity.value,
            "category": self.category,
            "examples": list(self.examples),
        }
115
+
116
+
117
@dataclass
class PrincipleSet:
    """
    A named collection of principles.

    Attributes:
        name: Name of this principle set
        principles: List of principles in this set
        description: Optional description of what this set covers
        version: Version string for tracking changes
    """
    name: str
    principles: list[Principle]
    description: Optional[str] = None
    version: str = "1.0.0"

    def __iter__(self):
        """Iterate over the principles in insertion order."""
        return iter(self.principles)

    def __len__(self) -> int:
        """Return the number of principles in the set."""
        return len(self.principles)

    def get_by_name(self, name: str) -> Optional[Principle]:
        """Return the first principle called *name*, or ``None`` if absent."""
        return next((p for p in self.principles if p.name == name), None)

    def get_by_category(self, category: str) -> list[Principle]:
        """Return all principles whose category equals *category*."""
        return [p for p in self.principles if p.category == category]

    def merge(self, other: "PrincipleSet") -> "PrincipleSet":
        """Return a new set containing this set's principles plus *other*'s.

        On a name clash the principle from this set wins; the principle from
        *other* is dropped.  Neither input set is modified.  The merged set
        gets the default version.
        """
        combined = list(self.principles)
        existing_names = {p.name for p in combined}
        combined.extend(p for p in other.principles if p.name not in existing_names)
        return PrincipleSet(
            name=f"{self.name}+{other.name}",
            principles=combined,
            description=f"Merged: {self.description or ''} + {other.description or ''}"
        )

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        return {
            "name": self.name,
            "description": self.description,
            "version": self.version,
            "principles": [p.to_dict() for p in self.principles],
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "PrincipleSet":
        """Create from a dictionary such as the one produced by ``to_dict``.

        Missing or ``null`` ``principles`` / ``examples`` entries are treated
        as empty.  Each example is converted to a tuple, so data that went
        through JSON (where tuples become lists) is rebuilt as tuples again.

        Raises:
            KeyError: If ``name`` (set or principle) or a principle's
                ``description`` is missing.
            ValueError: If a principle's ``severity`` is not a valid value.
        """
        principles = [
            Principle(
                name=p["name"],
                description=p["description"],
                severity=Severity(p.get("severity", "medium")),
                category=p.get("category"),
                examples=tuple(tuple(example) for example in p.get("examples") or ()),
            )
            for p in data.get("principles") or []
        ]
        return cls(
            name=data["name"],
            principles=principles,
            description=data.get("description"),
            version=data.get("version", "1.0.0"),
        )
191
+
192
+
193
+ # ============================================================================
194
+ # Built-in Principle Sets
195
+ # ============================================================================
196
+
197
# Built-in general-purpose set: safety, privacy, ethics and accuracy rules.
SAFETY_PRINCIPLES = PrincipleSet(
    name="safety",
    version="1.0.0",
    description="Core safety principles for AI outputs",
    principles=[
        # --- safety ---------------------------------------------------------
        Principle(
            name="no_harm_instructions",
            category="safety",
            severity=Severity.CRITICAL,
            description="The output must not provide instructions for causing physical harm to people or property",
            examples=(
                ("Here's how to make a weapon at home...", True, "Provides weapon-making instructions"),
                ("I cannot provide instructions for making weapons.", False, "Appropriately refuses"),
            ),
        ),
        Principle(
            name="no_illegal_activity",
            category="safety",
            severity=Severity.CRITICAL,
            description="The output must not encourage or provide detailed guidance for illegal activities",
            examples=(
                ("Here's how to bypass security systems...", True, "Provides hacking guidance"),
                ("I can't help with bypassing security systems.", False, "Appropriately refuses"),
            ),
        ),
        Principle(
            name="no_malware",
            category="safety",
            severity=Severity.CRITICAL,
            description="The output must not contain or describe how to create malware, viruses, or malicious code",
        ),
        # --- privacy --------------------------------------------------------
        Principle(
            name="no_personal_data_exposure",
            category="privacy",
            severity=Severity.HIGH,
            description="The output must not reveal or encourage collection of personal identifiable information",
        ),
        # --- ethics / accuracy ----------------------------------------------
        Principle(
            name="no_deception",
            category="ethics",
            severity=Severity.HIGH,
            description="The output must not encourage deception, fraud, or manipulation of others",
        ),
        Principle(
            name="acknowledge_uncertainty",
            category="accuracy",
            severity=Severity.MEDIUM,
            description="The output should acknowledge uncertainty rather than presenting speculation as fact",
        ),
        Principle(
            name="no_hate_speech",
            category="ethics",
            severity=Severity.CRITICAL,
            description="The output must not contain hate speech, slurs, or content that demeans groups based on protected characteristics",
        ),
        Principle(
            name="respect_autonomy",
            category="ethics",
            severity=Severity.MEDIUM,
            description="The output should respect user autonomy and not be manipulative or coercive",
        ),
    ],
)
260
+
261
+
262
# Built-in set for medical / health-related outputs; every rule is in the
# "medical" category.
MEDICAL_PRINCIPLES = PrincipleSet(
    name="medical",
    version="1.0.0",
    description="Principles for medical/health-related AI outputs",
    principles=[
        Principle(
            name="no_diagnosis",
            category="medical",
            severity=Severity.CRITICAL,
            description="The output must not provide specific medical diagnoses",
        ),
        Principle(
            name="recommend_professional",
            category="medical",
            severity=Severity.HIGH,
            description="The output should recommend consulting healthcare professionals for medical concerns",
        ),
        Principle(
            name="no_medication_dosing",
            category="medical",
            severity=Severity.CRITICAL,
            description="The output must not provide specific medication dosing recommendations",
        ),
        Principle(
            name="emergency_awareness",
            category="medical",
            severity=Severity.CRITICAL,
            description="The output should recognize and appropriately respond to potential medical emergencies",
        ),
    ],
)
293
+
294
+
295
# Built-in set for financial / investment-related outputs; every rule is in
# the "financial" category.
FINANCIAL_PRINCIPLES = PrincipleSet(
    name="financial",
    version="1.0.0",
    description="Principles for financial/investment-related AI outputs",
    principles=[
        Principle(
            name="no_specific_advice",
            category="financial",
            severity=Severity.HIGH,
            description="The output must not provide specific investment advice or recommendations",
        ),
        Principle(
            name="risk_disclosure",
            category="financial",
            severity=Severity.MEDIUM,
            description="The output should include appropriate risk disclosures when discussing investments",
        ),
        Principle(
            name="not_financial_advisor",
            category="financial",
            severity=Severity.MEDIUM,
            description="The output should clarify that it is not a licensed financial advisor",
        ),
    ],
)
320
+
321
+
322
+ # ============================================================================
323
+ # Violation Types
324
+ # ============================================================================
325
+
326
@dataclass
class Violation:
    """
    Record of one principle that an output was found to violate.

    Attributes:
        principle: The principle that was violated
        confidence: Confidence that this is a violation (0.0 to 1.0)
        explanation: Human-readable explanation of why this is a violation
        evidence: The specific text/content that triggered the violation
        suggested_revision: Optional suggested revision to fix the violation
    """
    principle: Principle
    confidence: float
    explanation: str
    evidence: Optional[str] = None
    suggested_revision: Optional[str] = None

    def to_dict(self) -> dict[str, Any]:
        """Return a dictionary view; the principle is expanded via its own ``to_dict``."""
        payload: dict[str, Any] = {"principle": self.principle.to_dict()}
        # Remaining fields are copied through unchanged, in declaration order.
        for attr in ("confidence", "explanation", "evidence", "suggested_revision"):
            payload[attr] = getattr(self, attr)
        return payload
353
+
354
+
355
@dataclass
class ValidationResult:
    """
    Outcome of checking one output against a set of principles.

    Attributes:
        passed: Whether the output passed all critical/high severity principles
        violations: List of detected violations
        output_text: The original text that was validated
        principles_checked: Number of principles that were checked
        timestamp: When the validation was performed (defaults to the
            current UTC time at construction)
        metadata: Additional metadata from the validation
    """
    passed: bool
    violations: list[Violation]
    output_text: str
    principles_checked: int
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    metadata: dict[str, Any] = field(default_factory=dict)

    def _with_severity(self, *levels: Severity) -> list[Violation]:
        """Return violations, in original order, whose severity equals any of *levels*."""
        # Equality (not hashing) is used on purpose so the match behaves the
        # same as a direct ``==`` / tuple-membership test on the severity.
        return [
            violation
            for violation in self.violations
            if any(violation.principle.severity == level for level in levels)
        ]

    @property
    def critical_violations(self) -> list[Violation]:
        """Violations whose principle has critical severity."""
        return self._with_severity(Severity.CRITICAL)

    @property
    def high_violations(self) -> list[Violation]:
        """Violations whose principle has high severity."""
        return self._with_severity(Severity.HIGH)

    @property
    def blocking_violations(self) -> list[Violation]:
        """Violations that should block the output (critical + high)."""
        return self._with_severity(Severity.CRITICAL, Severity.HIGH)

    def to_dict(self) -> dict[str, Any]:
        """Return a dictionary view with an ISO-8601 timestamp string."""
        serialized_violations = [violation.to_dict() for violation in self.violations]
        return {
            "passed": self.passed,
            "violations": serialized_violations,
            "output_text": self.output_text,
            "principles_checked": self.principles_checked,
            "timestamp": self.timestamp.isoformat(),
            "metadata": self.metadata,
        }
401
+
402
+
403
+ # ============================================================================
404
+ # Evaluator Interface
405
+ # ============================================================================
406
+
407
class PrincipleEvaluator(Protocol):
    """
    Structural interface that every principle-evaluation backend provides.

    A backend may be LLM-driven or purely rule-based; it only has to expose
    the two methods below with matching signatures.
    """

    def evaluate(
        self, output: str, principle: Principle, context: Optional[str] = None
    ) -> tuple[bool, float, str]:
        """
        Decide whether *output* violates *principle*.

        Args:
            output: The text to evaluate
            principle: The principle to check against
            context: Optional context about the conversation/task

        Returns:
            Tuple of (is_violation, confidence, explanation)
        """
        ...

    async def evaluate_async(
        self, output: str, principle: Principle, context: Optional[str] = None
    ) -> tuple[bool, float, str]:
        """Coroutine counterpart of ``evaluate`` with the same arguments and result."""
        ...
442
+
443
+
444
class RuleBasedEvaluator:
    """
    Regex-driven evaluator that needs no model.

    Each supported principle name maps to a list of case-insensitive
    patterns; an output is flagged when any pattern for the principle being
    checked is found in it.  Principles without patterns never produce a
    violation here.
    """

    # Pattern sources, grouped by the principle they back.
    HARM_PATTERNS = [
        r"\bhow to (make|build|create) (a |an )?(bomb|weapon|explosive)",
        r"\bstep[- ]by[- ]step.*(kill|harm|attack)",
        r"\binstructions for (killing|harming|attacking)",
    ]

    ILLEGAL_PATTERNS = [
        r"\bhow to (hack|bypass|break into)",
        r"\b(steal|fraud|counterfeit).*(instructions|guide|tutorial)",
    ]

    MALWARE_PATTERNS = [
        r"\b(virus|malware|trojan|ransomware).*(code|script|create)",
        r"\bkeylogger",
        r"\bexploit.*(vulnerability|zero[- ]day)",
    ]

    def __init__(self):
        """Compile the pattern lists once per instance, keyed by principle name."""
        sources = {
            "no_harm_instructions": self.HARM_PATTERNS,
            "no_illegal_activity": self.ILLEGAL_PATTERNS,
            "no_malware": self.MALWARE_PATTERNS,
        }
        self._compiled_patterns = {
            principle_name: [re.compile(expr, re.IGNORECASE) for expr in expressions]
            for principle_name, expressions in sources.items()
        }

    def evaluate(
        self,
        output: str,
        principle: Principle,
        context: Optional[str] = None,
    ) -> tuple[bool, float, str]:
        """
        Check *output* against the patterns registered for ``principle.name``.

        Args:
            output: The text to scan
            principle: The principle being checked; only its name is used
            context: Accepted for interface compatibility; not used

        Returns:
            ``(True, 0.8, message)`` for the first pattern that matches,
            otherwise ``(False, 0.6, "No pattern match detected")``.
        """
        candidates = self._compiled_patterns.get(principle.name, [])
        # Lazily search pattern by pattern and stop at the first hit.
        hit = next(
            (found for found in (rx.search(output) for rx in candidates) if found),
            None,
        )
        if hit is None:
            # Absence of a match is reported as "no violation" at 0.6 confidence.
            return (False, 0.6, "No pattern match detected")
        return (
            True,
            0.8,
            f"Pattern match detected: '{hit.group()}' violates {principle.name}"
        )

    async def evaluate_async(
        self,
        output: str,
        principle: Principle,
        context: Optional[str] = None,
    ) -> tuple[bool, float, str]:
        """Coroutine wrapper that simply delegates to the synchronous ``evaluate``."""
        return self.evaluate(output, principle, context)
506
+
507
+
508
class LLMEvaluator:
    """
    LLM-based principle evaluator.

    Uses an LLM to evaluate whether outputs violate principles.
    This is more sophisticated and can understand nuance.

    The model is asked to answer with a small JSON object; the reply is
    parsed leniently (see ``_parse_response``) so that a slightly malformed
    answer degrades to a low-confidence verdict instead of raising.
    """

    EVALUATION_PROMPT = """You are a Constitutional AI safety evaluator. Your task is to determine if an AI output violates a specific principle.

PRINCIPLE: {principle_name}
DESCRIPTION: {principle_description}

OUTPUT TO EVALUATE:
---
{output}
---

{context_section}

Analyze whether the output violates this principle. Consider:
1. Does the output directly violate the principle?
2. Does the output indirectly enable violation of the principle?
3. Is there any ambiguity that could be interpreted as a violation?

Respond in the following JSON format:
{{
    "is_violation": true/false,
    "confidence": 0.0-1.0,
    "explanation": "Brief explanation of your reasoning",
    "evidence": "Specific text that triggered concern (if any)"
}}

Your response (JSON only):"""

    # Confidence reported when the model gives none, or gives an unusable one.
    _DEFAULT_CONFIDENCE = 0.5

    def __init__(
        self,
        model_fn: Callable[[str], str],
        async_model_fn: Optional[Callable[[str], Any]] = None,
    ):
        """
        Initialize with model functions.

        Args:
            model_fn: Sync function that takes prompt and returns response
            async_model_fn: Optional async version
        """
        self._model_fn = model_fn
        self._async_model_fn = async_model_fn

    def _build_prompt(
        self,
        output: str,
        principle: Principle,
        context: Optional[str] = None,
    ) -> str:
        """
        Build the evaluation prompt.

        The optional context and the principle's examples (if any) are both
        rendered into the ``{context_section}`` slot of EVALUATION_PROMPT.
        """
        context_section = ""
        if context:
            context_section = f"\nCONTEXT:\n{context}\n"

        # Include examples if available
        if principle.examples:
            example_lines = []
            for text, is_violation, explanation in principle.examples:
                status = "VIOLATION" if is_violation else "OK"
                example_lines.append(f"- [{status}] \"{text[:100]}...\" - {explanation}\n")
            context_section += "\n\nEXAMPLES:\n" + "".join(example_lines)

        return self.EVALUATION_PROMPT.format(
            principle_name=principle.name,
            principle_description=principle.description,
            output=output,
            context_section=context_section,
        )

    @staticmethod
    def _coerce_bool(value: Any) -> bool:
        """Interpret a JSON value as a boolean.

        Models sometimes quote the flag (``"false"``); ``bool("false")`` is
        True, so strings are compared against known truthy spellings instead.
        """
        if isinstance(value, str):
            return value.strip().lower() in ("true", "yes", "1")
        return bool(value)

    @classmethod
    def _coerce_confidence(cls, value: Any) -> float:
        """Convert a JSON value to a confidence in ``[0.0, 1.0]``.

        Missing, ``null``, non-numeric or NaN values fall back to the default
        confidence; out-of-range numbers are clamped.
        """
        try:
            confidence = float(value)
        except (TypeError, ValueError):
            return cls._DEFAULT_CONFIDENCE
        if confidence != confidence:  # NaN is the only value unequal to itself
            return cls._DEFAULT_CONFIDENCE
        return min(1.0, max(0.0, confidence))

    def _parse_response(self, response: str) -> tuple[bool, float, str, Optional[str]]:
        """
        Parse LLM response into structured result.

        Returns:
            ``(is_violation, confidence, explanation, evidence)``. When no
            flat JSON object can be decoded from the response, a keyword
            heuristic is used with confidence 0.5 and the first 200
            characters of the response as the explanation.
        """
        try:
            # Try to extract JSON from response (first brace pair without nesting)
            json_match = re.search(r'\{[^{}]*\}', response, re.DOTALL)
            if json_match:
                data = json.loads(json_match.group())
                return (
                    self._coerce_bool(data.get("is_violation", False)),
                    self._coerce_confidence(data.get("confidence", self._DEFAULT_CONFIDENCE)),
                    str(data.get("explanation", "No explanation provided")),
                    data.get("evidence"),
                )
        except (json.JSONDecodeError, ValueError, TypeError):
            # Malformed or oddly typed reply: fall through to the heuristic.
            pass

        # Fallback: simple keyword detection
        lowered = response.lower()
        is_violation = "violation" in lowered and "not a violation" not in lowered
        return (is_violation, self._DEFAULT_CONFIDENCE, response[:200], None)

    def evaluate(
        self,
        output: str,
        principle: Principle,
        context: Optional[str] = None,
    ) -> tuple[bool, float, str]:
        """Evaluate using LLM.

        Returns:
            ``(is_violation, confidence, explanation)``; the parsed evidence
            field is discarded.
        """
        prompt = self._build_prompt(output, principle, context)
        response = self._model_fn(prompt)
        is_violation, confidence, explanation, _ = self._parse_response(response)
        return (is_violation, confidence, explanation)

    async def evaluate_async(
        self,
        output: str,
        principle: Principle,
        context: Optional[str] = None,
    ) -> tuple[bool, float, str]:
        """Async evaluate using LLM.

        Without an ``async_model_fn`` this calls the synchronous ``evaluate``
        directly (the sync model function then runs inside the coroutine).
        """
        if self._async_model_fn is None:
            # Fall back to sync
            return self.evaluate(output, principle, context)

        prompt = self._build_prompt(output, principle, context)
        response = await self._async_model_fn(prompt)
        is_violation, confidence, explanation, _ = self._parse_response(response)
        return (is_violation, confidence, explanation)
631
+
632
+
633
+ # ============================================================================
634
+ # Main Validator
635
+ # ============================================================================
636
+
637
class ConstitutionalValidator:
    """
    Constitutional Validator for checking AI outputs against principles.

    This validator checks outputs against a set of natural language principles
    and reports violations. It can use different evaluation backends.

    A validation passes when none of the reported violations belongs to a
    principle whose severity is CRITICAL or HIGH.

    Example:
        # Basic usage with built-in safety principles
        validator = ConstitutionalValidator(principles=SAFETY_PRINCIPLES)
        result = validator.validate("Some AI output...")

        if not result.passed:
            print(f"Found {len(result.violations)} violations")

        # With custom evaluator
        validator = ConstitutionalValidator(
            principles=SAFETY_PRINCIPLES,
            evaluator=LLMEvaluator(model_fn=my_llm_call)
        )

        # Async validation
        result = await validator.validate_async("Some AI output...")
    """

    def __init__(
        self,
        principles: Union[PrincipleSet, list[Principle]],
        evaluator: Optional[PrincipleEvaluator] = None,
        min_confidence: float = 0.7,
        fail_on_evaluator_error: bool = False,
    ):
        """
        Initialize the validator.

        Args:
            principles: Principles to validate against
            evaluator: Evaluation backend (defaults to RuleBasedEvaluator)
            min_confidence: Minimum confidence to consider a violation
            fail_on_evaluator_error: If True, treat evaluator errors as violations
        """
        if isinstance(principles, PrincipleSet):
            self._principle_set = principles
            self._principles = principles.principles
        else:
            # The same list object is handed to the wrapping set and kept here.
            self._principle_set = PrincipleSet(name="custom", principles=principles)
            self._principles = principles

        self._evaluator = evaluator or RuleBasedEvaluator()
        self._min_confidence = min_confidence
        self._fail_on_error = fail_on_evaluator_error
        self._validation_count = 0

    @property
    def principles(self) -> list[Principle]:
        """Get the list of principles."""
        return self._principles

    @property
    def principle_set(self) -> PrincipleSet:
        """Get the principle set."""
        return self._principle_set

    def add_principle(self, principle: Principle) -> None:
        """Add a principle to the validator."""
        self._principles.append(principle)

    def remove_principle(self, name: str) -> bool:
        """Remove a principle by name. Returns True if found and removed."""
        for i, p in enumerate(self._principles):
            if p.name == name:
                self._principles.pop(i)
                return True
        return False

    def _violation_from_verdict(
        self,
        principle: Principle,
        is_violation: bool,
        confidence: float,
        explanation: str,
    ) -> Optional[Violation]:
        """Turn an evaluator verdict into a Violation if it clears min_confidence."""
        if is_violation and confidence >= self._min_confidence:
            return Violation(
                principle=principle,
                confidence=confidence,
                explanation=explanation,
            )
        return None

    def _violation_from_error(
        self,
        principle: Principle,
        error: Exception,
    ) -> Optional[Violation]:
        """Log an evaluator failure and, if configured, report it as a violation.

        Without the log line a failing evaluator would make the principle
        silently count as satisfied when ``fail_on_evaluator_error`` is False.
        """
        import logging  # local import: keeps this block self-contained

        logging.getLogger(__name__).warning(
            "Evaluator %s failed on principle %r: %s",
            type(self._evaluator).__name__,
            getattr(principle, "name", principle),
            error,
        )
        if self._fail_on_error:
            return Violation(
                principle=principle,
                confidence=1.0,
                explanation=f"Evaluator error (treating as violation): {error}",
            )
        return None

    def _build_result(
        self,
        output: str,
        violations: list[Violation],
        principles_checked: int,
        validation_id: int,
        extra_metadata: Optional[dict[str, Any]] = None,
    ) -> ValidationResult:
        """Assemble the ValidationResult shared by the sync and async paths."""
        # Determine if passed (no critical or high violations)
        has_blocking = any(
            v.principle.severity in (Severity.CRITICAL, Severity.HIGH)
            for v in violations
        )
        metadata: dict[str, Any] = {
            "validation_id": validation_id,
            "principle_set": self._principle_set.name,
            "evaluator": type(self._evaluator).__name__,
        }
        if extra_metadata:
            metadata.update(extra_metadata)

        return ValidationResult(
            passed=not has_blocking,
            violations=violations,
            output_text=output,
            principles_checked=principles_checked,
            metadata=metadata,
        )

    def validate(
        self,
        output: str,
        context: Optional[str] = None,
        principles: Optional[list[Principle]] = None,
    ) -> ValidationResult:
        """
        Validate an output against principles.

        Args:
            output: The text to validate
            context: Optional context about the conversation/task
            principles: Optional subset of principles to check (defaults to all;
                an empty list also falls back to all)

        Returns:
            ValidationResult with any violations found
        """
        self._validation_count += 1
        validation_id = self._validation_count
        principles_to_check = principles or self._principles
        violations: list[Violation] = []

        for principle in principles_to_check:
            try:
                is_violation, confidence, explanation = self._evaluator.evaluate(
                    output, principle, context
                )
                violation = self._violation_from_verdict(
                    principle, is_violation, confidence, explanation
                )
            except Exception as e:
                violation = self._violation_from_error(principle, e)

            if violation is not None:
                violations.append(violation)

        return self._build_result(
            output, violations, len(principles_to_check), validation_id
        )

    async def validate_async(
        self,
        output: str,
        context: Optional[str] = None,
        principles: Optional[list[Principle]] = None,
        parallel: bool = True,
    ) -> ValidationResult:
        """
        Async validate an output against principles.

        Args:
            output: The text to validate
            context: Optional context
            principles: Optional subset of principles
            parallel: If True, evaluate principles in parallel

        Returns:
            ValidationResult with any violations found
        """
        self._validation_count += 1
        # Capture the id before the first await: another validation running
        # on the same validator may bump the counter while this one waits.
        validation_id = self._validation_count
        principles_to_check = principles or self._principles
        violations: list[Violation] = []

        async def check_principle(principle: Principle) -> Optional[Violation]:
            try:
                is_violation, confidence, explanation = await self._evaluator.evaluate_async(
                    output, principle, context
                )
                return self._violation_from_verdict(
                    principle, is_violation, confidence, explanation
                )
            except Exception as e:
                return self._violation_from_error(principle, e)

        if parallel:
            results = await asyncio.gather(
                *[check_principle(p) for p in principles_to_check]
            )
            violations = [v for v in results if v is not None]
        else:
            for principle in principles_to_check:
                violation = await check_principle(principle)
                if violation:
                    violations.append(violation)

        return self._build_result(
            output,
            violations,
            len(principles_to_check),
            validation_id,
            extra_metadata={"parallel": parallel},
        )

    def get_stats(self) -> dict[str, Any]:
        """Get validator statistics."""
        return {
            "validation_count": self._validation_count,
            "principle_count": len(self._principles),
            "principle_set": self._principle_set.name,
            "evaluator": type(self._evaluator).__name__,
            "min_confidence": self._min_confidence,
        }
850
+
851
+
852
+ # ============================================================================
853
+ # Convenience Functions
854
+ # ============================================================================
855
+
856
def validate_safety(output: str, context: Optional[str] = None) -> ValidationResult:
    """
    Run a one-off check of ``output`` against SAFETY_PRINCIPLES.

    A fresh ConstitutionalValidator with default settings is built on
    every call.

    Args:
        output: Text to validate
        context: Optional context forwarded to the validator

    Returns:
        ValidationResult
    """
    return ConstitutionalValidator(principles=SAFETY_PRINCIPLES).validate(output, context)
869
+
870
+
871
def validate_medical(output: str, context: Optional[str] = None) -> ValidationResult:
    """Run a one-off check against SAFETY_PRINCIPLES merged with MEDICAL_PRINCIPLES."""
    merged_principles = SAFETY_PRINCIPLES.merge(MEDICAL_PRINCIPLES)
    return ConstitutionalValidator(principles=merged_principles).validate(output, context)
876
+
877
+
878
def validate_financial(output: str, context: Optional[str] = None) -> ValidationResult:
    """Run a one-off check against SAFETY_PRINCIPLES merged with FINANCIAL_PRINCIPLES."""
    merged_principles = SAFETY_PRINCIPLES.merge(FINANCIAL_PRINCIPLES)
    return ConstitutionalValidator(principles=merged_principles).validate(output, context)
883
+
884
+
885
def create_validator_from_yaml(yaml_str: str) -> ConstitutionalValidator:
    """
    Create a validator from YAML configuration.

    YAML format:
        name: my_principles
        description: Custom principles
        principles:
          - name: rule_1
            description: First rule description
            severity: high
            category: safety

    Args:
        yaml_str: YAML document text; parsed with ``yaml.safe_load`` and
            passed to ``PrincipleSet.from_dict``.

    Returns:
        A ConstitutionalValidator built from the parsed principle set.

    Raises:
        ImportError: If PyYAML is not installed (the original import error
            is attached as the cause).
    """
    # Guard only the import: an ImportError raised while parsing or while
    # building the principle set must not be reported as "PyYAML missing".
    try:
        import yaml
    except ImportError as err:
        raise ImportError(
            "PyYAML is required for YAML configuration. Install with: pip install pyyaml"
        ) from err

    data = yaml.safe_load(yaml_str)
    principle_set = PrincipleSet.from_dict(data)
    return ConstitutionalValidator(principles=principle_set)