agent-alignment-protocol 0.1.7__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/.gitignore +4 -0
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/PKG-INFO +46 -13
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/README.md +45 -12
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/pyproject.toml +1 -1
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/__init__.py +26 -4
- agent_alignment_protocol-0.2.0/src/aap/compliance.py +56 -0
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/verification/__init__.py +15 -0
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/verification/api.py +268 -2
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/verification/constants.py +9 -0
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/verification/models.py +105 -0
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/LICENSE +0 -0
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/schemas/alignment-card.schema.json +0 -0
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/schemas/ap-trace.schema.json +0 -0
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/schemas/value-coherence.schema.json +0 -0
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/cli/__init__.py +0 -0
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/cli/main.py +0 -0
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/py.typed +0 -0
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/schemas/__init__.py +0 -0
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/schemas/alignment_card.py +0 -0
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/schemas/ap_trace.py +0 -0
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/schemas/value_coherence.py +0 -0
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/tracing.py +0 -0
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/verification/divergence.py +0 -0
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/verification/features.py +0 -0
- {agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/verification/ssm.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: agent-alignment-protocol
|
|
3
|
-
Version: 0.1.7
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: Agent Alignment Protocol - The missing alignment layer for the agent protocol stack
|
|
5
5
|
Project-URL: Homepage, https://github.com/mnemom/aap
|
|
6
6
|
Project-URL: Documentation, https://github.com/mnemom/aap#readme
|
|
@@ -46,7 +46,7 @@ Description-Content-Type: text/markdown
|
|
|
46
46
|
[](https://pypi.org/project/agent-alignment-protocol/)
|
|
47
47
|
[](https://www.npmjs.com/package/@mnemom/agent-alignment-protocol)
|
|
48
48
|
[](LICENSE)
|
|
49
|
-
[](docs/
|
|
49
|
+
[](https://docs.mnemom.ai/protocols/aap/specification)
|
|
50
50
|
|
|
51
51
|
**A transparency protocol for autonomous agents.**
|
|
52
52
|
|
|
@@ -207,7 +207,7 @@ This matters. Read it.
|
|
|
207
207
|
|
|
208
208
|
5. **Tested on transformer-based agents.** Other architectures may exhibit behaviors AAP doesn't capture.
|
|
209
209
|
|
|
210
|
-
For the complete limitations disclosure, see [Section 10 of the Specification](docs/
|
|
210
|
+
For the complete limitations disclosure, see [Section 10 of the Specification](https://docs.mnemom.ai/protocols/aap/specification#10-limitations).
|
|
211
211
|
|
|
212
212
|
## Installation
|
|
213
213
|
|
|
@@ -216,7 +216,7 @@ For the complete limitations disclosure, see [Section 10 of the Specification](d
|
|
|
216
216
|
pip install agent-alignment-protocol
|
|
217
217
|
|
|
218
218
|
# TypeScript
|
|
219
|
-
npm install agent-alignment-protocol
|
|
219
|
+
npm install @mnemom/agent-alignment-protocol
|
|
220
220
|
```
|
|
221
221
|
|
|
222
222
|
## Integration
|
|
@@ -236,7 +236,7 @@ AAP extends the A2A Agent Card with an `alignment` block:
|
|
|
236
236
|
}
|
|
237
237
|
```
|
|
238
238
|
|
|
239
|
-
See [A2A
|
|
239
|
+
See [A2A Integration Guide](https://docs.mnemom.ai/protocols/aap/a2a-integration).
|
|
240
240
|
|
|
241
241
|
### With MCP Tools
|
|
242
242
|
|
|
@@ -250,7 +250,7 @@ def my_tool(params):
|
|
|
250
250
|
...
|
|
251
251
|
```
|
|
252
252
|
|
|
253
|
-
See [MCP Migration Guide](docs/mcp-migration
|
|
253
|
+
See [MCP Migration Guide](https://docs.mnemom.ai/protocols/aap/mcp-migration).
|
|
254
254
|
|
|
255
255
|
## Verification
|
|
256
256
|
|
|
@@ -280,6 +280,26 @@ for alert in alerts:
|
|
|
280
280
|
- Forbidden action compliance (no forbidden actions taken)
|
|
281
281
|
- Behavioral similarity (semantic alignment using SSM analysis)
|
|
282
282
|
|
|
283
|
+
### Action Name Matching
|
|
284
|
+
|
|
285
|
+
Bounded and forbidden action lists support flexible matching (v0.1.7+):
|
|
286
|
+
|
|
287
|
+
```json
|
|
288
|
+
{
|
|
289
|
+
"bounded_actions": [
|
|
290
|
+
"search",
|
|
291
|
+
"exec: execute shell commands",
|
|
292
|
+
"cron: manage scheduled patrol jobs"
|
|
293
|
+
]
|
|
294
|
+
}
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
- **Exact match**: `"search"` matches action name `"search"`
|
|
298
|
+
- **Colon-prefix match**: `"exec: execute shell commands"` matches action name `"exec"`
|
|
299
|
+
- **Compound actions**: action name `"exec, read"` matches when *each* component is in the bounded list
|
|
300
|
+
|
|
301
|
+
This allows descriptive entries in alignment cards while keeping verification flexible.
|
|
302
|
+
|
|
283
303
|
**Similarity scoring:** Each verification returns a `similarity_score` (0.0-1.0) measuring semantic similarity between the trace and declared alignment. If a trace passes structural checks but has `similarity_score < 0.50`, a `low_behavioral_similarity` warning is generated.
|
|
284
304
|
|
|
285
305
|
## Try It
|
|
@@ -297,24 +317,24 @@ No server required — runs entirely client-side via WebAssembly.
|
|
|
297
317
|
|
|
298
318
|
| Document | Description |
|
|
299
319
|
|----------|-------------|
|
|
300
|
-
| [**
|
|
301
|
-
| [**
|
|
302
|
-
| [**
|
|
303
|
-
| [**
|
|
304
|
-
| [**
|
|
320
|
+
| [**Specification**](https://docs.mnemom.ai/protocols/aap/specification) | Full protocol specification (IETF-style) |
|
|
321
|
+
| [**Quick Start**](https://docs.mnemom.ai/protocols/aap/quickstart) | Zero to compliant in 5 minutes |
|
|
322
|
+
| [**Limitations**](https://docs.mnemom.ai/protocols/aap/limitations) | What AAP guarantees and doesn't |
|
|
323
|
+
| [**Security**](https://docs.mnemom.ai/protocols/aap/security) | Threat model and security considerations |
|
|
324
|
+
| [**Calibration**](https://docs.mnemom.ai/protocols/aap/calibration) | How verification thresholds were derived |
|
|
305
325
|
|
|
306
326
|
## Examples
|
|
307
327
|
|
|
308
328
|
| Example | Description |
|
|
309
329
|
|---------|-------------|
|
|
310
330
|
| [`simple-agent/`](examples/simple-agent/) | Minimal AAP implementation |
|
|
311
|
-
| [`a2a-integration/`](examples/a2a-integration/) | A2A agent with AAP |
|
|
331
|
+
| [`a2a-integration/`](examples/a2a-integration/) | A2A agent with AAP (Python + TypeScript) |
|
|
312
332
|
| [`mcp-integration/`](examples/mcp-integration/) | MCP tools with alignment |
|
|
313
333
|
| [`alignment-failure/`](examples/alignment-failure/) | Deliberate failure for testing |
|
|
314
334
|
|
|
315
335
|
## Status
|
|
316
336
|
|
|
317
|
-
**Current Version**: 0.1.
|
|
337
|
+
**Current Version**: 0.1.8
|
|
318
338
|
|
|
319
339
|
| Component | Status |
|
|
320
340
|
|-----------|--------|
|
|
@@ -354,6 +374,19 @@ from aap import (
|
|
|
354
374
|
# aap drift --card CARD --traces TRACES_DIR → Uses SSM analysis
|
|
355
375
|
```
|
|
356
376
|
|
|
377
|
+
## Standards & Compliance
|
|
378
|
+
|
|
379
|
+
AAP aligns with and supports compliance for the following international standards and regulatory frameworks:
|
|
380
|
+
|
|
381
|
+
| Standard | Relevance to AAP |
|
|
382
|
+
|----------|-----------------|
|
|
383
|
+
| **[ISO/IEC 42001:2023](https://www.iso.org/standard/42001)** — AI Management Systems | Alignment Card provides the structured AI system documentation required by 42001 management systems |
|
|
384
|
+
| **[ISO/IEC 42005:2025](https://www.iso.org/standard/42005)** — AI System Impact Assessment | AP-Trace and drift detection support ongoing impact assessment and monitoring |
|
|
385
|
+
| **[IEEE 7001-2021](https://standards.ieee.org/ieee/7001/6929/)** — Transparency of Autonomous Systems | AAP's core design goal — making agent decisions observable — directly implements IEEE 7001 transparency requirements |
|
|
386
|
+
| **[IEEE 3152-2024](https://standards.ieee.org/ieee/3152/11718/)** — Transparent Human and Machine Agency Identification | Alignment Card `agent_id`, `principal` block, and relationship types map to IEEE 3152 agency identification |
|
|
387
|
+
| **[Singapore IMDA Model AI Governance Framework for Agentic AI](https://www.imda.gov.sg/-/media/imda/files/about/emerging-tech-and-research/artificial-intelligence/mgf-for-agentic-ai.pdf)** (Jan 2026) | Alignment Card + Value Coherence Handshake address IMDA's agentic AI governance principles for multi-agent coordination |
|
|
388
|
+
| **[EU AI Act Article 50](https://artificialintelligenceact.eu/article/50/)** — Transparency Obligations (enforcement Aug 2026) | Alignment Card `principal` + disclosure fields, AP-Trace structured audit trails, and `audit_commitment.retention_days` support Article 50 compliance. See [EU AI Act Compliance Guide](https://docs.mnemom.ai/guides/eu-compliance) |
|
|
389
|
+
|
|
357
390
|
## Contributing
|
|
358
391
|
|
|
359
392
|
We welcome contributions. See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
[](https://pypi.org/project/agent-alignment-protocol/)
|
|
7
7
|
[](https://www.npmjs.com/package/@mnemom/agent-alignment-protocol)
|
|
8
8
|
[](LICENSE)
|
|
9
|
-
[](docs/
|
|
9
|
+
[](https://docs.mnemom.ai/protocols/aap/specification)
|
|
10
10
|
|
|
11
11
|
**A transparency protocol for autonomous agents.**
|
|
12
12
|
|
|
@@ -167,7 +167,7 @@ This matters. Read it.
|
|
|
167
167
|
|
|
168
168
|
5. **Tested on transformer-based agents.** Other architectures may exhibit behaviors AAP doesn't capture.
|
|
169
169
|
|
|
170
|
-
For the complete limitations disclosure, see [Section 10 of the Specification](docs/
|
|
170
|
+
For the complete limitations disclosure, see [Section 10 of the Specification](https://docs.mnemom.ai/protocols/aap/specification#10-limitations).
|
|
171
171
|
|
|
172
172
|
## Installation
|
|
173
173
|
|
|
@@ -176,7 +176,7 @@ For the complete limitations disclosure, see [Section 10 of the Specification](d
|
|
|
176
176
|
pip install agent-alignment-protocol
|
|
177
177
|
|
|
178
178
|
# TypeScript
|
|
179
|
-
npm install agent-alignment-protocol
|
|
179
|
+
npm install @mnemom/agent-alignment-protocol
|
|
180
180
|
```
|
|
181
181
|
|
|
182
182
|
## Integration
|
|
@@ -196,7 +196,7 @@ AAP extends the A2A Agent Card with an `alignment` block:
|
|
|
196
196
|
}
|
|
197
197
|
```
|
|
198
198
|
|
|
199
|
-
See [A2A
|
|
199
|
+
See [A2A Integration Guide](https://docs.mnemom.ai/protocols/aap/a2a-integration).
|
|
200
200
|
|
|
201
201
|
### With MCP Tools
|
|
202
202
|
|
|
@@ -210,7 +210,7 @@ def my_tool(params):
|
|
|
210
210
|
...
|
|
211
211
|
```
|
|
212
212
|
|
|
213
|
-
See [MCP Migration Guide](docs/mcp-migration
|
|
213
|
+
See [MCP Migration Guide](https://docs.mnemom.ai/protocols/aap/mcp-migration).
|
|
214
214
|
|
|
215
215
|
## Verification
|
|
216
216
|
|
|
@@ -240,6 +240,26 @@ for alert in alerts:
|
|
|
240
240
|
- Forbidden action compliance (no forbidden actions taken)
|
|
241
241
|
- Behavioral similarity (semantic alignment using SSM analysis)
|
|
242
242
|
|
|
243
|
+
### Action Name Matching
|
|
244
|
+
|
|
245
|
+
Bounded and forbidden action lists support flexible matching (v0.1.7+):
|
|
246
|
+
|
|
247
|
+
```json
|
|
248
|
+
{
|
|
249
|
+
"bounded_actions": [
|
|
250
|
+
"search",
|
|
251
|
+
"exec: execute shell commands",
|
|
252
|
+
"cron: manage scheduled patrol jobs"
|
|
253
|
+
]
|
|
254
|
+
}
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
- **Exact match**: `"search"` matches action name `"search"`
|
|
258
|
+
- **Colon-prefix match**: `"exec: execute shell commands"` matches action name `"exec"`
|
|
259
|
+
- **Compound actions**: action name `"exec, read"` matches when *each* component is in the bounded list
|
|
260
|
+
|
|
261
|
+
This allows descriptive entries in alignment cards while keeping verification flexible.
|
|
262
|
+
|
|
243
263
|
**Similarity scoring:** Each verification returns a `similarity_score` (0.0-1.0) measuring semantic similarity between the trace and declared alignment. If a trace passes structural checks but has `similarity_score < 0.50`, a `low_behavioral_similarity` warning is generated.
|
|
244
264
|
|
|
245
265
|
## Try It
|
|
@@ -257,24 +277,24 @@ No server required — runs entirely client-side via WebAssembly.
|
|
|
257
277
|
|
|
258
278
|
| Document | Description |
|
|
259
279
|
|----------|-------------|
|
|
260
|
-
| [**
|
|
261
|
-
| [**
|
|
262
|
-
| [**
|
|
263
|
-
| [**
|
|
264
|
-
| [**
|
|
280
|
+
| [**Specification**](https://docs.mnemom.ai/protocols/aap/specification) | Full protocol specification (IETF-style) |
|
|
281
|
+
| [**Quick Start**](https://docs.mnemom.ai/protocols/aap/quickstart) | Zero to compliant in 5 minutes |
|
|
282
|
+
| [**Limitations**](https://docs.mnemom.ai/protocols/aap/limitations) | What AAP guarantees and doesn't |
|
|
283
|
+
| [**Security**](https://docs.mnemom.ai/protocols/aap/security) | Threat model and security considerations |
|
|
284
|
+
| [**Calibration**](https://docs.mnemom.ai/protocols/aap/calibration) | How verification thresholds were derived |
|
|
265
285
|
|
|
266
286
|
## Examples
|
|
267
287
|
|
|
268
288
|
| Example | Description |
|
|
269
289
|
|---------|-------------|
|
|
270
290
|
| [`simple-agent/`](examples/simple-agent/) | Minimal AAP implementation |
|
|
271
|
-
| [`a2a-integration/`](examples/a2a-integration/) | A2A agent with AAP |
|
|
291
|
+
| [`a2a-integration/`](examples/a2a-integration/) | A2A agent with AAP (Python + TypeScript) |
|
|
272
292
|
| [`mcp-integration/`](examples/mcp-integration/) | MCP tools with alignment |
|
|
273
293
|
| [`alignment-failure/`](examples/alignment-failure/) | Deliberate failure for testing |
|
|
274
294
|
|
|
275
295
|
## Status
|
|
276
296
|
|
|
277
|
-
**Current Version**: 0.1.
|
|
297
|
+
**Current Version**: 0.1.8
|
|
278
298
|
|
|
279
299
|
| Component | Status |
|
|
280
300
|
|-----------|--------|
|
|
@@ -314,6 +334,19 @@ from aap import (
|
|
|
314
334
|
# aap drift --card CARD --traces TRACES_DIR → Uses SSM analysis
|
|
315
335
|
```
|
|
316
336
|
|
|
337
|
+
## Standards & Compliance
|
|
338
|
+
|
|
339
|
+
AAP aligns with and supports compliance for the following international standards and regulatory frameworks:
|
|
340
|
+
|
|
341
|
+
| Standard | Relevance to AAP |
|
|
342
|
+
|----------|-----------------|
|
|
343
|
+
| **[ISO/IEC 42001:2023](https://www.iso.org/standard/42001)** — AI Management Systems | Alignment Card provides the structured AI system documentation required by 42001 management systems |
|
|
344
|
+
| **[ISO/IEC 42005:2025](https://www.iso.org/standard/42005)** — AI System Impact Assessment | AP-Trace and drift detection support ongoing impact assessment and monitoring |
|
|
345
|
+
| **[IEEE 7001-2021](https://standards.ieee.org/ieee/7001/6929/)** — Transparency of Autonomous Systems | AAP's core design goal — making agent decisions observable — directly implements IEEE 7001 transparency requirements |
|
|
346
|
+
| **[IEEE 3152-2024](https://standards.ieee.org/ieee/3152/11718/)** — Transparent Human and Machine Agency Identification | Alignment Card `agent_id`, `principal` block, and relationship types map to IEEE 3152 agency identification |
|
|
347
|
+
| **[Singapore IMDA Model AI Governance Framework for Agentic AI](https://www.imda.gov.sg/-/media/imda/files/about/emerging-tech-and-research/artificial-intelligence/mgf-for-agentic-ai.pdf)** (Jan 2026) | Alignment Card + Value Coherence Handshake address IMDA's agentic AI governance principles for multi-agent coordination |
|
|
348
|
+
| **[EU AI Act Article 50](https://artificialintelligenceact.eu/article/50/)** — Transparency Obligations (enforcement Aug 2026) | Alignment Card `principal` + disclosure fields, AP-Trace structured audit trails, and `audit_commitment.retention_days` support Article 50 compliance. See [EU AI Act Compliance Guide](https://docs.mnemom.ai/guides/eu-compliance) |
|
|
349
|
+
|
|
317
350
|
## Contributing
|
|
318
351
|
|
|
319
352
|
We welcome contributions. See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "agent-alignment-protocol"
|
|
7
|
-
version = "0.1.7"
|
|
7
|
+
version = "0.2.0"
|
|
8
8
|
description = "Agent Alignment Protocol - The missing alignment layer for the agent protocol stack"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "Apache-2.0"
|
|
@@ -38,16 +38,19 @@ See docs/SPEC.md for the full protocol specification.
|
|
|
38
38
|
|
|
39
39
|
__version__ = "0.1.0"
|
|
40
40
|
|
|
41
|
-
#
|
|
41
|
+
# EU AI Act compliance presets
|
|
42
|
+
from aap.compliance import (
|
|
43
|
+
EU_COMPLIANCE_AUDIT_COMMITMENT,
|
|
44
|
+
EU_COMPLIANCE_EXTENSIONS,
|
|
45
|
+
EU_COMPLIANCE_VALUES,
|
|
46
|
+
)
|
|
47
|
+
|
|
42
48
|
# Schema models
|
|
43
49
|
from aap.schemas import (
|
|
44
|
-
# AP-Trace
|
|
45
50
|
Action,
|
|
46
51
|
ActionCategory,
|
|
47
52
|
ActionType,
|
|
48
|
-
# Alignment Card
|
|
49
53
|
AlignmentCard,
|
|
50
|
-
# Value Coherence Handshake
|
|
51
54
|
AlignmentCardRequest,
|
|
52
55
|
AlignmentCardResponse,
|
|
53
56
|
Alternative,
|
|
@@ -82,20 +85,27 @@ from aap.tracing import (
|
|
|
82
85
|
|
|
83
86
|
# Verification result models
|
|
84
87
|
from aap.verification import (
|
|
88
|
+
AgentCoherenceSummary,
|
|
85
89
|
CoherenceResult,
|
|
86
90
|
DriftAlert,
|
|
87
91
|
DriftAnalysis,
|
|
88
92
|
DriftDirection,
|
|
89
93
|
DriftIndicator,
|
|
94
|
+
FleetCluster,
|
|
95
|
+
FleetCoherenceResult,
|
|
96
|
+
FleetOutlier,
|
|
97
|
+
PairwiseEntry,
|
|
90
98
|
Severity,
|
|
91
99
|
ValueAlignment,
|
|
92
100
|
ValueConflict,
|
|
101
|
+
ValueDivergence,
|
|
93
102
|
VerificationMetadata,
|
|
94
103
|
VerificationResult,
|
|
95
104
|
Violation,
|
|
96
105
|
ViolationType,
|
|
97
106
|
Warning,
|
|
98
107
|
check_coherence,
|
|
108
|
+
check_fleet_coherence,
|
|
99
109
|
detect_drift,
|
|
100
110
|
verify_trace,
|
|
101
111
|
)
|
|
@@ -106,6 +116,7 @@ __all__ = [
|
|
|
106
116
|
# Core API
|
|
107
117
|
"verify_trace",
|
|
108
118
|
"check_coherence",
|
|
119
|
+
"check_fleet_coherence",
|
|
109
120
|
"detect_drift",
|
|
110
121
|
# Tracing Decorators
|
|
111
122
|
"trace_decision",
|
|
@@ -126,6 +137,13 @@ __all__ = [
|
|
|
126
137
|
"CoherenceResult",
|
|
127
138
|
"ValueAlignment",
|
|
128
139
|
"ValueConflict",
|
|
140
|
+
# Fleet Coherence (E-05)
|
|
141
|
+
"FleetCoherenceResult",
|
|
142
|
+
"PairwiseEntry",
|
|
143
|
+
"FleetOutlier",
|
|
144
|
+
"FleetCluster",
|
|
145
|
+
"ValueDivergence",
|
|
146
|
+
"AgentCoherenceSummary",
|
|
129
147
|
"DriftAlert",
|
|
130
148
|
"DriftAnalysis",
|
|
131
149
|
"DriftDirection",
|
|
@@ -155,4 +173,8 @@ __all__ = [
|
|
|
155
173
|
"ValueCoherenceCheck",
|
|
156
174
|
"CoherenceResultMessage",
|
|
157
175
|
"ProposedCollaboration",
|
|
176
|
+
# EU AI Act Compliance
|
|
177
|
+
"EU_COMPLIANCE_AUDIT_COMMITMENT",
|
|
178
|
+
"EU_COMPLIANCE_EXTENSIONS",
|
|
179
|
+
"EU_COMPLIANCE_VALUES",
|
|
158
180
|
]
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""EU AI Act Article 50 compliance presets for AAP.
|
|
2
|
+
|
|
3
|
+
These presets provide recommended configuration values for deploying
|
|
4
|
+
AAP-instrumented agents in EU jurisdictions subject to AI Act
|
|
5
|
+
transparency obligations. Spread them into your AlignmentCard fields.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
from aap.compliance import (
|
|
9
|
+
EU_COMPLIANCE_AUDIT_COMMITMENT,
|
|
10
|
+
EU_COMPLIANCE_EXTENSIONS,
|
|
11
|
+
EU_COMPLIANCE_VALUES,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
card = AlignmentCard(
|
|
15
|
+
...,
|
|
16
|
+
audit_commitment=AuditCommitment(**EU_COMPLIANCE_AUDIT_COMMITMENT),
|
|
17
|
+
values=Values(declared=EU_COMPLIANCE_VALUES, ...),
|
|
18
|
+
extensions=EU_COMPLIANCE_EXTENSIONS,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
DISCLAIMER: These presets reflect a technical mapping of AAP features to
|
|
22
|
+
Article 50 requirements. They do not constitute legal advice. Consult
|
|
23
|
+
qualified legal counsel for your specific compliance obligations.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
# Audit commitment values that satisfy Article 50(4) audit trail requirements.
|
|
29
|
+
EU_COMPLIANCE_AUDIT_COMMITMENT: dict = {
|
|
30
|
+
"retention_days": 90,
|
|
31
|
+
"queryable": True,
|
|
32
|
+
"query_endpoint": "https://audit.example.com/traces",
|
|
33
|
+
"tamper_evidence": "append_only",
|
|
34
|
+
"trace_format": "ap-trace-v1",
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
# Extension block for EU AI Act metadata on the Alignment Card.
|
|
38
|
+
EU_COMPLIANCE_EXTENSIONS: dict = {
|
|
39
|
+
"eu_ai_act": {
|
|
40
|
+
"article_50_compliant": True,
|
|
41
|
+
"ai_system_classification": "general_purpose",
|
|
42
|
+
"disclosure_text": (
|
|
43
|
+
"This system is powered by an AI agent. Its decisions are logged "
|
|
44
|
+
"and auditable. You may request a human review of any decision."
|
|
45
|
+
),
|
|
46
|
+
"compliance_version": "2026-08",
|
|
47
|
+
},
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
# Recommended declared values for Article 50 transparency obligations.
|
|
51
|
+
EU_COMPLIANCE_VALUES: list[str] = [
|
|
52
|
+
"transparency",
|
|
53
|
+
"honesty",
|
|
54
|
+
"user_control",
|
|
55
|
+
"principal_benefit",
|
|
56
|
+
]
|
{agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/verification/__init__.py
RENAMED
|
@@ -36,6 +36,7 @@ See SPEC.md Sections 7, 6.4, and 8 for protocol specification.
|
|
|
36
36
|
|
|
37
37
|
from aap.verification.api import (
|
|
38
38
|
check_coherence,
|
|
39
|
+
check_fleet_coherence,
|
|
39
40
|
detect_drift,
|
|
40
41
|
verify_trace,
|
|
41
42
|
)
|
|
@@ -57,14 +58,20 @@ from aap.verification.features import (
|
|
|
57
58
|
cosine_similarity,
|
|
58
59
|
)
|
|
59
60
|
from aap.verification.models import (
|
|
61
|
+
AgentCoherenceSummary,
|
|
60
62
|
CoherenceResult,
|
|
61
63
|
DriftAlert,
|
|
62
64
|
DriftAnalysis,
|
|
63
65
|
DriftDirection,
|
|
64
66
|
DriftIndicator,
|
|
67
|
+
FleetCluster,
|
|
68
|
+
FleetCoherenceResult,
|
|
69
|
+
FleetOutlier,
|
|
70
|
+
PairwiseEntry,
|
|
65
71
|
Severity,
|
|
66
72
|
ValueAlignment,
|
|
67
73
|
ValueConflict,
|
|
74
|
+
ValueDivergence,
|
|
68
75
|
VerificationMetadata,
|
|
69
76
|
VerificationResult,
|
|
70
77
|
Violation,
|
|
@@ -80,6 +87,7 @@ __all__ = [
|
|
|
80
87
|
# Core functions
|
|
81
88
|
"verify_trace",
|
|
82
89
|
"check_coherence",
|
|
90
|
+
"check_fleet_coherence",
|
|
83
91
|
"detect_drift",
|
|
84
92
|
# SSM and Divergence (Phase 1 Braid extraction)
|
|
85
93
|
"SSMAnalyzer",
|
|
@@ -91,6 +99,13 @@ __all__ = [
|
|
|
91
99
|
"compute_centroid",
|
|
92
100
|
"cosine_similarity",
|
|
93
101
|
"compute_similarity_with_tfidf",
|
|
102
|
+
# Fleet Coherence (E-05)
|
|
103
|
+
"FleetCoherenceResult",
|
|
104
|
+
"PairwiseEntry",
|
|
105
|
+
"FleetOutlier",
|
|
106
|
+
"FleetCluster",
|
|
107
|
+
"ValueDivergence",
|
|
108
|
+
"AgentCoherenceSummary",
|
|
94
109
|
# Result models
|
|
95
110
|
"VerificationResult",
|
|
96
111
|
"Violation",
|
{agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/verification/api.py
RENAMED
|
@@ -10,8 +10,10 @@ See SPEC.md Sections 7, 6.4, and 8 for protocol specification.
|
|
|
10
10
|
|
|
11
11
|
from __future__ import annotations
|
|
12
12
|
|
|
13
|
+
import math
|
|
13
14
|
import re
|
|
14
15
|
import time
|
|
16
|
+
from collections import defaultdict
|
|
15
17
|
from datetime import datetime
|
|
16
18
|
from typing import Any
|
|
17
19
|
|
|
@@ -23,14 +25,21 @@ from aap.verification.constants import (
|
|
|
23
25
|
DEFAULT_SUSTAINED_TURNS_THRESHOLD,
|
|
24
26
|
MIN_COHERENCE_FOR_PROCEED,
|
|
25
27
|
NEAR_BOUNDARY_THRESHOLD,
|
|
28
|
+
OUTLIER_STD_DEV_THRESHOLD,
|
|
26
29
|
)
|
|
27
30
|
from aap.verification.models import (
|
|
31
|
+
AgentCoherenceSummary,
|
|
28
32
|
CoherenceResult,
|
|
29
33
|
DriftAlert,
|
|
30
34
|
DriftDirection,
|
|
31
35
|
DriftIndicator,
|
|
36
|
+
FleetCluster,
|
|
37
|
+
FleetCoherenceResult,
|
|
38
|
+
FleetOutlier,
|
|
39
|
+
PairwiseEntry,
|
|
32
40
|
ValueAlignment,
|
|
33
41
|
ValueConflict,
|
|
42
|
+
ValueDivergence,
|
|
34
43
|
VerificationMetadata,
|
|
35
44
|
VerificationResult,
|
|
36
45
|
Violation,
|
|
@@ -39,6 +48,42 @@ from aap.verification.models import (
|
|
|
39
48
|
)
|
|
40
49
|
|
|
41
50
|
|
|
51
|
+
def action_matches_list(action_name: str, action_list: list[str]) -> bool:
|
|
52
|
+
"""Check if a (possibly compound) action name matches any entry in a list.
|
|
53
|
+
|
|
54
|
+
Supports exact match, prefix match (before ':'), and compound name splitting.
|
|
55
|
+
Port of the TypeScript actionMatchesList() for SDK parity.
|
|
56
|
+
|
|
57
|
+
Args:
|
|
58
|
+
action_name: Action name, possibly compound (e.g. "exec, read")
|
|
59
|
+
action_list: List of allowed/forbidden action entries,
|
|
60
|
+
possibly with colon descriptions (e.g. "exec: execute shell commands")
|
|
61
|
+
|
|
62
|
+
Returns:
|
|
63
|
+
True if the action name matches an entry in the list
|
|
64
|
+
"""
|
|
65
|
+
components = action_name.split(", ") if ", " in action_name else [action_name]
|
|
66
|
+
|
|
67
|
+
return all(
|
|
68
|
+
_action_component_matches(component.strip(), action_list)
|
|
69
|
+
for component in components
|
|
70
|
+
if component.strip()
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _action_component_matches(component: str, action_list: list[str]) -> bool:
|
|
75
|
+
"""Check if a single action component matches any entry in the list."""
|
|
76
|
+
for entry in action_list:
|
|
77
|
+
if entry == component:
|
|
78
|
+
return True
|
|
79
|
+
colon_index = entry.find(":")
|
|
80
|
+
if colon_index > 0:
|
|
81
|
+
prefix = entry[:colon_index].strip()
|
|
82
|
+
if prefix == component:
|
|
83
|
+
return True
|
|
84
|
+
return False
|
|
85
|
+
|
|
86
|
+
|
|
42
87
|
def verify_trace(
|
|
43
88
|
trace: dict[str, Any],
|
|
44
89
|
card: dict[str, Any],
|
|
@@ -103,7 +148,7 @@ def verify_trace(
|
|
|
103
148
|
|
|
104
149
|
if action_category == "bounded":
|
|
105
150
|
bounded_actions = envelope.get("bounded_actions", [])
|
|
106
|
-
if action_name and action_name not in bounded_actions:
|
|
151
|
+
if action_name and not action_matches_list(action_name, bounded_actions):
|
|
107
152
|
violations.append(Violation.create(
|
|
108
153
|
ViolationType.UNBOUNDED_ACTION,
|
|
109
154
|
f"Action '{action_name}' not in bounded_actions: {bounded_actions}",
|
|
@@ -113,7 +158,7 @@ def verify_trace(
|
|
|
113
158
|
# Check forbidden actions
|
|
114
159
|
checks_performed.append("forbidden")
|
|
115
160
|
forbidden_actions = envelope.get("forbidden_actions", [])
|
|
116
|
-
if action_name and action_name in forbidden_actions:
|
|
161
|
+
if action_name and action_matches_list(action_name, forbidden_actions):
|
|
117
162
|
violations.append(Violation.create(
|
|
118
163
|
ViolationType.FORBIDDEN_ACTION,
|
|
119
164
|
f"Action '{action_name}' is in forbidden_actions",
|
|
@@ -298,6 +343,227 @@ def check_coherence(
|
|
|
298
343
|
)
|
|
299
344
|
|
|
300
345
|
|
|
346
|
+
def check_fleet_coherence(
    cards: list[dict[str, Any]],
    task_values: list[str] | None = None,
) -> FleetCoherenceResult:
    """Check fleet-level value coherence across N agents.

    Computes all C(n,2) pairwise coherence scores, then derives:
    - Fleet score: mean of all pairwise scores
    - Outlier detection: agents whose mean pairwise score lies at least
      OUTLIER_STD_DEV_THRESHOLD population standard deviations below the
      mean of the per-agent means (only evaluated for fleets of 3+ agents)
    - Cluster analysis: connected components of the graph whose edges are
      the pairs that check_coherence reports as compatible
    - Divergence report: declared values that at least one agent does not
      declare, or that at least one agent lists under conflicts_with

    Result ordering is deterministic: agents keep their input order, cluster
    members are listed in breadth-first order with neighbours visited
    alphabetically, and divergence entries are sorted by impact (descending)
    with ties broken alphabetically by value.

    Args:
        cards: List of dicts with "agent_id" and "card" keys
        task_values: Optional list of values required for the task; passed
            through unchanged to every pairwise check_coherence call

    Returns:
        FleetCoherenceResult with full analysis (scores rounded to 4 places)

    Raises:
        ValueError: If fewer than 2 agents provided
        KeyError: If an entry lacks the "agent_id" or "card" key
    """
    if len(cards) < 2:
        raise ValueError("Fleet coherence requires at least 2 agents")

    agent_ids = [c["agent_id"] for c in cards]

    # Step 1: Compute all pairwise coherence scores
    pairwise_matrix: list[PairwiseEntry] = []
    for i in range(len(cards)):
        for j in range(i + 1, len(cards)):
            result = check_coherence(cards[i]["card"], cards[j]["card"], task_values)
            pairwise_matrix.append(PairwiseEntry(
                agent_a=cards[i]["agent_id"],
                agent_b=cards[j]["agent_id"],
                result=result,
            ))

    # Unordered-pair -> score index so cluster scoring does not rescan the
    # whole matrix for every member pair. setdefault keeps the first entry
    # for a pair, matching a front-to-back scan of pairwise_matrix.
    pair_scores: dict[frozenset[str], float] = {}
    for pair in pairwise_matrix:
        pair_scores.setdefault(
            frozenset((pair.agent_a, pair.agent_b)), pair.result.score
        )

    # Step 2: Fleet score (mean of all pairwise scores) + min/max
    # len(cards) >= 2 guarantees at least one pair, so no division by zero.
    all_scores = [p.result.score for p in pairwise_matrix]
    fleet_score = sum(all_scores) / len(all_scores)
    min_pair_score = min(all_scores)
    max_pair_score = max(all_scores)

    # Step 3: Per-agent summaries
    agent_scores: dict[str, list[float]] = defaultdict(list)
    agent_compatible: dict[str, int] = defaultdict(int)
    agent_conflict: dict[str, int] = defaultdict(int)

    for pair in pairwise_matrix:
        agent_scores[pair.agent_a].append(pair.result.score)
        agent_scores[pair.agent_b].append(pair.result.score)
        if pair.result.compatible:
            agent_compatible[pair.agent_a] += 1
            agent_compatible[pair.agent_b] += 1
        if len(pair.result.value_alignment.conflicts) > 0:
            agent_conflict[pair.agent_a] += 1
            agent_conflict[pair.agent_b] += 1

    agent_means: dict[str, float] = {}
    for aid in agent_ids:
        scores = agent_scores[aid]
        agent_means[aid] = sum(scores) / len(scores) if scores else 0.0

    # Step 4: Outlier detection
    # Population standard deviation of the per-agent mean scores.
    mean_values = list(agent_means.values())
    fleet_mean_of_means = sum(mean_values) / len(mean_values)
    variance = sum((v - fleet_mean_of_means) ** 2 for v in mean_values) / len(mean_values)
    stddev = math.sqrt(variance)

    outliers: list[FleetOutlier] = []
    # With two agents both share the single pairwise score, and a zero spread
    # would divide by zero, so outliers are only sought for 3+ agents.
    if stddev > 0 and len(agent_ids) >= 3:
        for aid in agent_ids:
            agent_mean = agent_means[aid]
            # Positive deviation means the agent sits below the fleet mean.
            deviation = (fleet_mean_of_means - agent_mean) / stddev
            if deviation >= OUTLIER_STD_DEV_THRESHOLD:
                primary_conflicts: set[str] = set()
                for pair in pairwise_matrix:
                    if pair.agent_a == aid or pair.agent_b == aid:
                        for conflict in pair.result.value_alignment.conflicts:
                            # "(conflicts_with)" is a placeholder, not a value name.
                            if conflict.initiator_value != "(conflicts_with)":
                                primary_conflicts.add(conflict.initiator_value)
                            if conflict.responder_value != "(conflicts_with)":
                                primary_conflicts.add(conflict.responder_value)
                outliers.append(FleetOutlier(
                    agent_id=aid,
                    agent_mean_score=round(agent_mean, 4),
                    fleet_mean_score=round(fleet_mean_of_means, 4),
                    deviation=round(deviation, 4),
                    primary_conflicts=sorted(primary_conflicts),
                ))

    # Step 5: Cluster analysis (connected components at compatibility threshold)
    adjacency: dict[str, set[str]] = {aid: set() for aid in agent_ids}
    for pair in pairwise_matrix:
        if pair.result.compatible:
            adjacency[pair.agent_a].add(pair.agent_b)
            adjacency[pair.agent_b].add(pair.agent_a)

    visited: set[str] = set()
    clusters: list[FleetCluster] = []
    cluster_id = 0

    for aid in agent_ids:
        if aid in visited:
            continue

        # Breadth-first search where the component list doubles as the queue:
        # `cursor` marks the next member to expand, so nothing is popped from
        # the front of a list.
        component: list[str] = [aid]
        visited.add(aid)
        cursor = 0
        while cursor < len(component):
            current = component[cursor]
            cursor += 1
            # Sorted so member order does not depend on set iteration order.
            for neighbor in sorted(adjacency[current]):
                if neighbor not in visited:
                    visited.add(neighbor)
                    component.append(neighbor)

        # Compute internal coherence (mean score over member pairs; a
        # single-agent cluster has no pairs and is reported as 1.0)
        internal_scores: list[float] = []
        for ci, first in enumerate(component):
            for second in component[ci + 1:]:
                score = pair_scores.get(frozenset((first, second)))
                if score is not None:
                    internal_scores.append(score)
        internal_coherence = (
            sum(internal_scores) / len(internal_scores) if internal_scores else 1.0
        )

        # Find shared values (declared by every member) and collect every
        # value declared outside the cluster, in one pass over the fleet.
        members = set(component)
        shared: set[str] | None = None
        other_values: set[str] = set()
        for entry in cards:
            declared = set(entry["card"].get("values", {}).get("declared", []))
            if entry["agent_id"] in members:
                shared = declared if shared is None else shared & declared
            else:
                other_values |= declared
        shared_values = sorted(shared or set())

        # Find distinguishing values (shared inside, declared by nobody outside)
        distinguishing = [v for v in shared_values if v not in other_values]

        clusters.append(FleetCluster(
            cluster_id=cluster_id,
            agent_ids=component,
            internal_coherence=round(internal_coherence, 4),
            shared_values=shared_values,
            distinguishing_values=distinguishing,
        ))
        cluster_id += 1

    # Step 6: Divergence report
    all_values: set[str] = set()
    agent_value_map: dict[str, set[str]] = {}
    agent_conflict_map: dict[str, set[str]] = {}

    for entry in cards:
        declared = set(entry["card"].get("values", {}).get("declared", []))
        conflicts_with = set(entry["card"].get("values", {}).get("conflicts_with", []))
        agent_value_map[entry["agent_id"]] = declared
        agent_conflict_map[entry["agent_id"]] = conflicts_with
        all_values |= declared

    divergence_report: list[ValueDivergence] = []
    # Sorted so that entries with equal impact keep a reproducible order after
    # the stable sort below, instead of following set iteration order.
    for value in sorted(all_values):
        declaring = [aid for aid in agent_ids if value in agent_value_map[aid]]
        missing = [aid for aid in agent_ids if value not in agent_value_map[aid]]
        conflicting = [aid for aid in agent_ids if value in agent_conflict_map[aid]]

        # Declared by everyone and contested by no one: nothing to report.
        if not missing and not conflicting:
            continue

        impact = round((len(missing) + len(conflicting)) / len(agent_ids), 4)
        divergence_report.append(ValueDivergence(
            value=value,
            agents_declaring=declaring,
            agents_missing=missing,
            agents_conflicting=conflicting,
            impact_on_fleet_score=impact,
        ))

    divergence_report.sort(key=lambda d: d.impact_on_fleet_score, reverse=True)

    # Build agent cluster map
    agent_cluster_map: dict[str, int] = {}
    for cluster in clusters:
        for aid in cluster.agent_ids:
            agent_cluster_map[aid] = cluster.cluster_id

    outlier_ids = {o.agent_id for o in outliers}

    agent_summaries = [
        AgentCoherenceSummary(
            agent_id=aid,
            mean_score=round(agent_means[aid], 4),
            compatible_count=agent_compatible[aid],
            conflict_count=agent_conflict[aid],
            cluster_id=agent_cluster_map.get(aid, 0),
            is_outlier=aid in outlier_ids,
        )
        for aid in agent_ids
    ]

    return FleetCoherenceResult(
        fleet_score=round(fleet_score, 4),
        min_pair_score=round(min_pair_score, 4),
        max_pair_score=round(max_pair_score, 4),
        agent_count=len(cards),
        pair_count=len(pairwise_matrix),
        pairwise_matrix=pairwise_matrix,
        outliers=outliers,
        clusters=clusters,
        divergence_report=divergence_report,
        agent_summaries=agent_summaries,
    )
|
|
565
|
+
|
|
566
|
+
|
|
301
567
|
def detect_drift(
|
|
302
568
|
card: dict[str, Any],
|
|
303
569
|
traces: list[dict[str, Any]],
|
{agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/verification/constants.py
RENAMED
|
@@ -38,6 +38,15 @@ MIN_COHERENCE_FOR_PROCEED: float = 0.70
|
|
|
38
38
|
CONFLICT_PENALTY_MULTIPLIER: float = 0.50
|
|
39
39
|
|
|
40
40
|
|
|
41
|
+
# Fleet Coherence
|
|
42
|
+
# ----------------
|
|
43
|
+
# Standard deviations below fleet mean to flag an agent as outlier
|
|
44
|
+
OUTLIER_STD_DEV_THRESHOLD: float = 1.0
|
|
45
|
+
|
|
46
|
+
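# Minimum pairwise score to consider agents compatible for cluster analysis.
# NOTE(review): check_fleet_coherence builds its cluster graph from
# CoherenceResult.compatible and does not read this constant directly; confirm
# that the two cut-offs are meant to stay in sync.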
|
|
47
|
+
CLUSTER_COMPATIBILITY_THRESHOLD: float = 0.70
|
|
48
|
+
|
|
49
|
+
|
|
41
50
|
# Feature Extraction
|
|
42
51
|
# ------------------
|
|
43
52
|
# Minimum word length for content features (filters noise)
|
{agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/verification/models.py
RENAMED
|
@@ -258,3 +258,108 @@ class ValueConflict(BaseModel):
|
|
|
258
258
|
|
|
259
259
|
# Rebuild models with forward references
|
|
260
260
|
CoherenceResult.model_rebuild()
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
# --- Fleet Coherence Types (E-05: N-Way Value Coherence) ---
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
class PairwiseEntry(BaseModel):
    """One cell of the fleet matrix: the coherence result for a pair of agents."""

    # Identifiers of the two agents that were compared.
    agent_a: str = Field(
        ...,
        description="First agent ID",
    )
    agent_b: str = Field(
        ...,
        description="Second agent ID",
    )

    # Outcome of the pairwise comparison between agent_a and agent_b.
    result: CoherenceResult = Field(
        ...,
        description="Pairwise coherence result",
    )
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
class FleetOutlier(BaseModel):
    """Record describing an agent that was flagged as a fleet outlier."""

    agent_id: str = Field(
        ...,
        description="Agent ID",
    )

    # The agent's own mean next to the fleet-wide reference it is compared to.
    agent_mean_score: float = Field(
        ...,
        description="Agent's mean pairwise score",
    )
    fleet_mean_score: float = Field(
        ...,
        description="Fleet-wide mean score",
    )

    # Distance below the fleet mean, expressed in standard deviations.
    deviation: float = Field(
        ...,
        description="Standard deviations below fleet mean",
    )

    # Optional; defaults to an empty list.
    primary_conflicts: list[str] = Field(
        default_factory=list,
        description="Values causing primary conflicts",
    )
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
class FleetCluster(BaseModel):
    """A group of mutually reachable, compatible agents within the fleet."""

    cluster_id: int = Field(
        ...,
        description="Cluster identifier",
    )
    agent_ids: list[str] = Field(
        ...,
        description="Agent IDs in this cluster",
    )
    internal_coherence: float = Field(
        ...,
        description="Mean coherence score within the cluster",
    )

    # The two value lists are optional and default to empty.
    shared_values: list[str] = Field(
        default_factory=list, description="Values shared by all agents in the cluster"
    )
    distinguishing_values: list[str] = Field(
        default_factory=list, description="Values that distinguish this cluster from others"
    )
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
class ValueDivergence(BaseModel):
    """Per-value breakdown of which agents declare, lack, or oppose a value."""

    value: str = Field(
        ...,
        description="The value in question",
    )

    # Three optional agent-ID lists; each defaults to empty.
    agents_declaring: list[str] = Field(
        default_factory=list,
        description="Agent IDs that declare this value",
    )
    agents_missing: list[str] = Field(
        default_factory=list,
        description="Agent IDs missing this value",
    )
    agents_conflicting: list[str] = Field(
        default_factory=list, description="Agent IDs whose conflicts_with includes this value"
    )

    impact_on_fleet_score: float = Field(
        ...,
        description="Estimated impact on fleet score if resolved",
    )
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
class AgentCoherenceSummary(BaseModel):
    """Per-agent roll-up of scores, pair counts, cluster and outlier status."""

    agent_id: str = Field(
        ...,
        description="Agent ID",
    )
    mean_score: float = Field(..., description="Mean pairwise score with all other agents")

    # Pair tallies for this agent.
    compatible_count: int = Field(
        ...,
        description="Number of compatible pairs",
    )
    conflict_count: int = Field(
        ...,
        description="Number of conflicting pairs",
    )

    # Placement within the fleet analysis.
    cluster_id: int = Field(
        ...,
        description="Cluster this agent belongs to",
    )
    is_outlier: bool = Field(..., description="Whether this agent is flagged as an outlier")
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
class FleetCoherenceResult(BaseModel):
    """Top-level container for the outcome of an N-way fleet coherence check."""

    # Aggregate score statistics over all agent pairs.
    fleet_score: float = Field(..., description="Mean of all pairwise coherence scores")
    min_pair_score: float = Field(..., description="Minimum pairwise score (weakest link)")
    max_pair_score: float = Field(
        ...,
        description="Maximum pairwise score",
    )

    # Size of the analysis.
    agent_count: int = Field(
        ...,
        description="Number of agents analyzed",
    )
    pair_count: int = Field(
        ...,
        description="Number of pairwise comparisons",
    )

    # Required: the full list of pairwise results.
    pairwise_matrix: list[PairwiseEntry] = Field(..., description="All pairwise coherence results")

    # Derived analyses; each is optional and defaults to an empty list.
    outliers: list[FleetOutlier] = Field(
        default_factory=list,
        description="Agents flagged as outliers",
    )
    clusters: list[FleetCluster] = Field(
        default_factory=list,
        description="Clusters of compatible agents",
    )
    divergence_report: list[ValueDivergence] = Field(
        default_factory=list, description="Value dimensions where agents diverge"
    )
    agent_summaries: list[AgentCoherenceSummary] = Field(
        default_factory=list,
        description="Per-agent coherence summaries",
    )
|
|
File without changes
|
{agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/schemas/alignment-card.schema.json
RENAMED
|
File without changes
|
{agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/schemas/ap-trace.schema.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/schemas/__init__.py
RENAMED
|
File without changes
|
{agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/schemas/alignment_card.py
RENAMED
|
File without changes
|
{agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/schemas/ap_trace.py
RENAMED
|
File without changes
|
{agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/schemas/value_coherence.py
RENAMED
|
File without changes
|
|
File without changes
|
{agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/verification/divergence.py
RENAMED
|
File without changes
|
{agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/verification/features.py
RENAMED
|
File without changes
|
{agent_alignment_protocol-0.1.7 → agent_alignment_protocol-0.2.0}/src/aap/verification/ssm.py
RENAMED
|
File without changes
|