@mnemom/agent-alignment-protocol 0.1.7 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/index.d.mts +156 -1
- package/dist/index.d.ts +156 -1
- package/dist/index.js +235 -0
- package/dist/index.mjs +229 -0
- package/package.json +1 -1
- package/src/compliance.ts +56 -0
- package/src/constants.ts +8 -0
- package/src/index.ts +15 -1
- package/src/verification/api.ts +266 -0
- package/src/verification/models.ts +94 -0
package/dist/index.mjs
CHANGED
|
@@ -6,6 +6,8 @@ var MIN_COHERENCE_FOR_PROCEED = 0.7;
|
|
|
6
6
|
var CONFLICT_PENALTY_MULTIPLIER = 0.5;
|
|
7
7
|
var MIN_WORD_LENGTH = 3;
|
|
8
8
|
var MAX_TFIDF_FEATURES = 500;
|
|
9
|
+
var OUTLIER_STD_DEV_THRESHOLD = 1;
|
|
10
|
+
var CLUSTER_COMPATIBILITY_THRESHOLD = 0.7;
|
|
9
11
|
var ALGORITHM_VERSION = "1.2.0";
|
|
10
12
|
|
|
11
13
|
// src/verification/features.ts
|
|
@@ -381,6 +383,204 @@ function checkCoherence(myCard, theirCard, taskValues) {
|
|
|
381
383
|
proposed_resolution: proposedResolution
|
|
382
384
|
};
|
|
383
385
|
}
|
|
386
|
+
/**
 * Check fleet-level value coherence across N agents.
 *
 * Runs `checkCoherence` on every unordered pair of cards and derives:
 * - fleet score (mean of all pairwise scores) plus min/max pair score
 * - outliers: agents whose mean pairwise score is at least
 *   `OUTLIER_STD_DEV_THRESHOLD` population standard deviations below the mean
 *   of the per-agent means (only evaluated for fleets of 3+ agents)
 * - clusters: connected components of the graph whose edges are the pairs
 *   that `checkCoherence` reported as `compatible`
 * - divergence report: declared values that some agent does not declare or
 *   lists in `conflicts_with`, sorted by impact (highest first)
 *
 * All scores in the result are rounded to 4 decimal places.
 *
 * @param cards - Array of `{ agentId, card }`; agent IDs must be unique
 * @param taskValues - Optional task values, passed through to every pairwise
 *   `checkCoherence` call
 * @returns Fleet coherence result object
 * @throws Error if fewer than 2 agents are provided or an agent ID repeats
 */
function checkFleetCoherence(cards, taskValues) {
  const agentCount = cards.length;
  if (agentCount < 2) {
    throw new Error("Fleet coherence requires at least 2 agents");
  }

  // Results are attributed by agent ID, so a repeated ID would merge two
  // agents into one entry. Fail loudly instead of returning skewed numbers.
  const agentIds = cards.map((entry) => entry.agentId);
  const seenIds = new Set();
  for (const id of agentIds) {
    if (seenIds.has(id)) {
      throw new Error(`Fleet coherence requires unique agent IDs (duplicate: "${id}")`);
    }
    seenIds.add(id);
  }

  const round4 = (x) => Math.round(x * 1e4) / 1e4;
  const zeros = () => new Array(agentCount).fill(0);

  // Per-agent value data, indexed by the agent's position in `cards`.
  const declaredLists = cards.map((entry) => [...entry.card.values.declared ?? []]);
  const declaredSets = declaredLists.map((list) => new Set(list));
  const conflictSets = cards.map((entry) => new Set(entry.card.values.conflicts_with ?? []));

  // Step 1: pairwise coherence. Fleet and per-agent aggregates are folded in
  // the same pass; min/max are tracked incrementally so that large fleets do
  // not hit the engine's argument-count limit via Math.min(...scores).
  const pairwiseMatrix = [];
  const pairScores = cards.map(() => zeros());
  const scoreSums = zeros();
  const compatibleCounts = zeros();
  const conflictCounts = zeros();
  const neighbors = cards.map(() => []);
  let totalScore = 0;
  let minPairScore = Infinity;
  let maxPairScore = -Infinity;

  for (let i = 0; i < agentCount; i++) {
    for (let j = i + 1; j < agentCount; j++) {
      const result = checkCoherence(cards[i].card, cards[j].card, taskValues);
      pairwiseMatrix.push({ agent_a: agentIds[i], agent_b: agentIds[j], result });

      const score = result.score;
      totalScore += score;
      minPairScore = Math.min(minPairScore, score);
      maxPairScore = Math.max(maxPairScore, score);
      pairScores[i][j] = score;
      pairScores[j][i] = score;
      scoreSums[i] += score;
      scoreSums[j] += score;

      if (result.compatible) {
        compatibleCounts[i]++;
        compatibleCounts[j]++;
        neighbors[i].push(j);
        neighbors[j].push(i);
      }
      // Counts pairs that have at least one conflict, not individual conflicts.
      if (result.value_alignment.conflicts.length > 0) {
        conflictCounts[i]++;
        conflictCounts[j]++;
      }
    }
  }
  const fleetScore = totalScore / pairwiseMatrix.length;

  // Step 2: per-agent means (each agent takes part in agentCount - 1 pairs).
  const agentMeans = scoreSums.map((sum) => sum / (agentCount - 1));
  const fleetMeanOfMeans = agentMeans.reduce((a, b) => a + b, 0) / agentCount;
  const variance = agentMeans.reduce((acc, m) => acc + (m - fleetMeanOfMeans) ** 2, 0) / agentCount;
  const stddev = Math.sqrt(variance);

  // Step 3: outliers. Skipped for 2-agent fleets and when all means are equal.
  const outliers = [];
  const flaggedAsOutlier = new Array(agentCount).fill(false);
  if (stddev > 0 && agentCount >= 3) {
    for (let idx = 0; idx < agentCount; idx++) {
      const id = agentIds[idx];
      const agentMean = agentMeans[idx];
      const deviation = (fleetMeanOfMeans - agentMean) / stddev;
      if (deviation >= OUTLIER_STD_DEV_THRESHOLD) {
        // Collect the value names from every conflict in this agent's pairs,
        // skipping the "(conflicts_with)" placeholder side.
        const primaryConflicts = new Set();
        for (const pair of pairwiseMatrix) {
          if (pair.agent_a !== id && pair.agent_b !== id) continue;
          for (const conflict of pair.result.value_alignment.conflicts) {
            for (const side of [conflict.initiator_value, conflict.responder_value]) {
              if (side !== "(conflicts_with)") primaryConflicts.add(side);
            }
          }
        }
        flaggedAsOutlier[idx] = true;
        outliers.push({
          agent_id: id,
          agent_mean_score: round4(agentMean),
          fleet_mean_score: round4(fleetMeanOfMeans),
          deviation: round4(deviation),
          primary_conflicts: [...primaryConflicts]
        });
      }
    }
  }

  // Step 4: clusters = connected components over "compatible" edges (BFS).
  const clusterOf = new Array(agentCount).fill(-1);
  const clusters = [];
  for (let start = 0; start < agentCount; start++) {
    if (clusterOf[start] !== -1) continue;
    const clusterId = clusters.length;
    // `members` doubles as the BFS queue; `head` walks it without shifting.
    const members = [start];
    clusterOf[start] = clusterId;
    for (let head = 0; head < members.length; head++) {
      for (const next of neighbors[members[head]]) {
        if (clusterOf[next] === -1) {
          clusterOf[next] = clusterId;
          members.push(next);
        }
      }
    }

    // Mean score over every pair inside the component (1 for a singleton).
    let internalSum = 0;
    let internalPairs = 0;
    for (let a = 0; a < members.length; a++) {
      for (let b = a + 1; b < members.length; b++) {
        internalSum += pairScores[members[a]][members[b]];
        internalPairs++;
      }
    }
    const internalCoherence = internalPairs > 0 ? internalSum / internalPairs : 1;

    // Shared values: declared by every member of the cluster.
    const sharedValues = members.slice(1).reduce(
      (shared, m) => shared.filter((v) => declaredSets[m].has(v)),
      [...declaredLists[members[0]]]
    );

    // Distinguishing values: shared values that no agent outside declares.
    const memberSet = new Set(members);
    const declaredOutside = new Set();
    declaredLists.forEach((list, idx) => {
      if (!memberSet.has(idx)) {
        for (const v of list) declaredOutside.add(v);
      }
    });
    const distinguishingValues = sharedValues.filter((v) => !declaredOutside.has(v));

    clusters.push({
      cluster_id: clusterId,
      agent_ids: members.map((m) => agentIds[m]),
      internal_coherence: round4(internalCoherence),
      shared_values: sharedValues,
      distinguishing_values: distinguishingValues
    });
  }

  // Step 5: divergence report over every declared value in the fleet.
  const allValues = new Set();
  for (const list of declaredLists) {
    for (const v of list) allValues.add(v);
  }
  const divergenceReport = [];
  for (const value of allValues) {
    const declaring = [];
    const missing = [];
    const conflicting = [];
    let unaligned = 0;
    agentIds.forEach((id, idx) => {
      const declares = declaredSets[idx].has(value);
      const conflicts = conflictSets[idx].has(value);
      (declares ? declaring : missing).push(id);
      if (conflicts) conflicting.push(id);
      // Count each agent once even when it is both missing and conflicting,
      // so the impact stays a fraction in [0, 1].
      if (!declares || conflicts) unaligned++;
    });
    if (missing.length === 0 && conflicting.length === 0) continue;
    divergenceReport.push({
      value,
      agents_declaring: declaring,
      agents_missing: missing,
      agents_conflicting: conflicting,
      impact_on_fleet_score: round4(unaligned / agentCount)
    });
  }
  divergenceReport.sort((a, b) => b.impact_on_fleet_score - a.impact_on_fleet_score);

  const agentSummaries = agentIds.map((id, idx) => ({
    agent_id: id,
    mean_score: round4(agentMeans[idx]),
    compatible_count: compatibleCounts[idx],
    conflict_count: conflictCounts[idx],
    cluster_id: clusterOf[idx],
    is_outlier: flaggedAsOutlier[idx]
  }));

  return {
    fleet_score: round4(fleetScore),
    min_pair_score: round4(minPairScore),
    max_pair_score: round4(maxPairScore),
    agent_count: agentCount,
    pair_count: pairwiseMatrix.length,
    pairwise_matrix: pairwiseMatrix,
    outliers,
    clusters,
    divergence_report: divergenceReport,
    agent_summaries: agentSummaries
  };
}
|
|
384
584
|
function detectDrift(card, traces, similarityThreshold = DEFAULT_SIMILARITY_THRESHOLD, sustainedThreshold = DEFAULT_SUSTAINED_TURNS_THRESHOLD) {
|
|
385
585
|
const sorted = [...traces].sort(
|
|
386
586
|
(a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()
|
|
@@ -587,17 +787,46 @@ function wasEscalated(trace) {
|
|
|
587
787
|
/**
 * Report whether a trace recorded a violation.
 *
 * @param trace - Trace object; its `action.category` is inspected
 * @returns true when the action category is exactly "forbidden"
 */
function hadViolations(trace) {
  const { category } = trace.action;
  return category === "forbidden";
}
|
|
790
|
+
|
|
791
|
+
// src/compliance.ts

/**
 * Audit-commitment preset for the EU AI Act compliance configuration.
 *
 * NOTE: `query_endpoint` points at an example.com placeholder host; replace it
 * with your deployment's real trace query endpoint when spreading this preset
 * into a card.
 */
var EU_COMPLIANCE_AUDIT_COMMITMENT = {
  retention_days: 90,
  queryable: true,
  query_endpoint: "https://audit.example.com/traces",
  tamper_evidence: "append_only",
  trace_format: "ap-trace-v1"
};

/** Extension block carrying EU AI Act metadata under the `eu_ai_act` key. */
var EU_COMPLIANCE_EXTENSIONS = {
  eu_ai_act: {
    article_50_compliant: true,
    ai_system_classification: "general_purpose",
    disclosure_text: "This system is powered by an AI agent. Its decisions are logged and auditable. You may request a human review of any decision.",
    compliance_version: "2026-08"
  }
};

/** Recommended declared values for the EU compliance preset. */
var EU_COMPLIANCE_VALUES = [
  "transparency",
  "honesty",
  "user_control",
  "principal_benefit"
];
|
|
590
813
|
export {
|
|
591
814
|
ALGORITHM_VERSION,
|
|
815
|
+
CLUSTER_COMPATIBILITY_THRESHOLD,
|
|
592
816
|
CONFLICT_PENALTY_MULTIPLIER,
|
|
593
817
|
DEFAULT_SIMILARITY_THRESHOLD,
|
|
594
818
|
DEFAULT_SUSTAINED_TURNS_THRESHOLD,
|
|
819
|
+
EU_COMPLIANCE_AUDIT_COMMITMENT,
|
|
820
|
+
EU_COMPLIANCE_EXTENSIONS,
|
|
821
|
+
EU_COMPLIANCE_VALUES,
|
|
595
822
|
MAX_TFIDF_FEATURES,
|
|
596
823
|
MIN_COHERENCE_FOR_PROCEED,
|
|
597
824
|
MIN_WORD_LENGTH,
|
|
598
825
|
NEAR_BOUNDARY_THRESHOLD,
|
|
826
|
+
OUTLIER_STD_DEV_THRESHOLD,
|
|
599
827
|
VIOLATION_SEVERITY,
|
|
600
828
|
checkCoherence,
|
|
829
|
+
checkFleetCoherence,
|
|
601
830
|
computeCentroid,
|
|
602
831
|
cosineSimilarity,
|
|
603
832
|
createViolation,
|
package/src/compliance.ts
CHANGED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
 * EU AI Act Article 50 compliance presets for AAP.
 *
 * These presets provide recommended configuration values for deploying
 * AAP-instrumented agents in EU jurisdictions subject to AI Act
 * transparency obligations. Spread them into your AlignmentCard fields.
 *
 * All three presets are declared `as const`, so they are deeply readonly:
 * copy them (object spread / array spread) rather than mutating them.
 *
 * @example
 * ```typescript
 * import {
 *   EU_COMPLIANCE_AUDIT_COMMITMENT,
 *   EU_COMPLIANCE_EXTENSIONS,
 *   EU_COMPLIANCE_VALUES,
 * } from "@mnemom/agent-alignment-protocol";
 *
 * const card: AlignmentCard = {
 *   ...,
 *   audit_commitment: {
 *     ...EU_COMPLIANCE_AUDIT_COMMITMENT,
 *     // The preset endpoint is an example.com placeholder; use your own.
 *     query_endpoint: "https://audit.your-company.example/traces",
 *   },
 *   values: { declared: [...EU_COMPLIANCE_VALUES], ... },
 *   extensions: { ...EU_COMPLIANCE_EXTENSIONS },
 * };
 * ```
 *
 * DISCLAIMER: These presets reflect a technical mapping of AAP features to
 * Article 50 requirements. They do not constitute legal advice. Consult
 * qualified legal counsel for your specific compliance obligations.
 */

/**
 * Audit commitment values that satisfy Article 50(4) audit trail requirements.
 *
 * NOTE: `query_endpoint` is a placeholder on the reserved example.com domain
 * and must be overridden with the deployment's real trace query endpoint.
 */
export const EU_COMPLIANCE_AUDIT_COMMITMENT = {
  retention_days: 90,
  queryable: true,
  query_endpoint: "https://audit.example.com/traces",
  // The enclosing `as const` already narrows this to the literal type.
  tamper_evidence: "append_only",
  trace_format: "ap-trace-v1",
} as const;

/** Extension block for EU AI Act metadata on the Alignment Card. */
export const EU_COMPLIANCE_EXTENSIONS = {
  eu_ai_act: {
    article_50_compliant: true,
    ai_system_classification: "general_purpose",
    disclosure_text:
      "This system is powered by an AI agent. Its decisions are logged " +
      "and auditable. You may request a human review of any decision.",
    compliance_version: "2026-08",
  },
} as const;

/**
 * Recommended declared values for Article 50 transparency obligations.
 *
 * This is a readonly tuple; spread it (`[...EU_COMPLIANCE_VALUES]`) when a
 * mutable `string[]` is required.
 */
export const EU_COMPLIANCE_VALUES = [
  "transparency",
  "honesty",
  "user_control",
  "principal_benefit",
] as const;
|
package/src/constants.ts
CHANGED
|
@@ -39,6 +39,14 @@ export const MIN_WORD_LENGTH = 3;
|
|
|
39
39
|
/** Maximum features to extract from TF-IDF vectorization */
|
|
40
40
|
export const MAX_TFIDF_FEATURES = 500;
|
|
41
41
|
|
|
42
|
+
// Fleet Coherence
// ----------------
/**
 * Standard deviations below the fleet mean at which an agent is flagged as an
 * outlier. The check in `checkFleetCoherence` is inclusive
 * (`deviation >= OUTLIER_STD_DEV_THRESHOLD`), so an agent exactly this many
 * standard deviations below the mean is flagged.
 */
export const OUTLIER_STD_DEV_THRESHOLD = 1.0;

/**
 * Minimum pairwise score to consider agents compatible for cluster analysis.
 *
 * NOTE(review): the fleet clustering in `verification/api.ts` builds its graph
 * from the `compatible` flag of each pairwise result and does not read this
 * constant — confirm whether it is meant to be wired in.
 */
export const CLUSTER_COMPATIBILITY_THRESHOLD = 0.7;
|
|
49
|
+
|
|
42
50
|
// Version
|
|
43
51
|
// -------
|
|
44
52
|
export const ALGORITHM_VERSION = "1.2.0";
|
package/src/index.ts
CHANGED
|
@@ -35,7 +35,7 @@
|
|
|
35
35
|
*/
|
|
36
36
|
|
|
37
37
|
// Main API exports
|
|
38
|
-
export { verifyTrace, checkCoherence, detectDrift } from "./verification/api";
|
|
38
|
+
export { verifyTrace, checkCoherence, checkFleetCoherence, detectDrift } from "./verification/api";
|
|
39
39
|
|
|
40
40
|
// Schema types
|
|
41
41
|
export type {
|
|
@@ -107,6 +107,13 @@ export type {
|
|
|
107
107
|
DriftAnalysis,
|
|
108
108
|
DriftDirection,
|
|
109
109
|
DriftIndicator,
|
|
110
|
+
// Fleet Coherence (E-05)
|
|
111
|
+
FleetCoherenceResult,
|
|
112
|
+
PairwiseEntry,
|
|
113
|
+
FleetOutlier,
|
|
114
|
+
FleetCluster,
|
|
115
|
+
ValueDivergence,
|
|
116
|
+
AgentCoherenceSummary,
|
|
110
117
|
} from "./verification/models";
|
|
111
118
|
|
|
112
119
|
// Utility exports
|
|
@@ -134,3 +141,10 @@ export { createViolation, VIOLATION_SEVERITY } from "./verification/models";
|
|
|
134
141
|
|
|
135
142
|
// Constants
|
|
136
143
|
export * from "./constants";
|
|
144
|
+
|
|
145
|
+
// EU AI Act compliance presets
|
|
146
|
+
export {
|
|
147
|
+
EU_COMPLIANCE_AUDIT_COMMITMENT,
|
|
148
|
+
EU_COMPLIANCE_EXTENSIONS,
|
|
149
|
+
EU_COMPLIANCE_VALUES,
|
|
150
|
+
} from "./compliance";
|
package/src/verification/api.ts
CHANGED
|
@@ -16,6 +16,7 @@ import {
|
|
|
16
16
|
DEFAULT_SUSTAINED_TURNS_THRESHOLD,
|
|
17
17
|
MIN_COHERENCE_FOR_PROCEED,
|
|
18
18
|
NEAR_BOUNDARY_THRESHOLD,
|
|
19
|
+
OUTLIER_STD_DEV_THRESHOLD,
|
|
19
20
|
} from "../constants";
|
|
20
21
|
import type { AlignmentCard } from "../schemas/alignment-card";
|
|
21
22
|
import type { APTrace } from "../schemas/ap-trace";
|
|
@@ -26,11 +27,17 @@ import {
|
|
|
26
27
|
} from "./features";
|
|
27
28
|
import {
|
|
28
29
|
createViolation,
|
|
30
|
+
type AgentCoherenceSummary,
|
|
29
31
|
type CoherenceResult,
|
|
30
32
|
type DriftAlert,
|
|
31
33
|
type DriftDirection,
|
|
32
34
|
type DriftIndicator,
|
|
35
|
+
type FleetCluster,
|
|
36
|
+
type FleetCoherenceResult,
|
|
37
|
+
type FleetOutlier,
|
|
38
|
+
type PairwiseEntry,
|
|
33
39
|
type ValueConflictResult,
|
|
40
|
+
type ValueDivergence,
|
|
34
41
|
type VerificationResult,
|
|
35
42
|
type Violation,
|
|
36
43
|
type Warning,
|
|
@@ -340,6 +347,265 @@ export function checkCoherence(
|
|
|
340
347
|
};
|
|
341
348
|
}
|
|
342
349
|
|
|
350
|
+
/**
 * Check fleet-level value coherence across N agents.
 *
 * Runs `checkCoherence` on all C(n,2) unordered pairs of cards, then derives:
 * - Fleet score: mean of all pairwise scores (plus min and max pair score)
 * - Outlier detection: agents whose mean pairwise score is at least
 *   `OUTLIER_STD_DEV_THRESHOLD` population standard deviations below the mean
 *   of the per-agent means (only evaluated for fleets of 3+ agents)
 * - Cluster analysis: connected components of the graph whose edges are the
 *   pairs that `checkCoherence` reported as `compatible`
 * - Divergence report: declared values that some agent does not declare or
 *   lists in `conflicts_with`, sorted by impact (highest first)
 *
 * All scores in the result are rounded to 4 decimal places.
 *
 * @param cards - Array of agent cards with their IDs; IDs must be unique
 * @param taskValues - Optional list of values required for the task, passed
 *   through to every pairwise `checkCoherence` call
 * @returns FleetCoherenceResult with full analysis
 * @throws Error if fewer than 2 agents are provided or an agent ID repeats
 */
export function checkFleetCoherence(
  cards: Array<{ agentId: string; card: AlignmentCard }>,
  taskValues?: string[]
): FleetCoherenceResult {
  const agentCount = cards.length;
  if (agentCount < 2) {
    throw new Error("Fleet coherence requires at least 2 agents");
  }

  // Results are attributed by agent ID, so a repeated ID would merge two
  // agents into one entry. Fail loudly instead of returning skewed numbers.
  const agentIds = cards.map(entry => entry.agentId);
  const seenIds = new Set<string>();
  for (const id of agentIds) {
    if (seenIds.has(id)) {
      throw new Error(`Fleet coherence requires unique agent IDs (duplicate: "${id}")`);
    }
    seenIds.add(id);
  }

  const round4 = (x: number): number => Math.round(x * 10000) / 10000;
  const zeros = (): number[] => new Array<number>(agentCount).fill(0);

  // Per-agent value data, indexed by the agent's position in `cards`.
  const declaredLists: string[][] = cards.map(entry => [...(entry.card.values.declared ?? [])]);
  const declaredSets = declaredLists.map(list => new Set<string>(list));
  const conflictSets = cards.map(entry => new Set<string>(entry.card.values.conflicts_with ?? []));

  // Step 1: Pairwise coherence. Fleet and per-agent aggregates are folded in
  // the same pass; min/max are tracked incrementally so that large fleets do
  // not hit the engine's argument-count limit via Math.min(...scores).
  const pairwiseMatrix: PairwiseEntry[] = [];
  const pairScores: number[][] = cards.map(() => zeros());
  const scoreSums = zeros();
  const compatibleCounts = zeros();
  const conflictCounts = zeros();
  const neighbors: number[][] = cards.map(() => []);
  let totalScore = 0;
  let minPairScore = Infinity;
  let maxPairScore = -Infinity;

  for (let i = 0; i < agentCount; i++) {
    for (let j = i + 1; j < agentCount; j++) {
      const result = checkCoherence(cards[i].card, cards[j].card, taskValues);
      pairwiseMatrix.push({ agent_a: agentIds[i], agent_b: agentIds[j], result });

      const score = result.score;
      totalScore += score;
      minPairScore = Math.min(minPairScore, score);
      maxPairScore = Math.max(maxPairScore, score);
      pairScores[i][j] = score;
      pairScores[j][i] = score;
      scoreSums[i] += score;
      scoreSums[j] += score;

      if (result.compatible) {
        compatibleCounts[i]++;
        compatibleCounts[j]++;
        neighbors[i].push(j);
        neighbors[j].push(i);
      }
      // Counts pairs that have at least one conflict, not individual conflicts.
      if (result.value_alignment.conflicts.length > 0) {
        conflictCounts[i]++;
        conflictCounts[j]++;
      }
    }
  }
  const fleetScore = totalScore / pairwiseMatrix.length;

  // Step 2: Per-agent means (each agent takes part in agentCount - 1 pairs).
  const agentMeans = scoreSums.map(sum => sum / (agentCount - 1));
  const fleetMeanOfMeans = agentMeans.reduce((a, b) => a + b, 0) / agentCount;
  const variance =
    agentMeans.reduce((acc, m) => acc + (m - fleetMeanOfMeans) ** 2, 0) / agentCount;
  const stddev = Math.sqrt(variance);

  // Step 3: Outlier detection. Skipped for 2-agent fleets and when all
  // per-agent means are equal (stddev of 0).
  const outliers: FleetOutlier[] = [];
  const flaggedAsOutlier = new Array<boolean>(agentCount).fill(false);
  if (stddev > 0 && agentCount >= 3) {
    for (let idx = 0; idx < agentCount; idx++) {
      const id = agentIds[idx];
      const agentMean = agentMeans[idx];
      const deviation = (fleetMeanOfMeans - agentMean) / stddev;
      if (deviation >= OUTLIER_STD_DEV_THRESHOLD) {
        // Collect the value names from every conflict in this agent's pairs,
        // skipping the "(conflicts_with)" placeholder side.
        const primaryConflicts = new Set<string>();
        for (const pair of pairwiseMatrix) {
          if (pair.agent_a !== id && pair.agent_b !== id) continue;
          for (const conflict of pair.result.value_alignment.conflicts) {
            for (const side of [conflict.initiator_value, conflict.responder_value]) {
              if (side !== "(conflicts_with)") primaryConflicts.add(side);
            }
          }
        }
        flaggedAsOutlier[idx] = true;
        outliers.push({
          agent_id: id,
          agent_mean_score: round4(agentMean),
          fleet_mean_score: round4(fleetMeanOfMeans),
          deviation: round4(deviation),
          primary_conflicts: [...primaryConflicts],
        });
      }
    }
  }

  // Step 4: Cluster analysis — connected components over "compatible" edges.
  const clusterOf = new Array<number>(agentCount).fill(-1);
  const clusters: FleetCluster[] = [];
  for (let start = 0; start < agentCount; start++) {
    if (clusterOf[start] !== -1) continue;
    const clusterId = clusters.length;
    // BFS: `members` doubles as the queue; `head` walks it without shifting.
    const members: number[] = [start];
    clusterOf[start] = clusterId;
    for (let head = 0; head < members.length; head++) {
      for (const next of neighbors[members[head]]) {
        if (clusterOf[next] === -1) {
          clusterOf[next] = clusterId;
          members.push(next);
        }
      }
    }

    // Mean score over every pair inside the component (1 for a singleton).
    let internalSum = 0;
    let internalPairs = 0;
    for (let a = 0; a < members.length; a++) {
      for (let b = a + 1; b < members.length; b++) {
        internalSum += pairScores[members[a]][members[b]];
        internalPairs++;
      }
    }
    const internalCoherence = internalPairs > 0 ? internalSum / internalPairs : 1;

    // Shared values: declared by every member of the cluster.
    const sharedValues = members.slice(1).reduce<string[]>(
      (shared, m) => shared.filter(v => declaredSets[m].has(v)),
      [...declaredLists[members[0]]]
    );

    // Distinguishing values: shared values that no agent outside the cluster
    // declares.
    const memberSet = new Set<number>(members);
    const declaredOutside = new Set<string>();
    declaredLists.forEach((list, idx) => {
      if (!memberSet.has(idx)) {
        for (const v of list) declaredOutside.add(v);
      }
    });
    const distinguishingValues = sharedValues.filter(v => !declaredOutside.has(v));

    clusters.push({
      cluster_id: clusterId,
      agent_ids: members.map(m => agentIds[m]),
      internal_coherence: round4(internalCoherence),
      shared_values: sharedValues,
      distinguishing_values: distinguishingValues,
    });
  }

  // Step 5: Divergence report over every value declared anywhere in the fleet.
  const allValues = new Set<string>();
  for (const list of declaredLists) {
    for (const v of list) allValues.add(v);
  }

  const divergenceReport: ValueDivergence[] = [];
  for (const value of allValues) {
    const declaring: string[] = [];
    const missing: string[] = [];
    const conflicting: string[] = [];
    let unaligned = 0;
    agentIds.forEach((id, idx) => {
      const declares = declaredSets[idx].has(value);
      const conflicts = conflictSets[idx].has(value);
      (declares ? declaring : missing).push(id);
      if (conflicts) conflicting.push(id);
      // Count each agent once even when it is both missing and conflicting,
      // so the impact stays a fraction in [0, 1].
      if (!declares || conflicts) unaligned++;
    });

    // Skip values with no divergence (everyone declares, no one conflicts).
    if (missing.length === 0 && conflicting.length === 0) continue;

    divergenceReport.push({
      value,
      agents_declaring: declaring,
      agents_missing: missing,
      agents_conflicting: conflicting,
      impact_on_fleet_score: round4(unaligned / agentCount),
    });
  }

  // Highest impact first.
  divergenceReport.sort((a, b) => b.impact_on_fleet_score - a.impact_on_fleet_score);

  const agentSummaries: AgentCoherenceSummary[] = agentIds.map((id, idx) => ({
    agent_id: id,
    mean_score: round4(agentMeans[idx]),
    compatible_count: compatibleCounts[idx],
    conflict_count: conflictCounts[idx],
    cluster_id: clusterOf[idx],
    is_outlier: flaggedAsOutlier[idx],
  }));

  return {
    fleet_score: round4(fleetScore),
    min_pair_score: round4(minPairScore),
    max_pair_score: round4(maxPairScore),
    agent_count: agentCount,
    pair_count: pairwiseMatrix.length,
    pairwise_matrix: pairwiseMatrix,
    outliers,
    clusters,
    divergence_report: divergenceReport,
    agent_summaries: agentSummaries,
  };
}
|
|
608
|
+
|
|
343
609
|
/**
|
|
344
610
|
* Detect behavioral drift from declared alignment.
|
|
345
611
|
*
|