@mnemom/agent-alignment-protocol 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -32,10 +32,15 @@ import {
32
32
  type DriftAlert,
33
33
  type DriftDirection,
34
34
  type DriftIndicator,
35
+ type FaultLine,
36
+ type FaultLineAlignment,
37
+ type FaultLineAnalysis,
38
+ type FaultLineSummary,
35
39
  type FleetCluster,
36
40
  type FleetCoherenceResult,
37
41
  type FleetOutlier,
38
42
  type PairwiseEntry,
43
+ type Severity,
39
44
  type ValueConflictResult,
40
45
  type ValueDivergence,
41
46
  type VerificationResult,
@@ -745,6 +750,372 @@ export function detectDrift(
745
750
  return alerts;
746
751
  }
747
752
 
753
+ // ============================================================================
754
+ // Fault Line Analysis (E-06)
755
+ // ============================================================================
756
+
757
/**
 * Derive a short, reproducible hexadecimal token from a string.
 *
 * The input's UTF-16 code units are folded into a signed 32-bit rolling hash
 * (`hash * 31 + unit`, wrapped to 32 bits after every step). The magnitude of
 * that hash is written in base 16, left-padded with zeros up to `length`,
 * and truncated to `length` characters.
 *
 * Because the hash is only 32 bits wide its hex form never exceeds 8 digits,
 * so a larger `length` adds leading zeros rather than extra entropy.
 *
 * @param input - Text to hash.
 * @param length - Number of hex characters to produce.
 * @returns Lowercase hex string; the same `input` always yields the same value.
 */
function deterministicHex(input: string, length: number): string {
  const rolling = input
    .split('')
    // Math.imul keeps the multiplication in 32-bit space; `| 0` re-wraps after the add.
    .reduce((acc, unit) => (Math.imul(acc, 31) + unit.charCodeAt(0)) | 0, 0);
  const digits = Math.abs(rolling).toString(16);
  return digits.padStart(length, '0').slice(0, length);
}
770
+
771
/**
 * Jaccard index of two string lists, each treated as a set:
 * the number of shared members divided by the number of distinct members.
 *
 * Duplicates within either list are ignored. When both lists are empty the
 * result is defined as 0 rather than dividing by zero.
 *
 * @param a - First list of strings.
 * @param b - Second list of strings.
 * @returns A ratio between 0 and 1 inclusive.
 */
function jaccardSimilarity(a: string[], b: string[]): number {
  const left = new Set(a);
  const right = new Set(b);

  let shared = 0;
  left.forEach(member => {
    if (right.has(member)) {
      shared += 1;
    }
  });

  // |A ∪ B| = |A| + |B| − |A ∩ B|
  const combined = left.size + right.size - shared;
  if (combined === 0) {
    return 0;
  }
  return shared / combined;
}
779
+
780
/**
 * Substrings that hint at a specialised agent role. Consulted by the
 * "complementary" fault-line heuristic via {@link hasRoleKeyword}.
 */
const ROLE_KEYWORDS = [
  'safety',
  'executive',
  'cfo',
  'analyst',
  'compliance',
  'legal',
  'risk',
  'finance',
  'security',
  'ethics',
  'audit',
  'ops',
  'operations',
];

/**
 * Report whether an agent ID contains any entry of `ROLE_KEYWORDS`.
 *
 * Matching is a case-insensitive substring test, so a keyword embedded in a
 * longer word (e.g. "ops" inside "devops") also counts.
 *
 * @param agentId - Agent identifier to inspect.
 * @returns `true` if at least one keyword occurs in the ID.
 */
function hasRoleKeyword(agentId: string): boolean {
  const normalized = agentId.toLowerCase();
  for (const keyword of ROLE_KEYWORDS) {
    if (normalized.includes(keyword)) {
      return true;
    }
  }
  return false;
}
790
+
791
/**
 * Analyze fault lines in a fleet based on a FleetCoherenceResult.
 *
 * One fault line is produced per entry of `coherenceResult.divergence_report`:
 *
 * - **classification**
 *   - `incompatible` when the divergence lists any conflicting agent;
 *   - `priority_mismatch` when at least two agents declare the value and some
 *     pair of them has a pairwise coherence score below 0.5;
 *   - `complementary` when at least one agent declares and one misses the value
 *     and any of those agent IDs contains a role keyword;
 *   - `resolvable` otherwise.
 * - **impact_score** = `impact_on_fleet_score` × coordination overlap (mean
 *   pairwise Jaccard similarity of the involved agents' `bounded_actions`, or
 *   0.5 when fewer than two involved agents have any), optionally multiplied
 *   by the geometric mean of `reputation / 1000`, then clamped to [0, 1].
 * - **severity** from impact_score: ≥ 0.7 critical, ≥ 0.4 high, ≥ 0.2 medium,
 *   else low.
 *
 * Fault lines are sorted by severity and then by descending impact_score.
 * Fault lines whose `agents_missing` sets have Jaccard similarity above 0.6 are
 * then grouped (transitively) into alignment patterns.
 *
 * @param coherenceResult - Result from checkFleetCoherence
 * @param cards - Same agent cards passed to checkFleetCoherence
 * @param options - Optional settings. `reputationScores` maps agent ID to a
 *   reputation value (agents without an entry, or with a NaN entry, count as
 *   500). `taskContext` is accepted but not read by this function.
 * @returns FaultLineAnalysis with fault lines and alignment patterns
 */
export function analyzeFaultLines(
  coherenceResult: FleetCoherenceResult,
  cards: Array<{ agentId: string; card: AlignmentCard }>,
  options?: { reputationScores?: Record<string, number>; taskContext?: string }
): FaultLineAnalysis {
  const reputationScores = options?.reputationScores;

  // Lookup: agentId → bounded_actions
  const agentBoundedActions = new Map<string, string[]>();
  for (const { agentId, card } of cards) {
    agentBoundedActions.set(agentId, card.autonomy_envelope?.bounded_actions ?? []);
  }

  // Lookup: agent → other agent → pairwise entry, filled in both directions.
  // Only the first entry seen for a pair is kept, matching a first-match scan
  // of the matrix, and the matrix is walked once instead of once per pair.
  type PairEntry = FleetCoherenceResult['pairwise_matrix'][number];
  const pairEntries = new Map<string, Map<string, PairEntry>>();
  const rememberPair = (from: string, to: string, entry: PairEntry): void => {
    let row = pairEntries.get(from);
    if (row === undefined) {
      row = new Map<string, PairEntry>();
      pairEntries.set(from, row);
    }
    if (!row.has(to)) {
      row.set(to, entry);
    }
  };
  for (const entry of coherenceResult.pairwise_matrix) {
    rememberPair(entry.agent_a, entry.agent_b, entry);
    rememberPair(entry.agent_b, entry.agent_a, entry);
  }

  const faultLines: FaultLine[] = [];

  for (const divergence of coherenceResult.divergence_report) {
    const {
      value,
      agents_declaring,
      agents_missing,
      agents_conflicting,
      impact_on_fleet_score,
    } = divergence;

    // All agents involved in this fault line (deduplicated, first-seen order)
    const involvedAgents = [
      ...new Set([...agents_declaring, ...agents_missing, ...agents_conflicting]),
    ];

    // --- Classification ---
    // priority_mismatch: two declaring agents have a pairwise score < 0.5
    const hasLowScoringDeclaringPair = (): boolean => {
      for (let i = 0; i < agents_declaring.length; i++) {
        for (let j = i + 1; j < agents_declaring.length; j++) {
          const entry = pairEntries.get(agents_declaring[i])?.get(agents_declaring[j]);
          if (entry && entry.result.score < 0.5) {
            return true;
          }
        }
      }
      return false;
    };
    // complementary: heuristic — some agent (declaring or missing) has a role keyword
    const involvesSpecializedAgent = (): boolean =>
      [...agents_declaring, ...agents_missing].some(id => hasRoleKeyword(id));

    let classification: FaultLine['classification'];
    if (agents_conflicting.length > 0) {
      classification = 'incompatible';
    } else if (agents_declaring.length >= 2 && hasLowScoringDeclaringPair()) {
      classification = 'priority_mismatch';
    } else if (
      agents_declaring.length >= 1 &&
      agents_missing.length >= 1 &&
      involvesSpecializedAgent()
    ) {
      classification = 'complementary';
    } else {
      classification = 'resolvable';
    }

    // --- Coordination overlap (mean pairwise Jaccard of non-empty bounded_actions) ---
    const nonEmptyActionSets = involvedAgents
      .map(id => agentBoundedActions.get(id) ?? [])
      .filter(actions => actions.length > 0);
    let coordinationOverlap = 0.5; // neutral value when there is nothing to compare
    if (nonEmptyActionSets.length >= 2) {
      let total = 0;
      let count = 0;
      for (let i = 0; i < nonEmptyActionSets.length; i++) {
        for (let j = i + 1; j < nonEmptyActionSets.length; j++) {
          total += jaccardSimilarity(nonEmptyActionSets[i], nonEmptyActionSets[j]);
          count++;
        }
      }
      coordinationOverlap = total / count;
    }

    // --- impact_score ---
    let impactScore = impact_on_fleet_score * coordinationOverlap;

    // Reputation weighting: multiply by geometric mean of reputation/1000
    if (reputationScores && involvedAgents.length > 0) {
      const repValues = involvedAgents.map(id => {
        const raw = reputationScores[id];
        // Missing or NaN scores fall back to the neutral 500 so that one bad
        // entry cannot turn the whole impact score into NaN.
        const reputation = raw == null || Number.isNaN(raw) ? 500 : raw;
        return Math.max(0.001, reputation / 1000); // avoid log(0)
      });
      const logSum = repValues.reduce((sum, r) => sum + Math.log(r), 0);
      impactScore *= Math.exp(logSum / repValues.length);
    }

    impactScore = Math.min(1, Math.max(0, impactScore));

    // --- Severity ---
    let severity: Severity;
    if (impactScore >= 0.7) {
      severity = 'critical';
    } else if (impactScore >= 0.4) {
      severity = 'high';
    } else if (impactScore >= 0.2) {
      severity = 'medium';
    } else {
      severity = 'low';
    }

    // --- Resolution hint ---
    let resolutionHint: string;
    switch (classification) {
      case 'resolvable':
        resolutionHint = `Add value '${value}' to ${agents_missing.join(', ')} alignment card(s).`;
        break;
      case 'priority_mismatch':
        resolutionHint = `Align priority/definition of '${value}' across all declaring agents.`;
        break;
      case 'incompatible':
        resolutionHint = `Value '${value}' conflicts with ${agents_conflicting.join(', ')}. Requires human review.`;
        break;
      case 'complementary':
        resolutionHint = `Value '${value}' divergence appears intentional given agent specializations.`;
        break;
    }

    // --- affects_capabilities: bounded_actions shared by every involved agent ---
    let affectsCapabilities: string[] = [];
    if (involvedAgents.length > 0) {
      const firstActions = agentBoundedActions.get(involvedAgents[0]) ?? [];
      affectsCapabilities = firstActions.filter(action =>
        involvedAgents.every(id => (agentBoundedActions.get(id) ?? []).includes(action))
      );
    }

    // --- deterministic id (sort a copy so involvedAgents keeps its order) ---
    const idInput = [value, ...[...involvedAgents].sort()].join('|');
    const id = deterministicHex(idInput, 12);

    faultLines.push({
      id,
      value,
      classification,
      severity,
      agents_declaring,
      agents_missing,
      agents_conflicting,
      impact_score: Math.round(impactScore * 10000) / 10000,
      resolution_hint: resolutionHint,
      affects_capabilities: affectsCapabilities,
    });
  }

  // Sort: critical first, then by impact_score descending
  const severityOrder: Record<Severity, number> = {
    critical: 0,
    high: 1,
    medium: 2,
    low: 3,
  };
  faultLines.sort((a, b) => {
    const bySeverity = severityOrder[a.severity] - severityOrder[b.severity];
    if (bySeverity !== 0) return bySeverity;
    return b.impact_score - a.impact_score;
  });

  // --- Fault line alignment detection ---
  // Any two fault lines whose agents_missing sets have Jaccard similarity > 0.6
  // are linked; linked fault lines end up in the same group.
  const grouped = new Map<number, number[]>(); // groupId → faultLine indices
  const groupAssignment = new Map<number, number>(); // faultLine index → groupId
  let nextGroupId = 0;

  for (let i = 0; i < faultLines.length; i++) {
    for (let j = i + 1; j < faultLines.length; j++) {
      const sim = jaccardSimilarity(faultLines[i].agents_missing, faultLines[j].agents_missing);
      if (!(sim > 0.6)) continue;

      const gi = groupAssignment.get(i);
      const gj = groupAssignment.get(j);

      if (gi === undefined && gj === undefined) {
        // Neither is grouped yet: start a new group.
        const gid = nextGroupId++;
        grouped.set(gid, [i, j]);
        groupAssignment.set(i, gid);
        groupAssignment.set(j, gid);
      } else if (gi !== undefined && gj === undefined) {
        grouped.get(gi)?.push(j);
        groupAssignment.set(j, gi);
      } else if (gi === undefined && gj !== undefined) {
        grouped.get(gj)?.push(i);
        groupAssignment.set(i, gj);
      } else if (gi !== undefined && gj !== undefined && gi !== gj) {
        // Both already grouped separately: fold the higher-numbered group
        // into the lower-numbered one.
        const survivingId = Math.min(gi, gj);
        const absorbedId = Math.max(gi, gj);
        const survivingMembers = grouped.get(survivingId) ?? [];
        const absorbedMembers = grouped.get(absorbedId) ?? [];
        grouped.set(survivingId, [...new Set([...survivingMembers, ...absorbedMembers])]);
        grouped.delete(absorbedId);
        for (const idx of absorbedMembers) {
          groupAssignment.set(idx, survivingId);
        }
      }
    }
  }

  const alignments: FaultLineAlignment[] = [];

  for (const members of grouped.values()) {
    if (members.length < 2) continue;
    const unique = [...new Set(members)];
    const groupFaultLines = unique.map(idx => faultLines[idx]);

    const minorityAgents = [
      ...new Set(groupFaultLines.flatMap(fl => fl.agents_missing)),
    ];
    const majorityAgents = [
      ...new Set(groupFaultLines.flatMap(fl => fl.agents_declaring)),
    ];

    // Alignment score: mean pairwise Jaccard of agents_missing within the group
    let jaccardSum = 0;
    let jaccardCount = 0;
    for (let i = 0; i < groupFaultLines.length; i++) {
      for (let j = i + 1; j < groupFaultLines.length; j++) {
        jaccardSum += jaccardSimilarity(
          groupFaultLines[i].agents_missing,
          groupFaultLines[j].agents_missing
        );
        jaccardCount++;
      }
    }
    const alignmentScore = jaccardCount > 0 ? jaccardSum / jaccardCount : 0;

    // Base severity on group size; a two-member group is raised to 'high' if
    // any of its fault lines is critical/high.
    const hasHigherSeverity = groupFaultLines.some(
      fl => fl.severity === 'critical' || fl.severity === 'high'
    );
    let severity: Severity = unique.length >= 3 ? 'high' : 'medium';
    if (hasHigherSeverity && severity === 'medium') {
      severity = 'high';
    }

    const sortedFaultLineIds = groupFaultLines.map(fl => fl.id).sort();
    const alignmentId = deterministicHex(sortedFaultLineIds.join('|'), 12);

    alignments.push({
      id: alignmentId,
      fault_line_ids: sortedFaultLineIds,
      minority_agents: minorityAgents,
      majority_agents: majorityAgents,
      alignment_score: Math.round(alignmentScore * 10000) / 10000,
      severity,
      description: `${groupFaultLines.length} fault lines consistently isolate ${minorityAgents.join(', ')} from the team`,
    });
  }

  // --- Summary ---
  const countWhere = (matches: (fl: FaultLine) => boolean): number =>
    faultLines.filter(matches).length;
  const summary: FaultLineSummary = {
    total: faultLines.length,
    resolvable: countWhere(fl => fl.classification === 'resolvable'),
    priority_mismatch: countWhere(fl => fl.classification === 'priority_mismatch'),
    incompatible: countWhere(fl => fl.classification === 'incompatible'),
    complementary: countWhere(fl => fl.classification === 'complementary'),
    critical_count: countWhere(fl => fl.severity === 'critical'),
  };

  // --- analysis_id: deterministic from fleet_score + sorted fault_line ids ---
  const analysisIdInput = [
    String(coherenceResult.fleet_score),
    ...faultLines.map(fl => fl.id).sort(),
  ].join('|');
  const analysisId = deterministicHex(analysisIdInput, 16);

  return {
    analysis_id: analysisId,
    fleet_score: coherenceResult.fleet_score,
    fault_lines: faultLines,
    alignments,
    summary,
  };
}
1102
+
1103
/**
 * One-call helper that computes fleet coherence for the given cards and then
 * feeds that result, the same cards, and the options into analyzeFaultLines.
 *
 * @param cards - Array of agent cards
 * @param options - Optional reputationScores and taskContext, forwarded
 *   unchanged to analyzeFaultLines
 * @returns Both the FleetCoherenceResult (`coherence`) and the
 *   FaultLineAnalysis derived from it (`analysis`)
 */
export function checkFleetFaultLines(
  cards: Array<{ agentId: string; card: AlignmentCard }>,
  options?: { reputationScores?: Record<string, number>; taskContext?: string }
): { coherence: FleetCoherenceResult; analysis: FaultLineAnalysis } {
  const coherence = checkFleetCoherence(cards);
  return {
    coherence,
    analysis: analyzeFaultLines(coherence, cards, options),
  };
}
1118
+
748
1119
  /**
749
1120
  * Evaluate a condition expression against trace context.
750
1121
  *
@@ -765,7 +1136,8 @@ function evaluateCondition(condition: string, trace: APTrace): boolean {
765
1136
  }
766
1137
 
767
1138
  // Handle field > value (numeric comparison)
768
- const numericMatch = condition.match(/(\w+)\s*([><=!]+)\s*(\d+(?:\.\d+)?)/);
1139
+ // Anchored regex to prevent polynomial backtracking (ReDoS)
1140
+ const numericMatch = condition.match(/^\s*(\w+)\s*([><=!]+)\s*(\d+(?:\.\d+)?)\s*$/);
769
1141
  if (numericMatch) {
770
1142
  const [, field, op, valueStr] = numericMatch;
771
1143
  const value = parseFloat(valueStr);
@@ -274,3 +274,84 @@ export interface FleetCoherenceResult {
274
274
  /** Per-agent coherence summaries */
275
275
  agent_summaries: AgentCoherenceSummary[];
276
276
  }
277
+
278
+ // --- Fault Line Analysis Types (E-06: Fault Line Detection) ---
279
+
280
/**
 * Nature of a fault line, as assigned by analyzeFaultLines:
 *
 * - `resolvable` — none of the other three rules applied.
 * - `priority_mismatch` — at least two agents declare the value and some pair
 *   of them has a pairwise coherence score below 0.5.
 * - `incompatible` — at least one agent is listed as conflicting with the value.
 * - `complementary` — the value is both declared and missing, and an involved
 *   agent ID contains a role keyword.
 */
export type FaultLineClassification =
  'resolvable' | 'priority_mismatch' | 'incompatible' | 'complementary';
286
+
287
/**
 * One fault line: a single value on which the fleet's agents are split.
 */
export interface FaultLine {
  /** Stable identifier, reproducible for the same value and involved agents. */
  id: string;

  /** Name of the value the fleet diverges on. */
  value: string;

  /** Category describing the nature of the split. */
  classification: FaultLineClassification;

  /** Severity bucket derived from `impact_score`. */
  severity: Severity;

  /** IDs of the agents that declare the value. */
  agents_declaring: string[];

  /** IDs of the agents that do not declare the value. */
  agents_missing: string[];

  /** IDs of the agents whose `conflicts_with` list names the value. */
  agents_conflicting: string[];

  /** Weighted impact, in the range 0.0 to 1.0. */
  impact_score: number;

  /** Human-readable suggestion for addressing the fault line. */
  resolution_hint: string;

  /** Bounded actions that every involved agent has in common. */
  affects_capabilities: string[];
}
310
+
311
/**
 * Tallies over a set of fault lines: the overall count, one count per
 * classification, and the number at critical severity.
 */
export interface FaultLineSummary {
  /** How many fault lines there are in total. */
  total: number;

  /** How many are classified `resolvable`. */
  resolvable: number;

  /** How many are classified `priority_mismatch`. */
  priority_mismatch: number;

  /** How many are classified `incompatible`. */
  incompatible: number;

  /** How many are classified `complementary`. */
  complementary: number;

  /** How many have `critical` severity, regardless of classification. */
  critical_count: number;
}
326
+
327
/**
 * A recurring split: several fault lines that isolate a similar set of agents.
 */
export interface FaultLineAlignment {
  /** Stable identifier, reproducible for the same set of fault lines. */
  id: string;

  /** IDs of the fault lines grouped into this alignment. */
  fault_line_ids: string[];

  /**
   * Agents on the missing side of the grouped fault lines. As populated by
   * analyzeFaultLines this is the union of `agents_missing` across the group.
   */
  minority_agents: string[];

  /**
   * Agents on the declaring side of the grouped fault lines. As populated by
   * analyzeFaultLines this is the union of `agents_declaring` across the group.
   */
  majority_agents: string[];

  /** Average pairwise Jaccard similarity among the group's fault lines. */
  alignment_score: number;

  /** Severity assigned to the pattern as a whole. */
  severity: Severity;

  /** Plain-language summary of the pattern. */
  description: string;
}
344
+
345
/**
 * Complete output of a fault line analysis run.
 */
export interface FaultLineAnalysis {
  /** Reproducible identifier for this analysis. */
  analysis_id: string;

  /** Fleet coherence score, carried over from the FleetCoherenceResult. */
  fleet_score: number;

  /** Fault lines found, ordered by severity and then by descending impact_score. */
  fault_lines: FaultLine[];

  /** Alignment patterns found across the fault lines. */
  alignments: FaultLineAlignment[];

  /** Per-classification counts for `fault_lines`. */
  summary: FaultLineSummary;
}