@mnemom/agent-alignment-protocol 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -457,14 +457,19 @@ declare function checkCoherence(myCard: AlignmentCard, theirCard: AlignmentCard,
457
457
  /**
458
458
  * Detect behavioral drift from declared alignment.
459
459
  *
460
- * Analyzes traces chronologically, computing similarity between each
461
- * trace's behavior and the declared alignment. Alerts when sustained
462
- * low similarity is detected (consecutive traces below threshold).
460
+ * Computes a baseline centroid from the first N traces, then compares
461
+ * subsequent traces against this centroid using cosine similarity.
462
+ * Trace-to-trace comparison provides symmetric feature spaces, yielding
463
+ * meaningful similarity scores (unlike trace-to-card which is structurally
464
+ * depressed due to asymmetric features).
465
+ *
466
+ * Alerts when sustained low similarity is detected (consecutive traces
467
+ * below threshold).
463
468
  *
464
469
  * @see SPEC Section 8 and Appendix B.2 for algorithm specification.
465
470
  *
466
- * @param card - Alignment Card to compare against
467
- * @param traces - List of AP-Traces in chronological order
471
+ * @param card - Alignment Card (used for card_id and direction inference)
472
+ * @param traces - List of AP-Traces (sorted chronologically internally)
468
473
  * @param similarityThreshold - Alert when similarity drops below (default: 0.30)
469
474
  * @param sustainedThreshold - Alert after N consecutive low-similarity traces (default: 3)
470
475
  * @returns List of DriftAlert objects for detected drift events
@@ -646,6 +651,16 @@ declare function extractCardFeatures(card: AlignmentCard): FeatureVector;
646
651
  * Extract features from an AP-Trace.
647
652
  */
648
653
  declare function extractTraceFeatures(trace: APTrace): FeatureVector;
654
+ /**
655
+ * Compute the centroid (element-wise mean) of multiple feature vectors.
656
+ *
657
+ * Used by drift detection to build a baseline from the first N traces,
658
+ * enabling trace-to-trace comparison instead of trace-to-card comparison.
659
+ *
660
+ * @param vectors - Array of feature vectors to average
661
+ * @returns Centroid feature vector
662
+ */
663
+ declare function computeCentroid(vectors: FeatureVector[]): FeatureVector;
649
664
  /**
650
665
  * Compute cosine similarity between two feature vectors.
651
666
  *
@@ -678,6 +693,6 @@ declare const CONFLICT_PENALTY_MULTIPLIER = 0.5;
678
693
  declare const MIN_WORD_LENGTH = 3;
679
694
  /** Maximum features to extract from TF-IDF vectorization */
680
695
  declare const MAX_TFIDF_FEATURES = 500;
681
- declare const ALGORITHM_VERSION = "1.1.0";
696
+ declare const ALGORITHM_VERSION = "1.2.0";
682
697
 
683
- export { ALGORITHM_VERSION, type APTrace, type Action, type ActionCategory, type ActionTarget, type ActionType, type AlignmentCard, type AlignmentCardRequest, type AlignmentCardResponse, type Alternative, type AuditCommitment, type AuditStorage, type AutonomyEnvelope, type AutonomyScope, CONFLICT_PENALTY_MULTIPLIER, type Coherence, type CoherenceResult, type CoherenceResultMessage, DEFAULT_SIMILARITY_THRESHOLD, DEFAULT_SUSTAINED_TURNS_THRESHOLD, type DataSharing, type Decision, type DriftAlert, type DriftAnalysis, type DriftDirection, type DriftIndicator, type Escalation, type EscalationStatus, type EscalationTrigger, type HierarchyType, MAX_TFIDF_FEATURES, MIN_COHERENCE_FOR_PROCEED, MIN_WORD_LENGTH, type MonetaryValue, NEAR_BOUNDARY_THRESHOLD, type Principal, type PrincipalResponse, type PrincipalType, type ProposedCollaboration, type ProposedResolution, type RelationshipType, type RequesterInfo, type Severity, type Signature, type StorageType, type TamperEvidence, type TaskContext, type TraceContext, type TriggerAction, type TriggerCheck, VIOLATION_SEVERITY, type ValueAlignment, type ValueAlignmentDetail, type ValueCoherenceCheck, type ValueCoherenceMessage, type ValueConflict, type ValueConflictResult, type ValueDefinition, type Values, type VerificationMetadata, type VerificationResult, type Violation, type ViolationType, type Warning, checkCoherence, cosineSimilarity, createViolation, detectDrift, extractCardFeatures, extractTraceFeatures, getSelectedAlternative, hadViolations, hasValue, isActionBounded, isActionForbidden, isCardExpired, verifyTrace, wasEscalated };
698
+ export { ALGORITHM_VERSION, type APTrace, type Action, type ActionCategory, type ActionTarget, type ActionType, type AlignmentCard, type AlignmentCardRequest, type AlignmentCardResponse, type Alternative, type AuditCommitment, type AuditStorage, type AutonomyEnvelope, type AutonomyScope, CONFLICT_PENALTY_MULTIPLIER, type Coherence, type CoherenceResult, type CoherenceResultMessage, DEFAULT_SIMILARITY_THRESHOLD, DEFAULT_SUSTAINED_TURNS_THRESHOLD, type DataSharing, type Decision, type DriftAlert, type DriftAnalysis, type DriftDirection, type DriftIndicator, type Escalation, type EscalationStatus, type EscalationTrigger, type HierarchyType, MAX_TFIDF_FEATURES, MIN_COHERENCE_FOR_PROCEED, MIN_WORD_LENGTH, type MonetaryValue, NEAR_BOUNDARY_THRESHOLD, type Principal, type PrincipalResponse, type PrincipalType, type ProposedCollaboration, type ProposedResolution, type RelationshipType, type RequesterInfo, type Severity, type Signature, type StorageType, type TamperEvidence, type TaskContext, type TraceContext, type TriggerAction, type TriggerCheck, VIOLATION_SEVERITY, type ValueAlignment, type ValueAlignmentDetail, type ValueCoherenceCheck, type ValueCoherenceMessage, type ValueConflict, type ValueConflictResult, type ValueDefinition, type Values, type VerificationMetadata, type VerificationResult, type Violation, type ViolationType, type Warning, checkCoherence, computeCentroid, cosineSimilarity, createViolation, detectDrift, extractCardFeatures, extractTraceFeatures, getSelectedAlternative, hadViolations, hasValue, isActionBounded, isActionForbidden, isCardExpired, verifyTrace, wasEscalated };
package/dist/index.d.ts CHANGED
@@ -457,14 +457,19 @@ declare function checkCoherence(myCard: AlignmentCard, theirCard: AlignmentCard,
457
457
  /**
458
458
  * Detect behavioral drift from declared alignment.
459
459
  *
460
- * Analyzes traces chronologically, computing similarity between each
461
- * trace's behavior and the declared alignment. Alerts when sustained
462
- * low similarity is detected (consecutive traces below threshold).
460
+ * Computes a baseline centroid from the first N traces, then compares
461
+ * subsequent traces against this centroid using cosine similarity.
462
+ * Trace-to-trace comparison provides symmetric feature spaces, yielding
463
+ * meaningful similarity scores (unlike trace-to-card which is structurally
464
+ * depressed due to asymmetric features).
465
+ *
466
+ * Alerts when sustained low similarity is detected (consecutive traces
467
+ * below threshold).
463
468
  *
464
469
  * @see SPEC Section 8 and Appendix B.2 for algorithm specification.
465
470
  *
466
- * @param card - Alignment Card to compare against
467
- * @param traces - List of AP-Traces in chronological order
471
+ * @param card - Alignment Card (used for card_id and direction inference)
472
+ * @param traces - List of AP-Traces (sorted chronologically internally)
468
473
  * @param similarityThreshold - Alert when similarity drops below (default: 0.30)
469
474
  * @param sustainedThreshold - Alert after N consecutive low-similarity traces (default: 3)
470
475
  * @returns List of DriftAlert objects for detected drift events
@@ -646,6 +651,16 @@ declare function extractCardFeatures(card: AlignmentCard): FeatureVector;
646
651
  * Extract features from an AP-Trace.
647
652
  */
648
653
  declare function extractTraceFeatures(trace: APTrace): FeatureVector;
654
+ /**
655
+ * Compute the centroid (element-wise mean) of multiple feature vectors.
656
+ *
657
+ * Used by drift detection to build a baseline from the first N traces,
658
+ * enabling trace-to-trace comparison instead of trace-to-card comparison.
659
+ *
660
+ * @param vectors - Array of feature vectors to average
661
+ * @returns Centroid feature vector
662
+ */
663
+ declare function computeCentroid(vectors: FeatureVector[]): FeatureVector;
649
664
  /**
650
665
  * Compute cosine similarity between two feature vectors.
651
666
  *
@@ -678,6 +693,6 @@ declare const CONFLICT_PENALTY_MULTIPLIER = 0.5;
678
693
  declare const MIN_WORD_LENGTH = 3;
679
694
  /** Maximum features to extract from TF-IDF vectorization */
680
695
  declare const MAX_TFIDF_FEATURES = 500;
681
- declare const ALGORITHM_VERSION = "1.1.0";
696
+ declare const ALGORITHM_VERSION = "1.2.0";
682
697
 
683
- export { ALGORITHM_VERSION, type APTrace, type Action, type ActionCategory, type ActionTarget, type ActionType, type AlignmentCard, type AlignmentCardRequest, type AlignmentCardResponse, type Alternative, type AuditCommitment, type AuditStorage, type AutonomyEnvelope, type AutonomyScope, CONFLICT_PENALTY_MULTIPLIER, type Coherence, type CoherenceResult, type CoherenceResultMessage, DEFAULT_SIMILARITY_THRESHOLD, DEFAULT_SUSTAINED_TURNS_THRESHOLD, type DataSharing, type Decision, type DriftAlert, type DriftAnalysis, type DriftDirection, type DriftIndicator, type Escalation, type EscalationStatus, type EscalationTrigger, type HierarchyType, MAX_TFIDF_FEATURES, MIN_COHERENCE_FOR_PROCEED, MIN_WORD_LENGTH, type MonetaryValue, NEAR_BOUNDARY_THRESHOLD, type Principal, type PrincipalResponse, type PrincipalType, type ProposedCollaboration, type ProposedResolution, type RelationshipType, type RequesterInfo, type Severity, type Signature, type StorageType, type TamperEvidence, type TaskContext, type TraceContext, type TriggerAction, type TriggerCheck, VIOLATION_SEVERITY, type ValueAlignment, type ValueAlignmentDetail, type ValueCoherenceCheck, type ValueCoherenceMessage, type ValueConflict, type ValueConflictResult, type ValueDefinition, type Values, type VerificationMetadata, type VerificationResult, type Violation, type ViolationType, type Warning, checkCoherence, cosineSimilarity, createViolation, detectDrift, extractCardFeatures, extractTraceFeatures, getSelectedAlternative, hadViolations, hasValue, isActionBounded, isActionForbidden, isCardExpired, verifyTrace, wasEscalated };
698
+ export { ALGORITHM_VERSION, type APTrace, type Action, type ActionCategory, type ActionTarget, type ActionType, type AlignmentCard, type AlignmentCardRequest, type AlignmentCardResponse, type Alternative, type AuditCommitment, type AuditStorage, type AutonomyEnvelope, type AutonomyScope, CONFLICT_PENALTY_MULTIPLIER, type Coherence, type CoherenceResult, type CoherenceResultMessage, DEFAULT_SIMILARITY_THRESHOLD, DEFAULT_SUSTAINED_TURNS_THRESHOLD, type DataSharing, type Decision, type DriftAlert, type DriftAnalysis, type DriftDirection, type DriftIndicator, type Escalation, type EscalationStatus, type EscalationTrigger, type HierarchyType, MAX_TFIDF_FEATURES, MIN_COHERENCE_FOR_PROCEED, MIN_WORD_LENGTH, type MonetaryValue, NEAR_BOUNDARY_THRESHOLD, type Principal, type PrincipalResponse, type PrincipalType, type ProposedCollaboration, type ProposedResolution, type RelationshipType, type RequesterInfo, type Severity, type Signature, type StorageType, type TamperEvidence, type TaskContext, type TraceContext, type TriggerAction, type TriggerCheck, VIOLATION_SEVERITY, type ValueAlignment, type ValueAlignmentDetail, type ValueCoherenceCheck, type ValueCoherenceMessage, type ValueConflict, type ValueConflictResult, type ValueDefinition, type Values, type VerificationMetadata, type VerificationResult, type Violation, type ViolationType, type Warning, checkCoherence, computeCentroid, cosineSimilarity, createViolation, detectDrift, extractCardFeatures, extractTraceFeatures, getSelectedAlternative, hadViolations, hasValue, isActionBounded, isActionForbidden, isCardExpired, verifyTrace, wasEscalated };
package/dist/index.js CHANGED
@@ -30,6 +30,7 @@ __export(index_exports, {
30
30
  NEAR_BOUNDARY_THRESHOLD: () => NEAR_BOUNDARY_THRESHOLD,
31
31
  VIOLATION_SEVERITY: () => VIOLATION_SEVERITY,
32
32
  checkCoherence: () => checkCoherence,
33
+ computeCentroid: () => computeCentroid,
33
34
  cosineSimilarity: () => cosineSimilarity,
34
35
  createViolation: () => createViolation,
35
36
  detectDrift: () => detectDrift,
@@ -54,7 +55,7 @@ var MIN_COHERENCE_FOR_PROCEED = 0.7;
54
55
  var CONFLICT_PENALTY_MULTIPLIER = 0.5;
55
56
  var MIN_WORD_LENGTH = 3;
56
57
  var MAX_TFIDF_FEATURES = 500;
57
- var ALGORITHM_VERSION = "1.1.0";
58
+ var ALGORITHM_VERSION = "1.2.0";
58
59
 
59
60
  // src/verification/features.ts
60
61
  var STOPWORDS = /* @__PURE__ */ new Set([
@@ -157,6 +158,20 @@ function extractTraceFeatures(trace) {
157
158
  }
158
159
  return features;
159
160
  }
161
+ function computeCentroid(vectors) {
162
+ if (vectors.length === 0) return {};
163
+ const centroid = {};
164
+ for (const vec of vectors) {
165
+ for (const [key, value] of Object.entries(vec)) {
166
+ centroid[key] = (centroid[key] ?? 0) + value;
167
+ }
168
+ }
169
+ const n = vectors.length;
170
+ for (const key of Object.keys(centroid)) {
171
+ centroid[key] /= n;
172
+ }
173
+ return centroid;
174
+ }
160
175
  function cosineSimilarity(a, b) {
161
176
  const keysA = Object.keys(a);
162
177
  const keysB = new Set(Object.keys(b));
@@ -400,17 +415,33 @@ function checkCoherence(myCard, theirCard, taskValues) {
400
415
  };
401
416
  }
402
417
  function detectDrift(card, traces, similarityThreshold = DEFAULT_SIMILARITY_THRESHOLD, sustainedThreshold = DEFAULT_SUSTAINED_TURNS_THRESHOLD) {
403
- if (traces.length < sustainedThreshold) {
418
+ const sorted = [...traces].sort(
419
+ (a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()
420
+ );
421
+ const baselineSize = Math.max(
422
+ sustainedThreshold,
423
+ Math.min(10, Math.floor(sorted.length / 4))
424
+ );
425
+ if (sorted.length < baselineSize + sustainedThreshold) {
404
426
  return [];
405
427
  }
406
- const cardFeatures = extractCardFeatures(card);
428
+ const baselineFeatures = sorted.slice(0, baselineSize).map((t) => extractTraceFeatures(t));
429
+ const baselineCentroid = computeCentroid(baselineFeatures);
407
430
  const alerts = [];
408
431
  let lowSimilarityStreak = [];
409
432
  const escalationRates = [];
410
433
  const valueUsage = {};
411
- for (const trace of traces) {
434
+ for (const trace of sorted.slice(0, baselineSize)) {
435
+ const escalation = trace.escalation;
436
+ escalationRates.push(escalation?.required ? 1 : 0);
437
+ for (const value of trace.decision.values_applied ?? []) {
438
+ valueUsage[value] = (valueUsage[value] ?? 0) + 1;
439
+ }
440
+ }
441
+ for (let i = baselineSize; i < sorted.length; i++) {
442
+ const trace = sorted[i];
412
443
  const traceFeatures = extractTraceFeatures(trace);
413
- const similarity = cosineSimilarity(traceFeatures, cardFeatures);
444
+ const similarity = cosineSimilarity(traceFeatures, baselineCentroid);
414
445
  const escalation = trace.escalation;
415
446
  escalationRates.push(escalation?.required ? 1 : 0);
416
447
  for (const value of trace.decision.values_applied ?? []) {
@@ -601,6 +632,7 @@ function hadViolations(trace) {
601
632
  NEAR_BOUNDARY_THRESHOLD,
602
633
  VIOLATION_SEVERITY,
603
634
  checkCoherence,
635
+ computeCentroid,
604
636
  cosineSimilarity,
605
637
  createViolation,
606
638
  detectDrift,
package/dist/index.mjs CHANGED
@@ -6,7 +6,7 @@ var MIN_COHERENCE_FOR_PROCEED = 0.7;
6
6
  var CONFLICT_PENALTY_MULTIPLIER = 0.5;
7
7
  var MIN_WORD_LENGTH = 3;
8
8
  var MAX_TFIDF_FEATURES = 500;
9
- var ALGORITHM_VERSION = "1.1.0";
9
+ var ALGORITHM_VERSION = "1.2.0";
10
10
 
11
11
  // src/verification/features.ts
12
12
  var STOPWORDS = /* @__PURE__ */ new Set([
@@ -109,6 +109,20 @@ function extractTraceFeatures(trace) {
109
109
  }
110
110
  return features;
111
111
  }
112
+ function computeCentroid(vectors) {
113
+ if (vectors.length === 0) return {};
114
+ const centroid = {};
115
+ for (const vec of vectors) {
116
+ for (const [key, value] of Object.entries(vec)) {
117
+ centroid[key] = (centroid[key] ?? 0) + value;
118
+ }
119
+ }
120
+ const n = vectors.length;
121
+ for (const key of Object.keys(centroid)) {
122
+ centroid[key] /= n;
123
+ }
124
+ return centroid;
125
+ }
112
126
  function cosineSimilarity(a, b) {
113
127
  const keysA = Object.keys(a);
114
128
  const keysB = new Set(Object.keys(b));
@@ -352,17 +366,33 @@ function checkCoherence(myCard, theirCard, taskValues) {
352
366
  };
353
367
  }
354
368
  function detectDrift(card, traces, similarityThreshold = DEFAULT_SIMILARITY_THRESHOLD, sustainedThreshold = DEFAULT_SUSTAINED_TURNS_THRESHOLD) {
355
- if (traces.length < sustainedThreshold) {
369
+ const sorted = [...traces].sort(
370
+ (a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()
371
+ );
372
+ const baselineSize = Math.max(
373
+ sustainedThreshold,
374
+ Math.min(10, Math.floor(sorted.length / 4))
375
+ );
376
+ if (sorted.length < baselineSize + sustainedThreshold) {
356
377
  return [];
357
378
  }
358
- const cardFeatures = extractCardFeatures(card);
379
+ const baselineFeatures = sorted.slice(0, baselineSize).map((t) => extractTraceFeatures(t));
380
+ const baselineCentroid = computeCentroid(baselineFeatures);
359
381
  const alerts = [];
360
382
  let lowSimilarityStreak = [];
361
383
  const escalationRates = [];
362
384
  const valueUsage = {};
363
- for (const trace of traces) {
385
+ for (const trace of sorted.slice(0, baselineSize)) {
386
+ const escalation = trace.escalation;
387
+ escalationRates.push(escalation?.required ? 1 : 0);
388
+ for (const value of trace.decision.values_applied ?? []) {
389
+ valueUsage[value] = (valueUsage[value] ?? 0) + 1;
390
+ }
391
+ }
392
+ for (let i = baselineSize; i < sorted.length; i++) {
393
+ const trace = sorted[i];
364
394
  const traceFeatures = extractTraceFeatures(trace);
365
- const similarity = cosineSimilarity(traceFeatures, cardFeatures);
395
+ const similarity = cosineSimilarity(traceFeatures, baselineCentroid);
366
396
  const escalation = trace.escalation;
367
397
  escalationRates.push(escalation?.required ? 1 : 0);
368
398
  for (const value of trace.decision.values_applied ?? []) {
@@ -552,6 +582,7 @@ export {
552
582
  NEAR_BOUNDARY_THRESHOLD,
553
583
  VIOLATION_SEVERITY,
554
584
  checkCoherence,
585
+ computeCentroid,
555
586
  cosineSimilarity,
556
587
  createViolation,
557
588
  detectDrift,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mnemom/agent-alignment-protocol",
3
- "version": "0.1.4",
3
+ "version": "0.1.6",
4
4
  "description": "Agent Alignment Protocol (AAP) - Verification and drift detection for AI agents",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",
package/src/constants.ts CHANGED
@@ -41,4 +41,4 @@ export const MAX_TFIDF_FEATURES = 500;
41
41
 
42
42
  // Version
43
43
  // -------
44
- export const ALGORITHM_VERSION = "1.1.0";
44
+ export const ALGORITHM_VERSION = "1.2.0";
package/src/index.ts CHANGED
@@ -124,6 +124,7 @@ export {
124
124
  } from "./schemas/ap-trace";
125
125
 
126
126
  export {
127
+ computeCentroid,
127
128
  extractCardFeatures,
128
129
  extractTraceFeatures,
129
130
  cosineSimilarity,
@@ -20,8 +20,8 @@ import {
20
20
  import type { AlignmentCard } from "../schemas/alignment-card";
21
21
  import type { APTrace } from "../schemas/ap-trace";
22
22
  import {
23
+ computeCentroid,
23
24
  cosineSimilarity,
24
- extractCardFeatures,
25
25
  extractTraceFeatures,
26
26
  } from "./features";
27
27
  import {
@@ -319,14 +319,19 @@ export function checkCoherence(
319
319
  /**
320
320
  * Detect behavioral drift from declared alignment.
321
321
  *
322
- * Analyzes traces chronologically, computing similarity between each
323
- * trace's behavior and the declared alignment. Alerts when sustained
324
- * low similarity is detected (consecutive traces below threshold).
322
+ * Computes a baseline centroid from the first N traces, then compares
323
+ * subsequent traces against this centroid using cosine similarity.
324
+ * Trace-to-trace comparison provides symmetric feature spaces, yielding
325
+ * meaningful similarity scores (unlike trace-to-card which is structurally
326
+ * depressed due to asymmetric features).
327
+ *
328
+ * Alerts when sustained low similarity is detected (consecutive traces
329
+ * below threshold).
325
330
  *
326
331
  * @see SPEC Section 8 and Appendix B.2 for algorithm specification.
327
332
  *
328
- * @param card - Alignment Card to compare against
329
- * @param traces - List of AP-Traces in chronological order
333
+ * @param card - Alignment Card (used for card_id and direction inference)
334
+ * @param traces - List of AP-Traces (sorted chronologically internally)
330
335
  * @param similarityThreshold - Alert when similarity drops below (default: 0.30)
331
336
  * @param sustainedThreshold - Alert after N consecutive low-similarity traces (default: 3)
332
337
  * @returns List of DriftAlert objects for detected drift events
@@ -337,11 +342,27 @@ export function detectDrift(
337
342
  similarityThreshold = DEFAULT_SIMILARITY_THRESHOLD,
338
343
  sustainedThreshold = DEFAULT_SUSTAINED_TURNS_THRESHOLD
339
344
  ): DriftAlert[] {
340
- if (traces.length < sustainedThreshold) {
345
+ // Sort traces chronologically
346
+ const sorted = [...traces].sort(
347
+ (a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()
348
+ );
349
+
350
+ // Compute baseline window size
351
+ const baselineSize = Math.max(
352
+ sustainedThreshold,
353
+ Math.min(10, Math.floor(sorted.length / 4))
354
+ );
355
+
356
+ // Need enough traces for baseline + sustained threshold
357
+ if (sorted.length < baselineSize + sustainedThreshold) {
341
358
  return [];
342
359
  }
343
360
 
344
- const cardFeatures = extractCardFeatures(card);
361
+ // Extract features for baseline traces and compute centroid
362
+ const baselineFeatures = sorted
363
+ .slice(0, baselineSize)
364
+ .map((t) => extractTraceFeatures(t));
365
+ const baselineCentroid = computeCentroid(baselineFeatures);
345
366
 
346
367
  const alerts: DriftAlert[] = [];
347
368
  let lowSimilarityStreak: Array<{ trace: APTrace; similarity: number }> = [];
@@ -350,9 +371,20 @@ export function detectDrift(
350
371
  const escalationRates: number[] = [];
351
372
  const valueUsage: Record<string, number> = {};
352
373
 
353
- for (const trace of traces) {
374
+ // Include baseline traces in escalation/value tracking
375
+ for (const trace of sorted.slice(0, baselineSize)) {
376
+ const escalation = trace.escalation;
377
+ escalationRates.push(escalation?.required ? 1.0 : 0.0);
378
+ for (const value of trace.decision.values_applied ?? []) {
379
+ valueUsage[value] = (valueUsage[value] ?? 0) + 1;
380
+ }
381
+ }
382
+
383
+ // Iterate from after baseline to end
384
+ for (let i = baselineSize; i < sorted.length; i++) {
385
+ const trace = sorted[i];
354
386
  const traceFeatures = extractTraceFeatures(trace);
355
- const similarity = cosineSimilarity(traceFeatures, cardFeatures);
387
+ const similarity = cosineSimilarity(traceFeatures, baselineCentroid);
356
388
 
357
389
  // Track escalation rate
358
390
  const escalation = trace.escalation;
package/src/verification/features.ts CHANGED
@@ -67,7 +67,7 @@ export function extractCardFeatures(card: AlignmentCard): FeatureVector {
67
67
  }
68
68
  }
69
69
 
70
- // Principal features (distinct namespace from trace action categories)
70
+ // Principal features (matching Python SDK naming)
71
71
  features[`principal_type:${card.principal.type}`] = 1.0;
72
72
  features[`relationship:${card.principal.relationship}`] = 1.0;
73
73
 
@@ -114,6 +114,30 @@ export function extractTraceFeatures(trace: APTrace): FeatureVector {
114
114
  return features;
115
115
  }
116
116
 
117
+ /**
118
+ * Compute the centroid (element-wise mean) of multiple feature vectors.
119
+ *
120
+ * Used by drift detection to build a baseline from the first N traces,
121
+ * enabling trace-to-trace comparison instead of trace-to-card comparison.
122
+ *
123
+ * @param vectors - Array of feature vectors to average
124
+ * @returns Centroid feature vector
125
+ */
126
+ export function computeCentroid(vectors: FeatureVector[]): FeatureVector {
127
+ if (vectors.length === 0) return {};
128
+ const centroid: FeatureVector = {};
129
+ for (const vec of vectors) {
130
+ for (const [key, value] of Object.entries(vec)) {
131
+ centroid[key] = (centroid[key] ?? 0) + value;
132
+ }
133
+ }
134
+ const n = vectors.length;
135
+ for (const key of Object.keys(centroid)) {
136
+ centroid[key] /= n;
137
+ }
138
+ return centroid;
139
+ }
140
+
117
141
  /**
118
142
  * Compute cosine similarity between two feature vectors.
119
143
  *