@mnemom/agent-alignment-protocol 0.1.4 → 0.1.6
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +22 -7
- package/dist/index.d.ts +22 -7
- package/dist/index.js +37 -5
- package/dist/index.mjs +36 -5
- package/package.json +1 -1
- package/src/constants.ts +1 -1
- package/src/index.ts +1 -0
- package/src/verification/api.ts +42 -10
- package/src/verification/features.ts +25 -1
package/dist/index.d.mts
CHANGED
|
@@ -457,14 +457,19 @@ declare function checkCoherence(myCard: AlignmentCard, theirCard: AlignmentCard,
|
|
|
457
457
|
/**
|
|
458
458
|
* Detect behavioral drift from declared alignment.
|
|
459
459
|
*
|
|
460
|
-
*
|
|
461
|
-
*
|
|
462
|
-
*
|
|
460
|
+
* Computes a baseline centroid from the first N traces, then compares
|
|
461
|
+
* subsequent traces against this centroid using cosine similarity.
|
|
462
|
+
* Trace-to-trace comparison provides symmetric feature spaces, yielding
|
|
463
|
+
* meaningful similarity scores (unlike trace-to-card which is structurally
|
|
464
|
+
* depressed due to asymmetric features).
|
|
465
|
+
*
|
|
466
|
+
* Alerts when sustained low similarity is detected (consecutive traces
|
|
467
|
+
* below threshold).
|
|
463
468
|
*
|
|
464
469
|
* @see SPEC Section 8 and Appendix B.2 for algorithm specification.
|
|
465
470
|
*
|
|
466
|
-
* @param card - Alignment Card
|
|
467
|
-
* @param traces - List of AP-Traces
|
|
471
|
+
* @param card - Alignment Card (used for card_id and direction inference)
|
|
472
|
+
* @param traces - List of AP-Traces (sorted chronologically internally)
|
|
468
473
|
* @param similarityThreshold - Alert when similarity drops below (default: 0.30)
|
|
469
474
|
* @param sustainedThreshold - Alert after N consecutive low-similarity traces (default: 3)
|
|
470
475
|
* @returns List of DriftAlert objects for detected drift events
|
|
@@ -646,6 +651,16 @@ declare function extractCardFeatures(card: AlignmentCard): FeatureVector;
|
|
|
646
651
|
* Extract features from an AP-Trace.
|
|
647
652
|
*/
|
|
648
653
|
declare function extractTraceFeatures(trace: APTrace): FeatureVector;
|
|
654
|
+
/**
|
|
655
|
+
* Compute the centroid (element-wise mean) of multiple feature vectors.
|
|
656
|
+
*
|
|
657
|
+
* Used by drift detection to build a baseline from the first N traces,
|
|
658
|
+
* enabling trace-to-trace comparison instead of trace-to-card comparison.
|
|
659
|
+
*
|
|
660
|
+
* @param vectors - Array of feature vectors to average
|
|
661
|
+
* @returns Centroid feature vector
|
|
662
|
+
*/
|
|
663
|
+
declare function computeCentroid(vectors: FeatureVector[]): FeatureVector;
|
|
649
664
|
/**
|
|
650
665
|
* Compute cosine similarity between two feature vectors.
|
|
651
666
|
*
|
|
@@ -678,6 +693,6 @@ declare const CONFLICT_PENALTY_MULTIPLIER = 0.5;
|
|
|
678
693
|
declare const MIN_WORD_LENGTH = 3;
|
|
679
694
|
/** Maximum features to extract from TF-IDF vectorization */
|
|
680
695
|
declare const MAX_TFIDF_FEATURES = 500;
|
|
681
|
-
declare const ALGORITHM_VERSION = "1.
|
|
696
|
+
declare const ALGORITHM_VERSION = "1.2.0";
|
|
682
697
|
|
|
683
|
-
export { ALGORITHM_VERSION, type APTrace, type Action, type ActionCategory, type ActionTarget, type ActionType, type AlignmentCard, type AlignmentCardRequest, type AlignmentCardResponse, type Alternative, type AuditCommitment, type AuditStorage, type AutonomyEnvelope, type AutonomyScope, CONFLICT_PENALTY_MULTIPLIER, type Coherence, type CoherenceResult, type CoherenceResultMessage, DEFAULT_SIMILARITY_THRESHOLD, DEFAULT_SUSTAINED_TURNS_THRESHOLD, type DataSharing, type Decision, type DriftAlert, type DriftAnalysis, type DriftDirection, type DriftIndicator, type Escalation, type EscalationStatus, type EscalationTrigger, type HierarchyType, MAX_TFIDF_FEATURES, MIN_COHERENCE_FOR_PROCEED, MIN_WORD_LENGTH, type MonetaryValue, NEAR_BOUNDARY_THRESHOLD, type Principal, type PrincipalResponse, type PrincipalType, type ProposedCollaboration, type ProposedResolution, type RelationshipType, type RequesterInfo, type Severity, type Signature, type StorageType, type TamperEvidence, type TaskContext, type TraceContext, type TriggerAction, type TriggerCheck, VIOLATION_SEVERITY, type ValueAlignment, type ValueAlignmentDetail, type ValueCoherenceCheck, type ValueCoherenceMessage, type ValueConflict, type ValueConflictResult, type ValueDefinition, type Values, type VerificationMetadata, type VerificationResult, type Violation, type ViolationType, type Warning, checkCoherence, cosineSimilarity, createViolation, detectDrift, extractCardFeatures, extractTraceFeatures, getSelectedAlternative, hadViolations, hasValue, isActionBounded, isActionForbidden, isCardExpired, verifyTrace, wasEscalated };
|
|
698
|
+
export { ALGORITHM_VERSION, type APTrace, type Action, type ActionCategory, type ActionTarget, type ActionType, type AlignmentCard, type AlignmentCardRequest, type AlignmentCardResponse, type Alternative, type AuditCommitment, type AuditStorage, type AutonomyEnvelope, type AutonomyScope, CONFLICT_PENALTY_MULTIPLIER, type Coherence, type CoherenceResult, type CoherenceResultMessage, DEFAULT_SIMILARITY_THRESHOLD, DEFAULT_SUSTAINED_TURNS_THRESHOLD, type DataSharing, type Decision, type DriftAlert, type DriftAnalysis, type DriftDirection, type DriftIndicator, type Escalation, type EscalationStatus, type EscalationTrigger, type HierarchyType, MAX_TFIDF_FEATURES, MIN_COHERENCE_FOR_PROCEED, MIN_WORD_LENGTH, type MonetaryValue, NEAR_BOUNDARY_THRESHOLD, type Principal, type PrincipalResponse, type PrincipalType, type ProposedCollaboration, type ProposedResolution, type RelationshipType, type RequesterInfo, type Severity, type Signature, type StorageType, type TamperEvidence, type TaskContext, type TraceContext, type TriggerAction, type TriggerCheck, VIOLATION_SEVERITY, type ValueAlignment, type ValueAlignmentDetail, type ValueCoherenceCheck, type ValueCoherenceMessage, type ValueConflict, type ValueConflictResult, type ValueDefinition, type Values, type VerificationMetadata, type VerificationResult, type Violation, type ViolationType, type Warning, checkCoherence, computeCentroid, cosineSimilarity, createViolation, detectDrift, extractCardFeatures, extractTraceFeatures, getSelectedAlternative, hadViolations, hasValue, isActionBounded, isActionForbidden, isCardExpired, verifyTrace, wasEscalated };
|
package/dist/index.d.ts
CHANGED
|
@@ -457,14 +457,19 @@ declare function checkCoherence(myCard: AlignmentCard, theirCard: AlignmentCard,
|
|
|
457
457
|
/**
|
|
458
458
|
* Detect behavioral drift from declared alignment.
|
|
459
459
|
*
|
|
460
|
-
*
|
|
461
|
-
*
|
|
462
|
-
*
|
|
460
|
+
* Computes a baseline centroid from the first N traces, then compares
|
|
461
|
+
* subsequent traces against this centroid using cosine similarity.
|
|
462
|
+
* Trace-to-trace comparison provides symmetric feature spaces, yielding
|
|
463
|
+
* meaningful similarity scores (unlike trace-to-card which is structurally
|
|
464
|
+
* depressed due to asymmetric features).
|
|
465
|
+
*
|
|
466
|
+
* Alerts when sustained low similarity is detected (consecutive traces
|
|
467
|
+
* below threshold).
|
|
463
468
|
*
|
|
464
469
|
* @see SPEC Section 8 and Appendix B.2 for algorithm specification.
|
|
465
470
|
*
|
|
466
|
-
* @param card - Alignment Card
|
|
467
|
-
* @param traces - List of AP-Traces
|
|
471
|
+
* @param card - Alignment Card (used for card_id and direction inference)
|
|
472
|
+
* @param traces - List of AP-Traces (sorted chronologically internally)
|
|
468
473
|
* @param similarityThreshold - Alert when similarity drops below (default: 0.30)
|
|
469
474
|
* @param sustainedThreshold - Alert after N consecutive low-similarity traces (default: 3)
|
|
470
475
|
* @returns List of DriftAlert objects for detected drift events
|
|
@@ -646,6 +651,16 @@ declare function extractCardFeatures(card: AlignmentCard): FeatureVector;
|
|
|
646
651
|
* Extract features from an AP-Trace.
|
|
647
652
|
*/
|
|
648
653
|
declare function extractTraceFeatures(trace: APTrace): FeatureVector;
|
|
654
|
+
/**
|
|
655
|
+
* Compute the centroid (element-wise mean) of multiple feature vectors.
|
|
656
|
+
*
|
|
657
|
+
* Used by drift detection to build a baseline from the first N traces,
|
|
658
|
+
* enabling trace-to-trace comparison instead of trace-to-card comparison.
|
|
659
|
+
*
|
|
660
|
+
* @param vectors - Array of feature vectors to average
|
|
661
|
+
* @returns Centroid feature vector
|
|
662
|
+
*/
|
|
663
|
+
declare function computeCentroid(vectors: FeatureVector[]): FeatureVector;
|
|
649
664
|
/**
|
|
650
665
|
* Compute cosine similarity between two feature vectors.
|
|
651
666
|
*
|
|
@@ -678,6 +693,6 @@ declare const CONFLICT_PENALTY_MULTIPLIER = 0.5;
|
|
|
678
693
|
declare const MIN_WORD_LENGTH = 3;
|
|
679
694
|
/** Maximum features to extract from TF-IDF vectorization */
|
|
680
695
|
declare const MAX_TFIDF_FEATURES = 500;
|
|
681
|
-
declare const ALGORITHM_VERSION = "1.
|
|
696
|
+
declare const ALGORITHM_VERSION = "1.2.0";
|
|
682
697
|
|
|
683
|
-
export { ALGORITHM_VERSION, type APTrace, type Action, type ActionCategory, type ActionTarget, type ActionType, type AlignmentCard, type AlignmentCardRequest, type AlignmentCardResponse, type Alternative, type AuditCommitment, type AuditStorage, type AutonomyEnvelope, type AutonomyScope, CONFLICT_PENALTY_MULTIPLIER, type Coherence, type CoherenceResult, type CoherenceResultMessage, DEFAULT_SIMILARITY_THRESHOLD, DEFAULT_SUSTAINED_TURNS_THRESHOLD, type DataSharing, type Decision, type DriftAlert, type DriftAnalysis, type DriftDirection, type DriftIndicator, type Escalation, type EscalationStatus, type EscalationTrigger, type HierarchyType, MAX_TFIDF_FEATURES, MIN_COHERENCE_FOR_PROCEED, MIN_WORD_LENGTH, type MonetaryValue, NEAR_BOUNDARY_THRESHOLD, type Principal, type PrincipalResponse, type PrincipalType, type ProposedCollaboration, type ProposedResolution, type RelationshipType, type RequesterInfo, type Severity, type Signature, type StorageType, type TamperEvidence, type TaskContext, type TraceContext, type TriggerAction, type TriggerCheck, VIOLATION_SEVERITY, type ValueAlignment, type ValueAlignmentDetail, type ValueCoherenceCheck, type ValueCoherenceMessage, type ValueConflict, type ValueConflictResult, type ValueDefinition, type Values, type VerificationMetadata, type VerificationResult, type Violation, type ViolationType, type Warning, checkCoherence, cosineSimilarity, createViolation, detectDrift, extractCardFeatures, extractTraceFeatures, getSelectedAlternative, hadViolations, hasValue, isActionBounded, isActionForbidden, isCardExpired, verifyTrace, wasEscalated };
|
|
698
|
+
export { ALGORITHM_VERSION, type APTrace, type Action, type ActionCategory, type ActionTarget, type ActionType, type AlignmentCard, type AlignmentCardRequest, type AlignmentCardResponse, type Alternative, type AuditCommitment, type AuditStorage, type AutonomyEnvelope, type AutonomyScope, CONFLICT_PENALTY_MULTIPLIER, type Coherence, type CoherenceResult, type CoherenceResultMessage, DEFAULT_SIMILARITY_THRESHOLD, DEFAULT_SUSTAINED_TURNS_THRESHOLD, type DataSharing, type Decision, type DriftAlert, type DriftAnalysis, type DriftDirection, type DriftIndicator, type Escalation, type EscalationStatus, type EscalationTrigger, type HierarchyType, MAX_TFIDF_FEATURES, MIN_COHERENCE_FOR_PROCEED, MIN_WORD_LENGTH, type MonetaryValue, NEAR_BOUNDARY_THRESHOLD, type Principal, type PrincipalResponse, type PrincipalType, type ProposedCollaboration, type ProposedResolution, type RelationshipType, type RequesterInfo, type Severity, type Signature, type StorageType, type TamperEvidence, type TaskContext, type TraceContext, type TriggerAction, type TriggerCheck, VIOLATION_SEVERITY, type ValueAlignment, type ValueAlignmentDetail, type ValueCoherenceCheck, type ValueCoherenceMessage, type ValueConflict, type ValueConflictResult, type ValueDefinition, type Values, type VerificationMetadata, type VerificationResult, type Violation, type ViolationType, type Warning, checkCoherence, computeCentroid, cosineSimilarity, createViolation, detectDrift, extractCardFeatures, extractTraceFeatures, getSelectedAlternative, hadViolations, hasValue, isActionBounded, isActionForbidden, isCardExpired, verifyTrace, wasEscalated };
|
package/dist/index.js
CHANGED
|
@@ -30,6 +30,7 @@ __export(index_exports, {
|
|
|
30
30
|
NEAR_BOUNDARY_THRESHOLD: () => NEAR_BOUNDARY_THRESHOLD,
|
|
31
31
|
VIOLATION_SEVERITY: () => VIOLATION_SEVERITY,
|
|
32
32
|
checkCoherence: () => checkCoherence,
|
|
33
|
+
computeCentroid: () => computeCentroid,
|
|
33
34
|
cosineSimilarity: () => cosineSimilarity,
|
|
34
35
|
createViolation: () => createViolation,
|
|
35
36
|
detectDrift: () => detectDrift,
|
|
@@ -54,7 +55,7 @@ var MIN_COHERENCE_FOR_PROCEED = 0.7;
|
|
|
54
55
|
var CONFLICT_PENALTY_MULTIPLIER = 0.5;
|
|
55
56
|
var MIN_WORD_LENGTH = 3;
|
|
56
57
|
var MAX_TFIDF_FEATURES = 500;
|
|
57
|
-
var ALGORITHM_VERSION = "1.
|
|
58
|
+
var ALGORITHM_VERSION = "1.2.0";
|
|
58
59
|
|
|
59
60
|
// src/verification/features.ts
|
|
60
61
|
var STOPWORDS = /* @__PURE__ */ new Set([
|
|
@@ -157,6 +158,20 @@ function extractTraceFeatures(trace) {
|
|
|
157
158
|
}
|
|
158
159
|
return features;
|
|
159
160
|
}
|
|
161
|
+
function computeCentroid(vectors) {
|
|
162
|
+
if (vectors.length === 0) return {};
|
|
163
|
+
const centroid = {};
|
|
164
|
+
for (const vec of vectors) {
|
|
165
|
+
for (const [key, value] of Object.entries(vec)) {
|
|
166
|
+
centroid[key] = (centroid[key] ?? 0) + value;
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
const n = vectors.length;
|
|
170
|
+
for (const key of Object.keys(centroid)) {
|
|
171
|
+
centroid[key] /= n;
|
|
172
|
+
}
|
|
173
|
+
return centroid;
|
|
174
|
+
}
|
|
160
175
|
function cosineSimilarity(a, b) {
|
|
161
176
|
const keysA = Object.keys(a);
|
|
162
177
|
const keysB = new Set(Object.keys(b));
|
|
@@ -400,17 +415,33 @@ function checkCoherence(myCard, theirCard, taskValues) {
|
|
|
400
415
|
};
|
|
401
416
|
}
|
|
402
417
|
function detectDrift(card, traces, similarityThreshold = DEFAULT_SIMILARITY_THRESHOLD, sustainedThreshold = DEFAULT_SUSTAINED_TURNS_THRESHOLD) {
|
|
403
|
-
|
|
418
|
+
const sorted = [...traces].sort(
|
|
419
|
+
(a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()
|
|
420
|
+
);
|
|
421
|
+
const baselineSize = Math.max(
|
|
422
|
+
sustainedThreshold,
|
|
423
|
+
Math.min(10, Math.floor(sorted.length / 4))
|
|
424
|
+
);
|
|
425
|
+
if (sorted.length < baselineSize + sustainedThreshold) {
|
|
404
426
|
return [];
|
|
405
427
|
}
|
|
406
|
-
const
|
|
428
|
+
const baselineFeatures = sorted.slice(0, baselineSize).map((t) => extractTraceFeatures(t));
|
|
429
|
+
const baselineCentroid = computeCentroid(baselineFeatures);
|
|
407
430
|
const alerts = [];
|
|
408
431
|
let lowSimilarityStreak = [];
|
|
409
432
|
const escalationRates = [];
|
|
410
433
|
const valueUsage = {};
|
|
411
|
-
for (const trace of
|
|
434
|
+
for (const trace of sorted.slice(0, baselineSize)) {
|
|
435
|
+
const escalation = trace.escalation;
|
|
436
|
+
escalationRates.push(escalation?.required ? 1 : 0);
|
|
437
|
+
for (const value of trace.decision.values_applied ?? []) {
|
|
438
|
+
valueUsage[value] = (valueUsage[value] ?? 0) + 1;
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
for (let i = baselineSize; i < sorted.length; i++) {
|
|
442
|
+
const trace = sorted[i];
|
|
412
443
|
const traceFeatures = extractTraceFeatures(trace);
|
|
413
|
-
const similarity = cosineSimilarity(traceFeatures,
|
|
444
|
+
const similarity = cosineSimilarity(traceFeatures, baselineCentroid);
|
|
414
445
|
const escalation = trace.escalation;
|
|
415
446
|
escalationRates.push(escalation?.required ? 1 : 0);
|
|
416
447
|
for (const value of trace.decision.values_applied ?? []) {
|
|
@@ -601,6 +632,7 @@ function hadViolations(trace) {
|
|
|
601
632
|
NEAR_BOUNDARY_THRESHOLD,
|
|
602
633
|
VIOLATION_SEVERITY,
|
|
603
634
|
checkCoherence,
|
|
635
|
+
computeCentroid,
|
|
604
636
|
cosineSimilarity,
|
|
605
637
|
createViolation,
|
|
606
638
|
detectDrift,
|
package/dist/index.mjs
CHANGED
|
@@ -6,7 +6,7 @@ var MIN_COHERENCE_FOR_PROCEED = 0.7;
|
|
|
6
6
|
var CONFLICT_PENALTY_MULTIPLIER = 0.5;
|
|
7
7
|
var MIN_WORD_LENGTH = 3;
|
|
8
8
|
var MAX_TFIDF_FEATURES = 500;
|
|
9
|
-
var ALGORITHM_VERSION = "1.
|
|
9
|
+
var ALGORITHM_VERSION = "1.2.0";
|
|
10
10
|
|
|
11
11
|
// src/verification/features.ts
|
|
12
12
|
var STOPWORDS = /* @__PURE__ */ new Set([
|
|
@@ -109,6 +109,20 @@ function extractTraceFeatures(trace) {
|
|
|
109
109
|
}
|
|
110
110
|
return features;
|
|
111
111
|
}
|
|
112
|
+
function computeCentroid(vectors) {
|
|
113
|
+
if (vectors.length === 0) return {};
|
|
114
|
+
const centroid = {};
|
|
115
|
+
for (const vec of vectors) {
|
|
116
|
+
for (const [key, value] of Object.entries(vec)) {
|
|
117
|
+
centroid[key] = (centroid[key] ?? 0) + value;
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
const n = vectors.length;
|
|
121
|
+
for (const key of Object.keys(centroid)) {
|
|
122
|
+
centroid[key] /= n;
|
|
123
|
+
}
|
|
124
|
+
return centroid;
|
|
125
|
+
}
|
|
112
126
|
function cosineSimilarity(a, b) {
|
|
113
127
|
const keysA = Object.keys(a);
|
|
114
128
|
const keysB = new Set(Object.keys(b));
|
|
@@ -352,17 +366,33 @@ function checkCoherence(myCard, theirCard, taskValues) {
|
|
|
352
366
|
};
|
|
353
367
|
}
|
|
354
368
|
function detectDrift(card, traces, similarityThreshold = DEFAULT_SIMILARITY_THRESHOLD, sustainedThreshold = DEFAULT_SUSTAINED_TURNS_THRESHOLD) {
|
|
355
|
-
|
|
369
|
+
const sorted = [...traces].sort(
|
|
370
|
+
(a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()
|
|
371
|
+
);
|
|
372
|
+
const baselineSize = Math.max(
|
|
373
|
+
sustainedThreshold,
|
|
374
|
+
Math.min(10, Math.floor(sorted.length / 4))
|
|
375
|
+
);
|
|
376
|
+
if (sorted.length < baselineSize + sustainedThreshold) {
|
|
356
377
|
return [];
|
|
357
378
|
}
|
|
358
|
-
const
|
|
379
|
+
const baselineFeatures = sorted.slice(0, baselineSize).map((t) => extractTraceFeatures(t));
|
|
380
|
+
const baselineCentroid = computeCentroid(baselineFeatures);
|
|
359
381
|
const alerts = [];
|
|
360
382
|
let lowSimilarityStreak = [];
|
|
361
383
|
const escalationRates = [];
|
|
362
384
|
const valueUsage = {};
|
|
363
|
-
for (const trace of
|
|
385
|
+
for (const trace of sorted.slice(0, baselineSize)) {
|
|
386
|
+
const escalation = trace.escalation;
|
|
387
|
+
escalationRates.push(escalation?.required ? 1 : 0);
|
|
388
|
+
for (const value of trace.decision.values_applied ?? []) {
|
|
389
|
+
valueUsage[value] = (valueUsage[value] ?? 0) + 1;
|
|
390
|
+
}
|
|
391
|
+
}
|
|
392
|
+
for (let i = baselineSize; i < sorted.length; i++) {
|
|
393
|
+
const trace = sorted[i];
|
|
364
394
|
const traceFeatures = extractTraceFeatures(trace);
|
|
365
|
-
const similarity = cosineSimilarity(traceFeatures,
|
|
395
|
+
const similarity = cosineSimilarity(traceFeatures, baselineCentroid);
|
|
366
396
|
const escalation = trace.escalation;
|
|
367
397
|
escalationRates.push(escalation?.required ? 1 : 0);
|
|
368
398
|
for (const value of trace.decision.values_applied ?? []) {
|
|
@@ -552,6 +582,7 @@ export {
|
|
|
552
582
|
NEAR_BOUNDARY_THRESHOLD,
|
|
553
583
|
VIOLATION_SEVERITY,
|
|
554
584
|
checkCoherence,
|
|
585
|
+
computeCentroid,
|
|
555
586
|
cosineSimilarity,
|
|
556
587
|
createViolation,
|
|
557
588
|
detectDrift,
|
package/package.json
CHANGED
package/src/constants.ts
CHANGED
package/src/index.ts
CHANGED
package/src/verification/api.ts
CHANGED
|
@@ -20,8 +20,8 @@ import {
|
|
|
20
20
|
import type { AlignmentCard } from "../schemas/alignment-card";
|
|
21
21
|
import type { APTrace } from "../schemas/ap-trace";
|
|
22
22
|
import {
|
|
23
|
+
computeCentroid,
|
|
23
24
|
cosineSimilarity,
|
|
24
|
-
extractCardFeatures,
|
|
25
25
|
extractTraceFeatures,
|
|
26
26
|
} from "./features";
|
|
27
27
|
import {
|
|
@@ -319,14 +319,19 @@ export function checkCoherence(
|
|
|
319
319
|
/**
|
|
320
320
|
* Detect behavioral drift from declared alignment.
|
|
321
321
|
*
|
|
322
|
-
*
|
|
323
|
-
*
|
|
324
|
-
*
|
|
322
|
+
* Computes a baseline centroid from the first N traces, then compares
|
|
323
|
+
* subsequent traces against this centroid using cosine similarity.
|
|
324
|
+
* Trace-to-trace comparison provides symmetric feature spaces, yielding
|
|
325
|
+
* meaningful similarity scores (unlike trace-to-card which is structurally
|
|
326
|
+
* depressed due to asymmetric features).
|
|
327
|
+
*
|
|
328
|
+
* Alerts when sustained low similarity is detected (consecutive traces
|
|
329
|
+
* below threshold).
|
|
325
330
|
*
|
|
326
331
|
* @see SPEC Section 8 and Appendix B.2 for algorithm specification.
|
|
327
332
|
*
|
|
328
|
-
* @param card - Alignment Card
|
|
329
|
-
* @param traces - List of AP-Traces
|
|
333
|
+
* @param card - Alignment Card (used for card_id and direction inference)
|
|
334
|
+
* @param traces - List of AP-Traces (sorted chronologically internally)
|
|
330
335
|
* @param similarityThreshold - Alert when similarity drops below (default: 0.30)
|
|
331
336
|
* @param sustainedThreshold - Alert after N consecutive low-similarity traces (default: 3)
|
|
332
337
|
* @returns List of DriftAlert objects for detected drift events
|
|
@@ -337,11 +342,27 @@ export function detectDrift(
|
|
|
337
342
|
similarityThreshold = DEFAULT_SIMILARITY_THRESHOLD,
|
|
338
343
|
sustainedThreshold = DEFAULT_SUSTAINED_TURNS_THRESHOLD
|
|
339
344
|
): DriftAlert[] {
|
|
340
|
-
|
|
345
|
+
// Sort traces chronologically
|
|
346
|
+
const sorted = [...traces].sort(
|
|
347
|
+
(a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()
|
|
348
|
+
);
|
|
349
|
+
|
|
350
|
+
// Compute baseline window size
|
|
351
|
+
const baselineSize = Math.max(
|
|
352
|
+
sustainedThreshold,
|
|
353
|
+
Math.min(10, Math.floor(sorted.length / 4))
|
|
354
|
+
);
|
|
355
|
+
|
|
356
|
+
// Need enough traces for baseline + sustained threshold
|
|
357
|
+
if (sorted.length < baselineSize + sustainedThreshold) {
|
|
341
358
|
return [];
|
|
342
359
|
}
|
|
343
360
|
|
|
344
|
-
|
|
361
|
+
// Extract features for baseline traces and compute centroid
|
|
362
|
+
const baselineFeatures = sorted
|
|
363
|
+
.slice(0, baselineSize)
|
|
364
|
+
.map((t) => extractTraceFeatures(t));
|
|
365
|
+
const baselineCentroid = computeCentroid(baselineFeatures);
|
|
345
366
|
|
|
346
367
|
const alerts: DriftAlert[] = [];
|
|
347
368
|
let lowSimilarityStreak: Array<{ trace: APTrace; similarity: number }> = [];
|
|
@@ -350,9 +371,20 @@ export function detectDrift(
|
|
|
350
371
|
const escalationRates: number[] = [];
|
|
351
372
|
const valueUsage: Record<string, number> = {};
|
|
352
373
|
|
|
353
|
-
|
|
374
|
+
// Include baseline traces in escalation/value tracking
|
|
375
|
+
for (const trace of sorted.slice(0, baselineSize)) {
|
|
376
|
+
const escalation = trace.escalation;
|
|
377
|
+
escalationRates.push(escalation?.required ? 1.0 : 0.0);
|
|
378
|
+
for (const value of trace.decision.values_applied ?? []) {
|
|
379
|
+
valueUsage[value] = (valueUsage[value] ?? 0) + 1;
|
|
380
|
+
}
|
|
381
|
+
}
|
|
382
|
+
|
|
383
|
+
// Iterate from after baseline to end
|
|
384
|
+
for (let i = baselineSize; i < sorted.length; i++) {
|
|
385
|
+
const trace = sorted[i];
|
|
354
386
|
const traceFeatures = extractTraceFeatures(trace);
|
|
355
|
-
const similarity = cosineSimilarity(traceFeatures,
|
|
387
|
+
const similarity = cosineSimilarity(traceFeatures, baselineCentroid);
|
|
356
388
|
|
|
357
389
|
// Track escalation rate
|
|
358
390
|
const escalation = trace.escalation;
|
|
package/src/verification/features.ts
CHANGED
|
@@ -67,7 +67,7 @@ export function extractCardFeatures(card: AlignmentCard): FeatureVector {
|
|
|
67
67
|
}
|
|
68
68
|
}
|
|
69
69
|
|
|
70
|
-
// Principal features (
|
|
70
|
+
// Principal features (matching Python SDK naming)
|
|
71
71
|
features[`principal_type:${card.principal.type}`] = 1.0;
|
|
72
72
|
features[`relationship:${card.principal.relationship}`] = 1.0;
|
|
73
73
|
|
|
@@ -114,6 +114,30 @@ export function extractTraceFeatures(trace: APTrace): FeatureVector {
|
|
|
114
114
|
return features;
|
|
115
115
|
}
|
|
116
116
|
|
|
117
|
+
/**
|
|
118
|
+
* Compute the centroid (element-wise mean) of multiple feature vectors.
|
|
119
|
+
*
|
|
120
|
+
* Used by drift detection to build a baseline from the first N traces,
|
|
121
|
+
* enabling trace-to-trace comparison instead of trace-to-card comparison.
|
|
122
|
+
*
|
|
123
|
+
* @param vectors - Array of feature vectors to average
|
|
124
|
+
* @returns Centroid feature vector
|
|
125
|
+
*/
|
|
126
|
+
export function computeCentroid(vectors: FeatureVector[]): FeatureVector {
|
|
127
|
+
if (vectors.length === 0) return {};
|
|
128
|
+
const centroid: FeatureVector = {};
|
|
129
|
+
for (const vec of vectors) {
|
|
130
|
+
for (const [key, value] of Object.entries(vec)) {
|
|
131
|
+
centroid[key] = (centroid[key] ?? 0) + value;
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
const n = vectors.length;
|
|
135
|
+
for (const key of Object.keys(centroid)) {
|
|
136
|
+
centroid[key] /= n;
|
|
137
|
+
}
|
|
138
|
+
return centroid;
|
|
139
|
+
}
|
|
140
|
+
|
|
117
141
|
/**
|
|
118
142
|
* Compute cosine similarity between two feature vectors.
|
|
119
143
|
*
|