@mnemom/agent-alignment-protocol 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -678,6 +678,6 @@ declare const CONFLICT_PENALTY_MULTIPLIER = 0.5;
678
678
  declare const MIN_WORD_LENGTH = 3;
679
679
  /** Maximum features to extract from TF-IDF vectorization */
680
680
  declare const MAX_TFIDF_FEATURES = 500;
681
- declare const ALGORITHM_VERSION = "1.0.0";
681
+ declare const ALGORITHM_VERSION = "1.1.0";
682
682
 
683
683
  export { ALGORITHM_VERSION, type APTrace, type Action, type ActionCategory, type ActionTarget, type ActionType, type AlignmentCard, type AlignmentCardRequest, type AlignmentCardResponse, type Alternative, type AuditCommitment, type AuditStorage, type AutonomyEnvelope, type AutonomyScope, CONFLICT_PENALTY_MULTIPLIER, type Coherence, type CoherenceResult, type CoherenceResultMessage, DEFAULT_SIMILARITY_THRESHOLD, DEFAULT_SUSTAINED_TURNS_THRESHOLD, type DataSharing, type Decision, type DriftAlert, type DriftAnalysis, type DriftDirection, type DriftIndicator, type Escalation, type EscalationStatus, type EscalationTrigger, type HierarchyType, MAX_TFIDF_FEATURES, MIN_COHERENCE_FOR_PROCEED, MIN_WORD_LENGTH, type MonetaryValue, NEAR_BOUNDARY_THRESHOLD, type Principal, type PrincipalResponse, type PrincipalType, type ProposedCollaboration, type ProposedResolution, type RelationshipType, type RequesterInfo, type Severity, type Signature, type StorageType, type TamperEvidence, type TaskContext, type TraceContext, type TriggerAction, type TriggerCheck, VIOLATION_SEVERITY, type ValueAlignment, type ValueAlignmentDetail, type ValueCoherenceCheck, type ValueCoherenceMessage, type ValueConflict, type ValueConflictResult, type ValueDefinition, type Values, type VerificationMetadata, type VerificationResult, type Violation, type ViolationType, type Warning, checkCoherence, cosineSimilarity, createViolation, detectDrift, extractCardFeatures, extractTraceFeatures, getSelectedAlternative, hadViolations, hasValue, isActionBounded, isActionForbidden, isCardExpired, verifyTrace, wasEscalated };
package/dist/index.d.ts CHANGED
@@ -678,6 +678,6 @@ declare const CONFLICT_PENALTY_MULTIPLIER = 0.5;
678
678
  declare const MIN_WORD_LENGTH = 3;
679
679
  /** Maximum features to extract from TF-IDF vectorization */
680
680
  declare const MAX_TFIDF_FEATURES = 500;
681
- declare const ALGORITHM_VERSION = "1.0.0";
681
+ declare const ALGORITHM_VERSION = "1.1.0";
682
682
 
683
683
  export { ALGORITHM_VERSION, type APTrace, type Action, type ActionCategory, type ActionTarget, type ActionType, type AlignmentCard, type AlignmentCardRequest, type AlignmentCardResponse, type Alternative, type AuditCommitment, type AuditStorage, type AutonomyEnvelope, type AutonomyScope, CONFLICT_PENALTY_MULTIPLIER, type Coherence, type CoherenceResult, type CoherenceResultMessage, DEFAULT_SIMILARITY_THRESHOLD, DEFAULT_SUSTAINED_TURNS_THRESHOLD, type DataSharing, type Decision, type DriftAlert, type DriftAnalysis, type DriftDirection, type DriftIndicator, type Escalation, type EscalationStatus, type EscalationTrigger, type HierarchyType, MAX_TFIDF_FEATURES, MIN_COHERENCE_FOR_PROCEED, MIN_WORD_LENGTH, type MonetaryValue, NEAR_BOUNDARY_THRESHOLD, type Principal, type PrincipalResponse, type PrincipalType, type ProposedCollaboration, type ProposedResolution, type RelationshipType, type RequesterInfo, type Severity, type Signature, type StorageType, type TamperEvidence, type TaskContext, type TraceContext, type TriggerAction, type TriggerCheck, VIOLATION_SEVERITY, type ValueAlignment, type ValueAlignmentDetail, type ValueCoherenceCheck, type ValueCoherenceMessage, type ValueConflict, type ValueConflictResult, type ValueDefinition, type Values, type VerificationMetadata, type VerificationResult, type Violation, type ViolationType, type Warning, checkCoherence, cosineSimilarity, createViolation, detectDrift, extractCardFeatures, extractTraceFeatures, getSelectedAlternative, hadViolations, hasValue, isActionBounded, isActionForbidden, isCardExpired, verifyTrace, wasEscalated };
package/dist/index.js CHANGED
@@ -54,7 +54,7 @@ var MIN_COHERENCE_FOR_PROCEED = 0.7;
54
54
  var CONFLICT_PENALTY_MULTIPLIER = 0.5;
55
55
  var MIN_WORD_LENGTH = 3;
56
56
  var MAX_TFIDF_FEATURES = 500;
57
- var ALGORITHM_VERSION = "1.0.0";
57
+ var ALGORITHM_VERSION = "1.1.0";
58
58
 
59
59
  // src/verification/features.ts
60
60
  var STOPWORDS = /* @__PURE__ */ new Set([
@@ -148,15 +148,7 @@ function extractTraceFeatures(trace) {
148
148
  features[`escalation:${trace.escalation.escalation_status}`] = 1;
149
149
  }
150
150
  }
151
- const reasoningTokens = tokenize(trace.decision.selection_reasoning);
152
- for (const token of reasoningTokens) {
153
- features[`content:${token}`] = (features[`content:${token}`] ?? 0) + 0.5;
154
- }
155
151
  for (const alt of trace.decision.alternatives_considered) {
156
- const altTokens = tokenize(alt.description);
157
- for (const token of altTokens) {
158
- features[`content:${token}`] = (features[`content:${token}`] ?? 0) + 0.25;
159
- }
160
152
  if (alt.flags) {
161
153
  for (const flag of alt.flags) {
162
154
  features[`flag:${flag}`] = 1;
@@ -429,7 +421,7 @@ function detectDrift(card, traces, similarityThreshold = DEFAULT_SIMILARITY_THRE
429
421
  } else {
430
422
  lowSimilarityStreak = [];
431
423
  }
432
- if (lowSimilarityStreak.length >= sustainedThreshold) {
424
+ if (lowSimilarityStreak.length === sustainedThreshold) {
433
425
  const latest = lowSimilarityStreak[lowSimilarityStreak.length - 1];
434
426
  const direction = inferDriftDirection(
435
427
  lowSimilarityStreak,
package/dist/index.mjs CHANGED
@@ -6,7 +6,7 @@ var MIN_COHERENCE_FOR_PROCEED = 0.7;
6
6
  var CONFLICT_PENALTY_MULTIPLIER = 0.5;
7
7
  var MIN_WORD_LENGTH = 3;
8
8
  var MAX_TFIDF_FEATURES = 500;
9
- var ALGORITHM_VERSION = "1.0.0";
9
+ var ALGORITHM_VERSION = "1.1.0";
10
10
 
11
11
  // src/verification/features.ts
12
12
  var STOPWORDS = /* @__PURE__ */ new Set([
@@ -100,15 +100,7 @@ function extractTraceFeatures(trace) {
100
100
  features[`escalation:${trace.escalation.escalation_status}`] = 1;
101
101
  }
102
102
  }
103
- const reasoningTokens = tokenize(trace.decision.selection_reasoning);
104
- for (const token of reasoningTokens) {
105
- features[`content:${token}`] = (features[`content:${token}`] ?? 0) + 0.5;
106
- }
107
103
  for (const alt of trace.decision.alternatives_considered) {
108
- const altTokens = tokenize(alt.description);
109
- for (const token of altTokens) {
110
- features[`content:${token}`] = (features[`content:${token}`] ?? 0) + 0.25;
111
- }
112
104
  if (alt.flags) {
113
105
  for (const flag of alt.flags) {
114
106
  features[`flag:${flag}`] = 1;
@@ -381,7 +373,7 @@ function detectDrift(card, traces, similarityThreshold = DEFAULT_SIMILARITY_THRE
381
373
  } else {
382
374
  lowSimilarityStreak = [];
383
375
  }
384
- if (lowSimilarityStreak.length >= sustainedThreshold) {
376
+ if (lowSimilarityStreak.length === sustainedThreshold) {
385
377
  const latest = lowSimilarityStreak[lowSimilarityStreak.length - 1];
386
378
  const direction = inferDriftDirection(
387
379
  lowSimilarityStreak,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mnemom/agent-alignment-protocol",
3
- "version": "0.1.2",
3
+ "version": "0.1.3",
4
4
  "description": "Agent Alignment Protocol (AAP) - Verification and drift detection for AI agents",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",
package/src/constants.ts CHANGED
@@ -41,4 +41,4 @@ export const MAX_TFIDF_FEATURES = 500;
41
41
 
42
42
  // Version
43
43
  // -------
44
- export const ALGORITHM_VERSION = "1.0.0";
44
+ export const ALGORITHM_VERSION = "1.1.0";
@@ -370,8 +370,8 @@ export function detectDrift(
370
370
  lowSimilarityStreak = [];
371
371
  }
372
372
 
373
- // Check if we've hit the threshold for alerting
374
- if (lowSimilarityStreak.length >= sustainedThreshold) {
373
+ // Check if we've hit the threshold for alerting (=== not >= to fire once)
374
+ if (lowSimilarityStreak.length === sustainedThreshold) {
375
375
  const latest = lowSimilarityStreak[lowSimilarityStreak.length - 1];
376
376
 
377
377
  // Infer drift direction
@@ -98,18 +98,12 @@ export function extractTraceFeatures(trace: APTrace): FeatureVector {
98
98
  }
99
99
  }
100
100
 
101
- // Content features from reasoning
102
- const reasoningTokens = tokenize(trace.decision.selection_reasoning);
103
- for (const token of reasoningTokens) {
104
- features[`content:${token}`] = (features[`content:${token}`] ?? 0) + 0.5;
105
- }
101
+ // Note: Content features from reasoning/alternatives are deliberately excluded.
102
+ // Card features are purely structural, so content tokens dilute cosine
103
+ // similarity without adding alignment signal. See CALIBRATION.md Section 3.5.
106
104
 
107
- // Alternative features
105
+ // Flag features from alternatives (structural, not content)
108
106
  for (const alt of trace.decision.alternatives_considered) {
109
- const altTokens = tokenize(alt.description);
110
- for (const token of altTokens) {
111
- features[`content:${token}`] = (features[`content:${token}`] ?? 0) + 0.25;
112
- }
113
107
  if (alt.flags) {
114
108
  for (const flag of alt.flags) {
115
109
  features[`flag:${flag}`] = 1.0;