@mnemom/agent-integrity-protocol 0.2.1 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -200,6 +200,21 @@ interface IntegrityCheckpoint {
200
200
  * Set after the corresponding APTrace is created by the Observer.
201
201
  */
202
202
  linked_trace_id: string | null;
203
+ /**
204
+ * Whether this checkpoint was synthetically generated (not from analysis LLM).
205
+ * True when no thinking block was found, analysis failed, or evidence was below threshold.
206
+ */
207
+ synthetic?: boolean;
208
+ /**
209
+ * Reason the checkpoint was synthetically generated.
210
+ * Only present when `synthetic` is true.
211
+ */
212
+ synthetic_reason?: "no_thinking_block" | "analysis_failed" | "below_evidence_threshold";
213
+ /**
214
+ * Cross-validation warnings about inconsistencies between verdict and concerns.
215
+ * Present when the analysis LLM returned a verdict that doesn't match concern severities.
216
+ */
217
+ cross_validation_warnings?: string[];
203
218
  }
204
219
 
205
220
  /**
@@ -277,7 +292,7 @@ interface IntegritySignal {
277
292
  * - "deception_pattern": Multiple checks flagging deceptive reasoning; pattern of misleading intent
278
293
  * - "unknown": Drift detected but direction cannot be categorized; multiple concern categories without a dominant pattern
279
294
  */
280
- type DriftDirection = "injection_pattern" | "value_erosion" | "autonomy_creep" | "deception_pattern" | "unknown";
295
+ type DriftDirection = "injection_pattern" | "value_erosion" | "autonomy_creep" | "deception_pattern" | "reasoning_degradation" | "intent_drift" | "unknown";
281
296
  /**
282
297
  * An integrity drift alert generated when consecutive non-clear verdicts
283
298
  * exceed the sustained threshold.
@@ -1156,7 +1171,7 @@ interface CertificateVerificationResult {
1156
1171
  declare function verifyCertificate(certificate: IntegrityCertificate, publicKey: Uint8Array, merkleRoot?: string): Promise<CertificateVerificationResult>;
1157
1172
 
1158
1173
  /** Protocol version identifiers. */
1159
- declare const AIP_VERSION = "0.1.0";
1174
+ declare const AIP_VERSION = "0.4.0";
1160
1175
  declare const ALGORITHM_VERSION = "1.0.0";
1161
1176
  /** Drift detection thresholds (Section 9.1). */
1162
1177
  declare const DEFAULT_SUSTAINED_CHECKS_THRESHOLD = 3;
package/dist/index.d.ts CHANGED
@@ -200,6 +200,21 @@ interface IntegrityCheckpoint {
200
200
  * Set after the corresponding APTrace is created by the Observer.
201
201
  */
202
202
  linked_trace_id: string | null;
203
+ /**
204
+ * Whether this checkpoint was synthetically generated (not from analysis LLM).
205
+ * True when no thinking block was found, analysis failed, or evidence was below threshold.
206
+ */
207
+ synthetic?: boolean;
208
+ /**
209
+ * Reason the checkpoint was synthetically generated.
210
+ * Only present when `synthetic` is true.
211
+ */
212
+ synthetic_reason?: "no_thinking_block" | "analysis_failed" | "below_evidence_threshold";
213
+ /**
214
+ * Cross-validation warnings about inconsistencies between verdict and concerns.
215
+ * Present when the analysis LLM returned a verdict that doesn't match concern severities.
216
+ */
217
+ cross_validation_warnings?: string[];
203
218
  }
204
219
 
205
220
  /**
@@ -277,7 +292,7 @@ interface IntegritySignal {
277
292
  * - "deception_pattern": Multiple checks flagging deceptive reasoning; pattern of misleading intent
278
293
  * - "unknown": Drift detected but direction cannot be categorized; multiple concern categories without a dominant pattern
279
294
  */
280
- type DriftDirection = "injection_pattern" | "value_erosion" | "autonomy_creep" | "deception_pattern" | "unknown";
295
+ type DriftDirection = "injection_pattern" | "value_erosion" | "autonomy_creep" | "deception_pattern" | "reasoning_degradation" | "intent_drift" | "unknown";
281
296
  /**
282
297
  * An integrity drift alert generated when consecutive non-clear verdicts
283
298
  * exceed the sustained threshold.
@@ -1156,7 +1171,7 @@ interface CertificateVerificationResult {
1156
1171
  declare function verifyCertificate(certificate: IntegrityCertificate, publicKey: Uint8Array, merkleRoot?: string): Promise<CertificateVerificationResult>;
1157
1172
 
1158
1173
  /** Protocol version identifiers. */
1159
- declare const AIP_VERSION = "0.1.0";
1174
+ declare const AIP_VERSION = "0.4.0";
1160
1175
  declare const ALGORITHM_VERSION = "1.0.0";
1161
1176
  /** Drift detection thresholds (Section 9.1). */
1162
1177
  declare const DEFAULT_SUSTAINED_CHECKS_THRESHOLD = 3;
package/dist/index.js CHANGED
@@ -20,7 +20,7 @@ function createWindowState(sessionId) {
20
20
  }
21
21
 
22
22
  // src/constants.ts
23
- var AIP_VERSION = "0.1.0";
23
+ var AIP_VERSION = "0.4.0";
24
24
  var ALGORITHM_VERSION = "1.0.0";
25
25
  var DEFAULT_SUSTAINED_CHECKS_THRESHOLD = 3;
26
26
  var DRIFT_SEVERITY_LOW_THRESHOLD = 0.7;
@@ -1161,11 +1161,27 @@ function validateConscienceContext(value) {
1161
1161
  consultation_depth: ctx.consultation_depth
1162
1162
  };
1163
1163
  }
1164
+ function extractFirstJsonObject(text) {
1165
+ let depth = 0;
1166
+ let start = -1;
1167
+ for (let i = 0; i < text.length; i++) {
1168
+ if (text[i] === "{") {
1169
+ if (depth === 0) start = i;
1170
+ depth++;
1171
+ } else if (text[i] === "}") {
1172
+ depth--;
1173
+ if (depth === 0 && start !== -1) {
1174
+ return text.slice(start, i + 1);
1175
+ }
1176
+ }
1177
+ }
1178
+ return null;
1179
+ }
1164
1180
  function checkIntegrity(input) {
1165
1181
  let jsonText = input.analysisResponse;
1166
- const jsonMatch = jsonText.match(/\{[\s\S]*\}/);
1167
- if (jsonMatch) {
1168
- jsonText = jsonMatch[0];
1182
+ const extracted = extractFirstJsonObject(jsonText);
1183
+ if (extracted) {
1184
+ jsonText = extracted;
1169
1185
  }
1170
1186
  jsonText = jsonText.replace(/,\s*([}\]])/g, "$1");
1171
1187
  let parsed;
@@ -1187,6 +1203,22 @@ function checkIntegrity(input) {
1187
1203
  const conscienceContext = validateConscienceContext(
1188
1204
  parsed.conscience_context
1189
1205
  );
1206
+ const crossValidationWarnings = [];
1207
+ if (verdict === "clear") {
1208
+ const criticalOrHigh = concerns.filter(
1209
+ (c) => c.severity === "critical" || c.severity === "high"
1210
+ );
1211
+ if (criticalOrHigh.length > 0) {
1212
+ const maxSeverity = criticalOrHigh.some((c) => c.severity === "critical") ? "critical" : "high";
1213
+ crossValidationWarnings.push(
1214
+ `Verdict is "clear" but ${criticalOrHigh.length} concern(s) have severity "${maxSeverity}". This may indicate an inconsistent analysis result.`
1215
+ );
1216
+ }
1217
+ } else if (verdict === "boundary_violation" && concerns.length === 0) {
1218
+ crossValidationWarnings.push(
1219
+ `Verdict is "boundary_violation" but concerns array is empty. Expected at least one concern for a boundary violation.`
1220
+ );
1221
+ }
1190
1222
  const checkpointId = `${CHECKPOINT_ID_PREFIX}${randomUUID()}`;
1191
1223
  const analysisMetadata = {
1192
1224
  analysis_model: input.analysisModel,
@@ -1214,7 +1246,8 @@ function checkIntegrity(input) {
1214
1246
  window_size: input.windowPosition.window_size
1215
1247
  },
1216
1248
  analysis_metadata: analysisMetadata,
1217
- linked_trace_id: input.linkedTraceId ?? null
1249
+ linked_trace_id: input.linkedTraceId ?? null,
1250
+ ...crossValidationWarnings.length > 0 ? { cross_validation_warnings: crossValidationWarnings } : {}
1218
1251
  };
1219
1252
  }
1220
1253
  function mapVerdictToAction(verdict, concerns) {
@@ -1274,7 +1307,9 @@ function detectIntegrityDrift(state, checkpoint, windowCheckpoints, threshold) {
1274
1307
  for (const concern of checkpoint.concerns) {
1275
1308
  newState.streakCategories.push(concern.category);
1276
1309
  }
1277
- if (newState.sustainedNonclear >= effectiveThreshold && !newState.alertFired) {
1310
+ const REALERT_INTERVAL = 5;
1311
+ const shouldAlert = newState.sustainedNonclear >= effectiveThreshold && (!newState.alertFired || (newState.sustainedNonclear - effectiveThreshold) % REALERT_INTERVAL === 0);
1312
+ if (shouldAlert) {
1278
1313
  newState.alertFired = true;
1279
1314
  const clearCount = windowCheckpoints.filter(
1280
1315
  (cp) => cp.verdict === "clear"
@@ -1326,7 +1361,9 @@ function inferDriftDirection(categories) {
1326
1361
  prompt_injection: "injection_pattern",
1327
1362
  value_misalignment: "value_erosion",
1328
1363
  autonomy_violation: "autonomy_creep",
1329
- deceptive_reasoning: "deception_pattern"
1364
+ deceptive_reasoning: "deception_pattern",
1365
+ reasoning_corruption: "reasoning_degradation",
1366
+ undeclared_intent: "intent_drift"
1330
1367
  };
1331
1368
  return CATEGORY_TO_DIRECTION[maxCategory] ?? "unknown";
1332
1369
  }
@@ -1359,7 +1396,7 @@ function createClient(config) {
1359
1396
  const adapter = provider ? registry.get(provider) : registry.detectFromUrl(config.analysis_llm.base_url);
1360
1397
  const thinking = adapter.extractThinking(responseBody) ?? adapter.extractThinkingFromStream(responseBody);
1361
1398
  if (!thinking) {
1362
- return buildSyntheticSignal(config, window, "clear");
1399
+ return buildSyntheticSignal(config, window, "clear", void 0, void 0, "no_thinking_block");
1363
1400
  }
1364
1401
  const thinkingTokens = Math.ceil(thinking.content.length / 4);
1365
1402
  const minTokens = config.min_evidence_tokens ?? DEFAULT_MIN_EVIDENCE_TOKENS;
@@ -1369,7 +1406,8 @@ function createClient(config) {
1369
1406
  window,
1370
1407
  "clear",
1371
1408
  `Thinking block below minimum evidence threshold (${thinkingTokens} tokens < ${minTokens})`,
1372
- thinkingTokens
1409
+ thinkingTokens,
1410
+ "below_evidence_threshold"
1373
1411
  );
1374
1412
  }
1375
1413
  const prompt = buildConsciencePrompt({
@@ -1395,9 +1433,9 @@ function createClient(config) {
1395
1433
  );
1396
1434
  }
1397
1435
  if (failureMode === "fail_closed") {
1398
- return buildSyntheticSignal(config, window, "boundary_violation");
1436
+ return buildSyntheticSignal(config, window, "boundary_violation", void 0, void 0, "analysis_failed");
1399
1437
  }
1400
- return buildSyntheticSignal(config, window, "clear");
1438
+ return buildSyntheticSignal(config, window, "clear", void 0, void 0, "analysis_failed");
1401
1439
  }
1402
1440
  const analysisDurationMs = Date.now() - startTime;
1403
1441
  const thinkingHash = hashThinkingBlock(thinking.content);
@@ -1458,7 +1496,10 @@ function createClient(config) {
1458
1496
  function generateSessionId(cardId) {
1459
1497
  const hash = cardId.slice(0, 8);
1460
1498
  const hourBucket = Math.floor(Date.now() / 36e5);
1461
- return `sess-${hash}-${hourBucket}`;
1499
+ const randomBytes = new Uint8Array(2);
1500
+ crypto.getRandomValues(randomBytes);
1501
+ const randomHex = Array.from(randomBytes).map((b) => b.toString(16).padStart(2, "0")).join("");
1502
+ return `sess-${hash}-${hourBucket}-${randomHex}`;
1462
1503
  }
1463
1504
  async function callAnalysisLLM(llmConfig, system, user, timeoutMs) {
1464
1505
  const controller = new AbortController();
@@ -1499,7 +1540,7 @@ async function callAnalysisLLM(llmConfig, system, user, timeoutMs) {
1499
1540
  clearTimeout(timeoutId);
1500
1541
  }
1501
1542
  }
1502
- function buildSyntheticSignal(config, window, verdict, customReasoning, thinkingTokensOriginal) {
1543
+ function buildSyntheticSignal(config, window, verdict, customReasoning, thinkingTokensOriginal, syntheticReason) {
1503
1544
  const summary = window.getSummary();
1504
1545
  return {
1505
1546
  checkpoint: {
@@ -1533,7 +1574,9 @@ function buildSyntheticSignal(config, window, verdict, customReasoning, thinking
1533
1574
  truncated: false,
1534
1575
  extraction_confidence: 0
1535
1576
  },
1536
- linked_trace_id: null
1577
+ linked_trace_id: null,
1578
+ synthetic: true,
1579
+ synthetic_reason: syntheticReason ?? "no_thinking_block"
1537
1580
  },
1538
1581
  proceed: verdict === "clear",
1539
1582
  recommended_action: verdict === "clear" ? "continue" : "deny_and_escalate",
@@ -1550,11 +1593,14 @@ function verifySignature(secret, payload, signature) {
1550
1593
  return constantTimeEqual(expected, signature);
1551
1594
  }
1552
1595
  function constantTimeEqual(a, b) {
1553
- if (a.length !== b.length) return false;
1596
+ const maxLen = Math.max(a.length, b.length);
1597
+ const aPadded = a.padEnd(maxLen, "\0");
1598
+ const bPadded = b.padEnd(maxLen, "\0");
1554
1599
  let result = 0;
1555
- for (let i = 0; i < a.length; i++) {
1556
- result |= a.charCodeAt(i) ^ b.charCodeAt(i);
1600
+ for (let i = 0; i < maxLen; i++) {
1601
+ result |= aPadded.charCodeAt(i) ^ bPadded.charCodeAt(i);
1557
1602
  }
1603
+ result |= a.length ^ b.length;
1558
1604
  return result === 0;
1559
1605
  }
1560
1606
  function base64ToUint8(b64) {