@mnemom/agent-integrity-protocol 0.2.1 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +68 -22
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +17 -2
- package/dist/index.d.ts +17 -2
- package/dist/index.js +63 -17
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/index.d.cts
CHANGED
|
@@ -200,6 +200,21 @@ interface IntegrityCheckpoint {
|
|
|
200
200
|
* Set after the corresponding APTrace is created by the Observer.
|
|
201
201
|
*/
|
|
202
202
|
linked_trace_id: string | null;
|
|
203
|
+
/**
|
|
204
|
+
* Whether this checkpoint was synthetically generated (not from analysis LLM).
|
|
205
|
+
* True when no thinking block was found, analysis failed, or evidence was below threshold.
|
|
206
|
+
*/
|
|
207
|
+
synthetic?: boolean;
|
|
208
|
+
/**
|
|
209
|
+
* Reason the checkpoint was synthetically generated.
|
|
210
|
+
* Only present when `synthetic` is true.
|
|
211
|
+
*/
|
|
212
|
+
synthetic_reason?: "no_thinking_block" | "analysis_failed" | "below_evidence_threshold";
|
|
213
|
+
/**
|
|
214
|
+
* Cross-validation warnings about inconsistencies between verdict and concerns.
|
|
215
|
+
* Present when the analysis LLM returned a verdict that doesn't match concern severities.
|
|
216
|
+
*/
|
|
217
|
+
cross_validation_warnings?: string[];
|
|
203
218
|
}
|
|
204
219
|
|
|
205
220
|
/**
|
|
@@ -277,7 +292,7 @@ interface IntegritySignal {
|
|
|
277
292
|
* - "deception_pattern": Multiple checks flagging deceptive reasoning; pattern of misleading intent
|
|
278
293
|
* - "unknown": Drift detected but direction cannot be categorized; multiple concern categories without a dominant pattern
|
|
279
294
|
*/
|
|
280
|
-
type DriftDirection = "injection_pattern" | "value_erosion" | "autonomy_creep" | "deception_pattern" | "unknown";
|
|
295
|
+
type DriftDirection = "injection_pattern" | "value_erosion" | "autonomy_creep" | "deception_pattern" | "reasoning_degradation" | "intent_drift" | "unknown";
|
|
281
296
|
/**
|
|
282
297
|
* An integrity drift alert generated when consecutive non-clear verdicts
|
|
283
298
|
* exceed the sustained threshold.
|
|
@@ -1156,7 +1171,7 @@ interface CertificateVerificationResult {
|
|
|
1156
1171
|
declare function verifyCertificate(certificate: IntegrityCertificate, publicKey: Uint8Array, merkleRoot?: string): Promise<CertificateVerificationResult>;
|
|
1157
1172
|
|
|
1158
1173
|
/** Protocol version identifiers. */
|
|
1159
|
-
declare const AIP_VERSION = "0.
|
|
1174
|
+
declare const AIP_VERSION = "0.4.0";
|
|
1160
1175
|
declare const ALGORITHM_VERSION = "1.0.0";
|
|
1161
1176
|
/** Drift detection thresholds (Section 9.1). */
|
|
1162
1177
|
declare const DEFAULT_SUSTAINED_CHECKS_THRESHOLD = 3;
|
package/dist/index.d.ts
CHANGED
|
@@ -200,6 +200,21 @@ interface IntegrityCheckpoint {
|
|
|
200
200
|
* Set after the corresponding APTrace is created by the Observer.
|
|
201
201
|
*/
|
|
202
202
|
linked_trace_id: string | null;
|
|
203
|
+
/**
|
|
204
|
+
* Whether this checkpoint was synthetically generated (not from analysis LLM).
|
|
205
|
+
* True when no thinking block was found, analysis failed, or evidence was below threshold.
|
|
206
|
+
*/
|
|
207
|
+
synthetic?: boolean;
|
|
208
|
+
/**
|
|
209
|
+
* Reason the checkpoint was synthetically generated.
|
|
210
|
+
* Only present when `synthetic` is true.
|
|
211
|
+
*/
|
|
212
|
+
synthetic_reason?: "no_thinking_block" | "analysis_failed" | "below_evidence_threshold";
|
|
213
|
+
/**
|
|
214
|
+
* Cross-validation warnings about inconsistencies between verdict and concerns.
|
|
215
|
+
* Present when the analysis LLM returned a verdict that doesn't match concern severities.
|
|
216
|
+
*/
|
|
217
|
+
cross_validation_warnings?: string[];
|
|
203
218
|
}
|
|
204
219
|
|
|
205
220
|
/**
|
|
@@ -277,7 +292,7 @@ interface IntegritySignal {
|
|
|
277
292
|
* - "deception_pattern": Multiple checks flagging deceptive reasoning; pattern of misleading intent
|
|
278
293
|
* - "unknown": Drift detected but direction cannot be categorized; multiple concern categories without a dominant pattern
|
|
279
294
|
*/
|
|
280
|
-
type DriftDirection = "injection_pattern" | "value_erosion" | "autonomy_creep" | "deception_pattern" | "unknown";
|
|
295
|
+
type DriftDirection = "injection_pattern" | "value_erosion" | "autonomy_creep" | "deception_pattern" | "reasoning_degradation" | "intent_drift" | "unknown";
|
|
281
296
|
/**
|
|
282
297
|
* An integrity drift alert generated when consecutive non-clear verdicts
|
|
283
298
|
* exceed the sustained threshold.
|
|
@@ -1156,7 +1171,7 @@ interface CertificateVerificationResult {
|
|
|
1156
1171
|
declare function verifyCertificate(certificate: IntegrityCertificate, publicKey: Uint8Array, merkleRoot?: string): Promise<CertificateVerificationResult>;
|
|
1157
1172
|
|
|
1158
1173
|
/** Protocol version identifiers. */
|
|
1159
|
-
declare const AIP_VERSION = "0.
|
|
1174
|
+
declare const AIP_VERSION = "0.4.0";
|
|
1160
1175
|
declare const ALGORITHM_VERSION = "1.0.0";
|
|
1161
1176
|
/** Drift detection thresholds (Section 9.1). */
|
|
1162
1177
|
declare const DEFAULT_SUSTAINED_CHECKS_THRESHOLD = 3;
|
package/dist/index.js
CHANGED
|
@@ -20,7 +20,7 @@ function createWindowState(sessionId) {
|
|
|
20
20
|
}
|
|
21
21
|
|
|
22
22
|
// src/constants.ts
|
|
23
|
-
var AIP_VERSION = "0.
|
|
23
|
+
var AIP_VERSION = "0.4.0";
|
|
24
24
|
var ALGORITHM_VERSION = "1.0.0";
|
|
25
25
|
var DEFAULT_SUSTAINED_CHECKS_THRESHOLD = 3;
|
|
26
26
|
var DRIFT_SEVERITY_LOW_THRESHOLD = 0.7;
|
|
@@ -1161,11 +1161,27 @@ function validateConscienceContext(value) {
|
|
|
1161
1161
|
consultation_depth: ctx.consultation_depth
|
|
1162
1162
|
};
|
|
1163
1163
|
}
|
|
1164
|
+
/**
 * Returns the first balanced top-level `{ ... }` substring of `text`, or
 * `null` when no complete object is found.
 *
 * The scan is brace-counting, with two safeguards:
 *  - Inside an object, double-quoted strings (including `\"` escapes) are
 *    skipped, so braces that appear in string values do not affect nesting.
 *  - A `}` seen while no object is open is ignored instead of driving the
 *    depth negative, so stray closers in surrounding prose cannot hide or
 *    truncate the object that follows.
 *
 * Quotes outside an object are deliberately NOT treated as string
 * delimiters: surrounding prose may contain unbalanced quotes or apostrophes.
 *
 * The result is only structurally balanced; it is not validated as JSON.
 *
 * @param {string} text Raw text that may embed a JSON object.
 * @returns {string | null} The extracted substring, or null.
 */
function extractFirstJsonObject(text) {
  let depth = 0;
  let start = -1;
  let inString = false;
  let escaped = false;
  for (let i = 0; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (escaped) {
        // Character after a backslash is consumed verbatim.
        escaped = false;
      } else if (ch === "\\") {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }
    if (ch === '"') {
      // Only track strings once an object has been opened.
      if (depth > 0) inString = true;
    } else if (ch === "{") {
      if (depth === 0) start = i;
      depth++;
    } else if (ch === "}" && depth > 0) {
      depth--;
      if (depth === 0) {
        return text.slice(start, i + 1);
      }
    }
  }
  return null;
}
|
|
1164
1180
|
function checkIntegrity(input) {
|
|
1165
1181
|
let jsonText = input.analysisResponse;
|
|
1166
|
-
const
|
|
1167
|
-
if (
|
|
1168
|
-
jsonText =
|
|
1182
|
+
const extracted = extractFirstJsonObject(jsonText);
|
|
1183
|
+
if (extracted) {
|
|
1184
|
+
jsonText = extracted;
|
|
1169
1185
|
}
|
|
1170
1186
|
jsonText = jsonText.replace(/,\s*([}\]])/g, "$1");
|
|
1171
1187
|
let parsed;
|
|
@@ -1187,6 +1203,22 @@ function checkIntegrity(input) {
|
|
|
1187
1203
|
const conscienceContext = validateConscienceContext(
|
|
1188
1204
|
parsed.conscience_context
|
|
1189
1205
|
);
|
|
1206
|
+
const crossValidationWarnings = [];
|
|
1207
|
+
if (verdict === "clear") {
|
|
1208
|
+
const criticalOrHigh = concerns.filter(
|
|
1209
|
+
(c) => c.severity === "critical" || c.severity === "high"
|
|
1210
|
+
);
|
|
1211
|
+
if (criticalOrHigh.length > 0) {
|
|
1212
|
+
const maxSeverity = criticalOrHigh.some((c) => c.severity === "critical") ? "critical" : "high";
|
|
1213
|
+
crossValidationWarnings.push(
|
|
1214
|
+
`Verdict is "clear" but ${criticalOrHigh.length} concern(s) have severity "${maxSeverity}". This may indicate an inconsistent analysis result.`
|
|
1215
|
+
);
|
|
1216
|
+
}
|
|
1217
|
+
} else if (verdict === "boundary_violation" && concerns.length === 0) {
|
|
1218
|
+
crossValidationWarnings.push(
|
|
1219
|
+
`Verdict is "boundary_violation" but concerns array is empty. Expected at least one concern for a boundary violation.`
|
|
1220
|
+
);
|
|
1221
|
+
}
|
|
1190
1222
|
const checkpointId = `${CHECKPOINT_ID_PREFIX}${randomUUID()}`;
|
|
1191
1223
|
const analysisMetadata = {
|
|
1192
1224
|
analysis_model: input.analysisModel,
|
|
@@ -1214,7 +1246,8 @@ function checkIntegrity(input) {
|
|
|
1214
1246
|
window_size: input.windowPosition.window_size
|
|
1215
1247
|
},
|
|
1216
1248
|
analysis_metadata: analysisMetadata,
|
|
1217
|
-
linked_trace_id: input.linkedTraceId ?? null
|
|
1249
|
+
linked_trace_id: input.linkedTraceId ?? null,
|
|
1250
|
+
...crossValidationWarnings.length > 0 ? { cross_validation_warnings: crossValidationWarnings } : {}
|
|
1218
1251
|
};
|
|
1219
1252
|
}
|
|
1220
1253
|
function mapVerdictToAction(verdict, concerns) {
|
|
@@ -1274,7 +1307,9 @@ function detectIntegrityDrift(state, checkpoint, windowCheckpoints, threshold) {
|
|
|
1274
1307
|
for (const concern of checkpoint.concerns) {
|
|
1275
1308
|
newState.streakCategories.push(concern.category);
|
|
1276
1309
|
}
|
|
1277
|
-
|
|
1310
|
+
const REALERT_INTERVAL = 5;
|
|
1311
|
+
const shouldAlert = newState.sustainedNonclear >= effectiveThreshold && (!newState.alertFired || (newState.sustainedNonclear - effectiveThreshold) % REALERT_INTERVAL === 0);
|
|
1312
|
+
if (shouldAlert) {
|
|
1278
1313
|
newState.alertFired = true;
|
|
1279
1314
|
const clearCount = windowCheckpoints.filter(
|
|
1280
1315
|
(cp) => cp.verdict === "clear"
|
|
@@ -1326,7 +1361,9 @@ function inferDriftDirection(categories) {
|
|
|
1326
1361
|
prompt_injection: "injection_pattern",
|
|
1327
1362
|
value_misalignment: "value_erosion",
|
|
1328
1363
|
autonomy_violation: "autonomy_creep",
|
|
1329
|
-
deceptive_reasoning: "deception_pattern"
|
|
1364
|
+
deceptive_reasoning: "deception_pattern",
|
|
1365
|
+
reasoning_corruption: "reasoning_degradation",
|
|
1366
|
+
undeclared_intent: "intent_drift"
|
|
1330
1367
|
};
|
|
1331
1368
|
return CATEGORY_TO_DIRECTION[maxCategory] ?? "unknown";
|
|
1332
1369
|
}
|
|
@@ -1359,7 +1396,7 @@ function createClient(config) {
|
|
|
1359
1396
|
const adapter = provider ? registry.get(provider) : registry.detectFromUrl(config.analysis_llm.base_url);
|
|
1360
1397
|
const thinking = adapter.extractThinking(responseBody) ?? adapter.extractThinkingFromStream(responseBody);
|
|
1361
1398
|
if (!thinking) {
|
|
1362
|
-
return buildSyntheticSignal(config, window, "clear");
|
|
1399
|
+
return buildSyntheticSignal(config, window, "clear", void 0, void 0, "no_thinking_block");
|
|
1363
1400
|
}
|
|
1364
1401
|
const thinkingTokens = Math.ceil(thinking.content.length / 4);
|
|
1365
1402
|
const minTokens = config.min_evidence_tokens ?? DEFAULT_MIN_EVIDENCE_TOKENS;
|
|
@@ -1369,7 +1406,8 @@ function createClient(config) {
|
|
|
1369
1406
|
window,
|
|
1370
1407
|
"clear",
|
|
1371
1408
|
`Thinking block below minimum evidence threshold (${thinkingTokens} tokens < ${minTokens})`,
|
|
1372
|
-
thinkingTokens
|
|
1409
|
+
thinkingTokens,
|
|
1410
|
+
"below_evidence_threshold"
|
|
1373
1411
|
);
|
|
1374
1412
|
}
|
|
1375
1413
|
const prompt = buildConsciencePrompt({
|
|
@@ -1395,9 +1433,9 @@ function createClient(config) {
|
|
|
1395
1433
|
);
|
|
1396
1434
|
}
|
|
1397
1435
|
if (failureMode === "fail_closed") {
|
|
1398
|
-
return buildSyntheticSignal(config, window, "boundary_violation");
|
|
1436
|
+
return buildSyntheticSignal(config, window, "boundary_violation", void 0, void 0, "analysis_failed");
|
|
1399
1437
|
}
|
|
1400
|
-
return buildSyntheticSignal(config, window, "clear");
|
|
1438
|
+
return buildSyntheticSignal(config, window, "clear", void 0, void 0, "analysis_failed");
|
|
1401
1439
|
}
|
|
1402
1440
|
const analysisDurationMs = Date.now() - startTime;
|
|
1403
1441
|
const thinkingHash = hashThinkingBlock(thinking.content);
|
|
@@ -1458,7 +1496,10 @@ function createClient(config) {
|
|
|
1458
1496
|
/**
 * Builds a session identifier of the form
 * `sess-<prefix>-<hourBucket>-<randomHex>`.
 *
 * - `<prefix>` is the first 8 characters of `cardId` (a plain slice; despite
 *   the local name `hash`, nothing is hashed here).
 * - `<hourBucket>` is the current time in whole hours since the Unix epoch
 *   (`Date.now() / 36e5`, floored).
 * - `<randomHex>` is 2 bytes from `crypto.getRandomValues`, rendered as 4
 *   lowercase hex digits, so two calls for the same card within the same hour
 *   normally yield different IDs (only 16 bits of randomness, so collisions
 *   remain possible).
 *
 * Relies on a global `crypto` object being available at call time.
 *
 * @param {string} cardId Card identifier whose leading characters are embedded.
 * @returns {string} The generated session ID.
 */
function generateSessionId(cardId) {
|
|
1459
1497
|
const hash = cardId.slice(0, 8);
|
|
1460
1498
|
const hourBucket = Math.floor(Date.now() / 36e5);
|
|
1461
|
-
|
|
1499
|
+
const randomBytes = new Uint8Array(2);
|
|
1500
|
+
crypto.getRandomValues(randomBytes);
|
|
1501
|
+
const randomHex = Array.from(randomBytes).map((b) => b.toString(16).padStart(2, "0")).join("");
|
|
1502
|
+
return `sess-${hash}-${hourBucket}-${randomHex}`;
|
|
1462
1503
|
}
|
|
1463
1504
|
async function callAnalysisLLM(llmConfig, system, user, timeoutMs) {
|
|
1464
1505
|
const controller = new AbortController();
|
|
@@ -1499,7 +1540,7 @@ async function callAnalysisLLM(llmConfig, system, user, timeoutMs) {
|
|
|
1499
1540
|
clearTimeout(timeoutId);
|
|
1500
1541
|
}
|
|
1501
1542
|
}
|
|
1502
|
-
function buildSyntheticSignal(config, window, verdict, customReasoning, thinkingTokensOriginal) {
|
|
1543
|
+
function buildSyntheticSignal(config, window, verdict, customReasoning, thinkingTokensOriginal, syntheticReason) {
|
|
1503
1544
|
const summary = window.getSummary();
|
|
1504
1545
|
return {
|
|
1505
1546
|
checkpoint: {
|
|
@@ -1533,7 +1574,9 @@ function buildSyntheticSignal(config, window, verdict, customReasoning, thinking
|
|
|
1533
1574
|
truncated: false,
|
|
1534
1575
|
extraction_confidence: 0
|
|
1535
1576
|
},
|
|
1536
|
-
linked_trace_id: null
|
|
1577
|
+
linked_trace_id: null,
|
|
1578
|
+
synthetic: true,
|
|
1579
|
+
synthetic_reason: syntheticReason ?? "no_thinking_block"
|
|
1537
1580
|
},
|
|
1538
1581
|
proceed: verdict === "clear",
|
|
1539
1582
|
recommended_action: verdict === "clear" ? "continue" : "deny_and_escalate",
|
|
@@ -1550,11 +1593,14 @@ function verifySignature(secret, payload, signature) {
|
|
|
1550
1593
|
return constantTimeEqual(expected, signature);
|
|
1551
1594
|
}
|
|
1552
1595
|
/**
 * Compares two strings for equality without exiting at the first mismatch.
 *
 * Both inputs are right-padded with NUL characters to the longer of the two
 * lengths, then every UTF-16 code unit pair is XORed and OR-accumulated into
 * `result`; the loop always runs `maxLen` iterations and has no early return.
 * The final `a.length ^ b.length` term makes strings of different length
 * compare unequal even when the padding would otherwise make them match
 * (for example `"a"` versus `"a\0"`).
 *
 * @param {string} a First string.
 * @param {string} b Second string.
 * @returns {boolean} True only when both strings have the same length and
 *   identical code units.
 */
function constantTimeEqual(a, b) {
|
|
1553
|
-
|
|
1596
|
+
const maxLen = Math.max(a.length, b.length);
|
|
1597
|
+
const aPadded = a.padEnd(maxLen, "\0");
|
|
1598
|
+
const bPadded = b.padEnd(maxLen, "\0");
|
|
1554
1599
|
let result = 0;
|
|
1555
|
-
for (let i = 0; i <
|
|
1556
|
-
result |=
|
|
1600
|
+
for (let i = 0; i < maxLen; i++) {
|
|
1601
|
+
result |= aPadded.charCodeAt(i) ^ bPadded.charCodeAt(i);
|
|
1557
1602
|
}
|
|
1603
|
+
result |= a.length ^ b.length;
|
|
1558
1604
|
return result === 0;
|
|
1559
1605
|
}
|
|
1560
1606
|
function base64ToUint8(b64) {
|