npm - agentshield-sdk - Versions diffs - 11.0.0 → 13.0.0 - Mend

agentshield-sdk 11.0.0 → 13.0.0

This diff shows the differences between the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (22) hide show

package/CHANGELOG.md +88 -79
package/package.json +2 -2
package/src/agent-intent.js +359 -672
package/src/cross-turn.js +217 -564
package/src/detector-core.js +106 -0
package/src/ensemble.js +300 -409
package/src/fleet-defense.js +483 -0
package/src/hitl-guard.js +487 -0
package/src/incident-response.js +265 -0
package/src/main.js +121 -33
package/src/mcp-guard.js +4 -0
package/src/memory-guard.js +637 -0
package/src/micro-model.js +15 -1
package/src/ml-detector.js +110 -266
package/src/normalizer.js +296 -604
package/src/persistent-learning.js +104 -620
package/src/semantic-guard.js +452 -0
package/src/semantic-isolation.js +1 -0
package/src/smart-config.js +557 -705
package/src/sota-benchmark.js +268 -10
package/src/trap-defense.js +468 -0
package/types/index.d.ts +251 -580

package/src/cross-turn.js CHANGED Viewed

@@ -1,649 +1,302 @@
 'use strict';
 /**
- * Agent Shield — Cross-Turn Injection Tracking & Adaptive Thresholds (v8)
+ * Agent Shield — Multi-Turn Attack Detection (v12)
  *
- * Detects injection attacks spread across multiple conversation turns and
- * auto-calibrates detection thresholds based on observed scan results.
+ * Detects attacks that unfold across multiple conversation turns:
+ * gradual trust erosion, progressive boundary testing, multi-turn
+ * grooming, topic drift toward sensitive areas, and authority
+ * accumulation over time.
  *
- * All computation is pure JavaScript — no external dependencies.
- * No data ever leaves the user's environment.
+ * Ref: Nature Communications — LRMs achieve 97% jailbreak success
+ * via planned multi-turn attacks.
+ *
+ * All processing runs locally — no data ever leaves your environment.
+ *
+ * @module cross-turn
  */
 const { scanText } = require('./detector-core');
 // =========================================================================
-// CROSS-TURN TRACKER
+// CONSTANTS
+// =========================================================================
+/** Escalation signals — phrases that indicate progressive boundary pushing. */
+const ESCALATION_SIGNALS = [
+  /(?:now\s+that\s+we've\s+established|building\s+on\s+(?:that|what\s+you\s+said)|since\s+you\s+(?:agreed|confirmed|said))/i,
+  /(?:great,?\s+now\s+(?:let's|can\s+you)|okay\s+(?:good|perfect),?\s+(?:now|next))/i,
+  /(?:one\s+more\s+(?:thing|question|step)|just\s+one\s+(?:last|final)\s+(?:thing|question))/i,
+  /(?:can\s+you\s+go\s+(?:a\s+little\s+)?(?:further|deeper)|take\s+(?:it|this)\s+(?:a\s+step\s+)?further)/i,
+  /(?:you\s+(?:just|already)\s+(?:showed|told|gave)\s+me|you\s+(?:were|seemed)\s+(?:willing|happy)\s+to)/i,
+];
+/** Trust erosion patterns — attempts to lower the model's guard over turns. */
+const TRUST_EROSION_PATTERNS = [
+  /(?:we're\s+(?:friends|on\s+the\s+same\s+team|collaborating)|I\s+thought\s+we\s+had\s+(?:a\s+good|an?\s+understanding))/i,
+  /(?:you\s+(?:can|should)\s+trust\s+me|I\s+(?:promise|swear)\s+(?:this|I)\s+(?:is|am)\s+(?:safe|legitimate))/i,
+  /(?:between\s+(?:you\s+and\s+me|us)|this\s+(?:is|stays)\s+(?:just\s+)?between\s+us)/i,
+];
+/** Topic categories with sensitivity levels. */
+const TOPIC_SENSITIVITY = {
+  safe: /(?:weather|food|sports|music|movies|travel|recipes|pets|hobbies|games)/i,
+  moderate: /(?:coding|programming|math|science|history|business|education|health)/i,
+  sensitive: /(?:security|credentials?|passwords?|tokens?|secrets?|keys?|prompt|instructions?|admin|root|config)/i,
+  dangerous: /(?:hack(?:ing)?|exploit|attack|bypass|override|jailbreak|injection|exfiltrat)/i,
+};
+// =========================================================================
+// ConversationTracker
 // =========================================================================
 /**
- * Accumulates conversation text across turns and periodically scans the
- * full accumulated context for injections that only become visible when
- * messages are combined (e.g. "Ig" + "nore all" + "previous instructions").
+ * Tracks a multi-turn conversation and detects progressive attacks.
  */
-class CrossTurnTracker {
+class ConversationTracker {
   /**
-   * @param {object} [config]
-   * @param {number} [config.windowSize=20] - Max messages to keep in window
-   * @param {number} [config.scanInterval=3] - Scan every N messages
-   * @param {boolean} [config.accumulateAll=true] - Keep all text or just user messages
-   * @param {string} [config.sensitivity='high'] - Scan sensitivity
-   * @param {function} [config.onDetection] - Callback when cross-turn threat found
+   * @param {object} [options]
+   * @param {number} [options.maxTurns=100] - Max turns to track.
+   * @param {number} [options.escalationThreshold=3] - Escalation signals before alert.
+   * @param {number} [options.topicDriftThreshold=0.6] - Topic drift score to alert (0-1).
    */
-  constructor(config = {}) {
-    this.windowSize = config.windowSize !== undefined ? config.windowSize : 20;
-    this.scanInterval = config.scanInterval !== undefined ? config.scanInterval : 3;
-    this.accumulateAll = config.accumulateAll !== undefined ? config.accumulateAll : true;
-    this.sensitivity = config.sensitivity || 'high';
-    this.onDetection = config.onDetection || null;
-    this.messages = [];
-    this._stats = {
-      totalMessages: 0,
-      scansTriggered: 0,
-      crossTurnDetections: 0,
-      individualDetections: 0
-    };
+  constructor(options = {}) {
+    this.maxTurns = options.maxTurns || 100;
+    this.escalationThreshold = options.escalationThreshold || 3;
+    this.topicDriftThreshold = options.topicDriftThreshold || 0.6;
+    /** @type {Array<{ role: string, content: string, timestamp: number, threats: any[], topic: string, escalationSignals: number, trustErosion: boolean }>} */
+    this.turns = [];
+    this.alerts = [];
+    this.stats = { turnsProcessed: 0, alertsGenerated: 0, escalationSignals: 0, topicDrifts: 0 };
   }
   /**
-   * Add a message to the conversation.
-   * @param {string} text - Message text
-   * @param {string} [role='user'] - 'user' or 'assistant'
-   * @returns {object} {
-   *   tracked: boolean,
-   *   messageCount: number,
-   *   scanTriggered: boolean,
-   *   threats: Array (empty if no scan or no threats),
-   *   crossTurnDetection: boolean (true if threat only visible in combined text)
-   * }
+   * Add a conversation turn and analyze for multi-turn attack patterns.
+   *
+   * @param {string} role - 'user' or 'assistant'.
+   * @param {string} content - Message content.
+   * @returns {{ safe: boolean, alerts: Array<object>, turnAnalysis: object }}
    */
-  addMessage(text, role = 'user') {
-    if (!text || typeof text !== 'string') {
-      return {
-        tracked: false,
-        messageCount: this.messages.length,
-        scanTriggered: false,
-        threats: [],
-        crossTurnDetection: false
-      };
-    }
-    const message = {
-      text,
+  addTurn(role, content) {
+    const safeContent = (content && typeof content === 'string') ? content : '';
+    const threats = scanText(safeContent).threats || [];
+    const topic = this._classifyTopic(safeContent);
+    const escalationSignals = this._countEscalationSignals(safeContent);
+    const trustErosion = this._detectTrustErosion(safeContent);
+    const turn = {
       role,
+      content: safeContent.substring(0, 1000),
       timestamp: Date.now(),
-      index: this._stats.totalMessages
+      threats,
+      topic,
+      escalationSignals,
+      trustErosion,
+      turnIndex: this.turns.length
     };
-    this.messages.push(message);
-    this._stats.totalMessages++;
-    // Enforce sliding window
-    if (this.messages.length > this.windowSize) {
-      this.messages.shift();
-    }
-    // Determine if we should scan
-    const scanTriggered = this._stats.totalMessages % this.scanInterval === 0;
+    this.turns.push(turn);
+    this.stats.turnsProcessed++;
+    this.stats.escalationSignals += escalationSignals;
-    if (!scanTriggered) {
-      return {
-        tracked: true,
-        messageCount: this.messages.length,
-        scanTriggered: false,
-        threats: [],
-        crossTurnDetection: false
-      };
+    // Trim to max turns
+    if (this.turns.length > this.maxTurns) {
+      this.turns = this.turns.slice(-this.maxTurns);
     }
-    // Perform cross-turn scan
-    this._stats.scansTriggered++;
-    const scanResult = this._performCrossTurnScan();
-    return {
-      tracked: true,
-      messageCount: this.messages.length,
-      scanTriggered: true,
-      threats: scanResult.threats,
-      crossTurnDetection: scanResult.crossTurnDetection
-    };
-  }
-  /**
-   * Force a scan of accumulated text right now.
-   * @returns {object} { threats: Array, combinedLength: number, messageCount: number }
-   */
-  scanNow() {
-    this._stats.scansTriggered++;
-    const combined = this.getAccumulatedText();
-    const result = scanText(combined, {
-      source: 'cross_turn_scan',
-      sensitivity: this.sensitivity
-    });
-    return {
-      threats: result.threats,
-      combinedLength: combined.length,
-      messageCount: this.messages.length
-    };
-  }
-  /**
-   * Get the current accumulated text.
-   * @returns {string}
-   */
-  getAccumulatedText() {
-    const eligible = this.accumulateAll
-      ? this.messages
-      : this.messages.filter(m => m.role === 'user');
-    return eligible.map(m => m.text).join(' ');
-  }
-  /**
-   * Get the individual message that was most suspicious.
-   * @returns {object|null} { text, role, confidence, threats } or null
-   */
-  getMostSuspicious() {
-    if (this.messages.length === 0) return null;
-    let mostSuspicious = null;
-    let highestThreatCount = -1;
-    for (const msg of this.messages) {
-      const result = scanText(msg.text, {
-        source: 'individual_scan',
-        sensitivity: this.sensitivity
-      });
-      if (result.threats.length > highestThreatCount) {
-        highestThreatCount = result.threats.length;
-        mostSuspicious = {
-          text: msg.text,
-          role: msg.role,
-          timestamp: msg.timestamp,
-          confidence: result.threats.length > 0
-            ? Math.max(...result.threats.map(t => _severityToConfidence(t.severity)))
-            : 0,
-          threats: result.threats
-        };
+    // Run multi-turn analysis
+    const turnAlerts = [];
+    // 1. Escalation detection — too many escalation signals in recent turns
+    if (role === 'user') {
+      const recentEscalation = this._getRecentEscalationCount(5);
+      if (recentEscalation >= this.escalationThreshold) {
+        turnAlerts.push({
+          type: 'multi_turn_escalation',
+          severity: 'high',
+          turnIndex: turn.turnIndex,
+          escalationCount: recentEscalation,
+          description: `Detected ${recentEscalation} escalation signals in last 5 turns. Possible multi-turn grooming attack.`
+        });
       }
     }
-    return mostSuspicious;
-  }
-  /**
-   * Reset the tracker to initial state.
-   */
-  reset() {
-    this.messages = [];
-    this._stats = {
-      totalMessages: 0,
-      scansTriggered: 0,
-      crossTurnDetections: 0,
-      individualDetections: 0
-    };
-  }
-  /**
-   * Get tracker statistics.
-   * @returns {object}
-   */
-  getStats() {
-    return {
-      ...this._stats,
-      currentWindowSize: this.messages.length,
-      maxWindowSize: this.windowSize,
-      scanInterval: this.scanInterval
-    };
-  }
-  /**
-   * Perform the cross-turn detection scan.
-   * Compares combined scan results against individual message scans.
-   * @private
-   * @returns {object} { threats: Array, crossTurnDetection: boolean }
-   */
-  _performCrossTurnScan() {
-    const eligible = this.accumulateAll
-      ? this.messages
-      : this.messages.filter(m => m.role === 'user');
-    if (eligible.length === 0) {
-      return { threats: [], crossTurnDetection: false };
-    }
-    // Scan concatenated text
-    const combinedText = eligible.map(m => m.text).join(' ');
-    const combinedResult = scanText(combinedText, {
-      source: 'cross_turn_combined',
-      sensitivity: this.sensitivity
-    });
-    if (combinedResult.threats.length === 0) {
-      return { threats: [], crossTurnDetection: false };
-    }
-    // Scan each individual message and collect all individually-detected threats
-    const individualCategories = new Set();
-    for (const msg of eligible) {
-      const result = scanText(msg.text, {
-        source: 'cross_turn_individual',
-        sensitivity: this.sensitivity
+    // 2. Topic drift toward sensitive/dangerous areas
+    const topicDrift = this._measureTopicDrift();
+    if (topicDrift.drifted) {
+      turnAlerts.push({
+        type: 'topic_drift_to_sensitive',
+        severity: topicDrift.toLevel === 'dangerous' ? 'critical' : 'high',
+        turnIndex: turn.turnIndex,
+        fromTopic: topicDrift.from,
+        toTopic: topicDrift.to,
+        description: `Conversation drifted from ${topicDrift.from} to ${topicDrift.to} topics over ${topicDrift.overTurns} turns.`
       });
-      for (const t of result.threats) {
-        individualCategories.add(`${t.category}|${t.detail}`);
-      }
-      if (result.threats.length > 0) {
-        this._stats.individualDetections++;
-      }
+      this.stats.topicDrifts++;
     }
-    // Cross-turn threats: found in combined scan but NOT in any individual scan
-    const crossTurnThreats = [];
-    const regularThreats = [];
-    for (const threat of combinedResult.threats) {
-      const key = `${threat.category}|${threat.detail}`;
-      if (!individualCategories.has(key)) {
-        crossTurnThreats.push({
-          ...threat,
-          crossTurn: true,
-          description: `Cross-turn attack: ${threat.description} (split across ${eligible.length} messages)`,
-          windowSize: eligible.length
+    // 3. Trust erosion accumulation
+    if (trustErosion) {
+      const recentTrustErosion = this.turns.slice(-5).filter(t => t.trustErosion).length;
+      if (recentTrustErosion >= 2) {
+        turnAlerts.push({
+          type: 'trust_erosion',
+          severity: 'high',
+          turnIndex: turn.turnIndex,
+          count: recentTrustErosion,
+          description: `Detected ${recentTrustErosion} trust erosion attempts in last 5 turns. Attacker building false rapport.`
         });
-      } else {
-        regularThreats.push(threat);
       }
     }
-    const crossTurnDetection = crossTurnThreats.length > 0;
-    if (crossTurnDetection) {
-      this._stats.crossTurnDetections++;
-      console.log('[Agent Shield] Cross-turn injection detected: ' + crossTurnThreats.length + ' threat(s) found across ' + eligible.length + ' messages');
-      if (this.onDetection) {
-        try {
-          this.onDetection({
-            threats: crossTurnThreats,
-            messages: eligible.map(m => ({ text: m.text, role: m.role })),
-            timestamp: Date.now()
-          });
-        } catch (e) {
-          console.error('[Agent Shield] onDetection callback error:', e.message);
-        }
+    // 4. Progressive boundary testing — benign → threat pattern
+    if (threats.length > 0 && this.turns.length >= 3) {
+      const priorTurns = this.turns.slice(-4, -1);
+      const priorClean = priorTurns.filter(t => t.role === 'user' && t.threats.length === 0).length;
+      if (priorClean >= 2) {
+        turnAlerts.push({
+          type: 'progressive_boundary_test',
+          severity: 'high',
+          turnIndex: turn.turnIndex,
+          cleanTurnsBefore: priorClean,
+          description: `Injection detected after ${priorClean} clean turns. Possible gradual boundary testing.`
+        });
       }
     }
-    return {
-      threats: [...crossTurnThreats, ...regularThreats],
-      crossTurnDetection
-    };
-  }
-}
-// =========================================================================
-// ADAPTIVE THRESHOLD CALIBRATOR
-// =========================================================================
-/**
- * Automatically adjusts detection thresholds based on observed scan results.
- * Learns what "normal" looks like for each deployment and calibrates
- * per-category thresholds to achieve a target false positive rate.
- */
-class AdaptiveThresholdCalibrator {
-  /**
-   * @param {object} [config]
-   * @param {number} [config.calibrationSamples=100] - Samples before adjusting
-   * @param {number} [config.adjustInterval=50] - Recalibrate every N samples
-   * @param {number} [config.minConfidence=0.3] - Never drop below this
-   * @param {number} [config.maxConfidence=0.95] - Never go above this
-   * @param {number} [config.targetFPRate=0.02] - Target false positive rate (2%)
-   */
-  constructor(config = {}) {
-    this.calibrationSamples = config.calibrationSamples !== undefined ? config.calibrationSamples : 100;
-    this.adjustInterval = config.adjustInterval !== undefined ? config.adjustInterval : 50;
-    this.minConfidence = config.minConfidence !== undefined ? config.minConfidence : 0.3;
-    this.maxConfidence = config.maxConfidence !== undefined ? config.maxConfidence : 0.95;
-    this.targetFPRate = config.targetFPRate !== undefined ? config.targetFPRate : 0.02;
-    // Per-category data
-    this._categories = {};
-    // Default category always exists
-    this._categories['default'] = this._createCategoryData();
-    this._totalSamples = 0;
-    this._calibrationCount = 0;
-  }
-  /**
-   * Record a scan result for calibration.
-   * @param {object} result - { confidence: number, isInjection: boolean, category: string }
-   * @param {boolean} [isTruePositive] - If known (from feedback), whether this was correct
-   * @returns {object} {
-   *   recorded: boolean,
-   *   isCalibrating: boolean,
-   *   samplesRemaining: number,
-   *   currentThreshold: number
-   * }
-   */
-  record(result, isTruePositive) {
-    if (!result || typeof result.confidence !== 'number') {
-      return {
-        recorded: false,
-        isCalibrating: this._totalSamples < this.calibrationSamples,
-        samplesRemaining: Math.max(0, this.calibrationSamples - this._totalSamples),
-        currentThreshold: this.getThreshold('default')
-      };
-    }
-    const category = result.category || 'default';
-    const confidence = Math.max(0, Math.min(1, result.confidence));
-    const isInjection = !!result.isInjection;
-    // Ensure category data exists
-    if (!this._categories[category]) {
-      this._categories[category] = this._createCategoryData();
-    }
-    const catData = this._categories[category];
-    // Record the sample
-    catData.samples.push({
-      confidence,
-      isInjection,
-      isTruePositive: isTruePositive !== undefined ? isTruePositive : null,
-      timestamp: Date.now()
-    });
-    // Also record in default if not already default
-    if (category !== 'default') {
-      this._categories['default'].samples.push({
-        confidence,
-        isInjection,
-        isTruePositive: isTruePositive !== undefined ? isTruePositive : null,
-        timestamp: Date.now()
-      });
-    }
-    this._totalSamples++;
-    // Cap stored samples to prevent unbounded growth
-    const maxStoredSamples = this.calibrationSamples * 10;
-    if (catData.samples.length > maxStoredSamples) {
-      catData.samples = catData.samples.slice(-maxStoredSamples);
-    }
-    if (category !== 'default' && this._categories['default'].samples.length > maxStoredSamples) {
-      this._categories['default'].samples = this._categories['default'].samples.slice(-maxStoredSamples);
-    }
-    // Check if we should recalibrate
-    const isCalibrating = this._totalSamples < this.calibrationSamples;
-    const shouldRecalibrate = !isCalibrating &&
-      (this._totalSamples % this.adjustInterval === 0);
-    if (shouldRecalibrate) {
-      this.recalibrate();
+    // 5. Authority accumulation — user references previous "agreements"
+    if (role === 'user' && /(?:you\s+(?:said|agreed|confirmed|told\s+me)|as\s+we\s+(?:discussed|agreed)|per\s+our\s+(?:agreement|conversation))/i.test(content)) {
+      const hasRealAgreement = this.turns.some(t => t.role === 'assistant' && /(?:sure|yes|okay|of\s+course|I\s+(?:can|will))/i.test(t.content));
+      if (!hasRealAgreement) {
+        turnAlerts.push({
+          type: 'false_authority_claim',
+          severity: 'high',
+          turnIndex: turn.turnIndex,
+          description: 'User claims prior agreement/consent that does not exist in conversation history.'
+        });
+      }
     }
-    return {
-      recorded: true,
-      isCalibrating,
-      samplesRemaining: Math.max(0, this.calibrationSamples - this._totalSamples),
-      currentThreshold: this.getThreshold(category)
-    };
-  }
-  /**
-   * Get the current calibrated threshold for a category.
-   * @param {string} [category='default']
-   * @returns {number} threshold 0-1
-   */
-  getThreshold(category = 'default') {
-    const catData = this._categories[category] || this._categories['default'];
-    return catData.threshold;
-  }
-  /**
-   * Check if a confidence score exceeds the calibrated threshold.
-   * @param {number} confidence
-   * @param {string} [category='default']
-   * @returns {boolean}
-   */
-  shouldFlag(confidence, category = 'default') {
-    return confidence >= this.getThreshold(category);
-  }
-  /**
-   * Force recalibration now.
-   * @returns {object} { thresholds: object, samplesUsed: number }
-   */
-  recalibrate() {
-    this._calibrationCount++;
-    const thresholds = {};
-    for (const [category, catData] of Object.entries(this._categories)) {
-      const newThreshold = this._calibrateCategory(catData);
-      catData.threshold = newThreshold;
-      thresholds[category] = newThreshold;
+    for (const alert of turnAlerts) {
+      this.alerts.push(alert);
+      this.stats.alertsGenerated++;
     }
-    console.log('[Agent Shield] Adaptive thresholds recalibrated (round ' + this._calibrationCount + '): ' + Object.entries(thresholds).map(([cat, th]) => cat + '=' + th.toFixed(3)).join(', '));
+    // Bound alerts
+    if (this.alerts.length > 500) this.alerts = this.alerts.slice(-500);
     return {
-      thresholds,
-      samplesUsed: this._totalSamples
-    };
-  }
-  /**
-   * Get calibration stats.
-   * @returns {object}
-   */
-  getStats() {
-    const categoryStats = {};
-    for (const [category, catData] of Object.entries(this._categories)) {
-      const benignSamples = catData.samples.filter(s => !s.isInjection);
-      const injectionSamples = catData.samples.filter(s => s.isInjection);
-      const feedbackSamples = catData.samples.filter(s => s.isTruePositive !== null);
-      // Estimate current FP rate
-      let estimatedFPRate = 0;
-      if (benignSamples.length > 0) {
-        const falsePositives = benignSamples.filter(
-          s => s.confidence >= catData.threshold
-        ).length;
-        estimatedFPRate = falsePositives / benignSamples.length;
+      safe: turnAlerts.length === 0 && threats.length === 0,
+      alerts: turnAlerts,
+      turnAnalysis: {
+        topic,
+        threatCount: threats.length,
+        escalationSignals,
+        trustErosion,
+        turnIndex: turn.turnIndex
       }
-      categoryStats[category] = {
-        threshold: catData.threshold,
-        totalSamples: catData.samples.length,
-        benignSamples: benignSamples.length,
-        injectionSamples: injectionSamples.length,
-        feedbackSamples: feedbackSamples.length,
-        estimatedFPRate: Math.round(estimatedFPRate * 10000) / 10000
-      };
-    }
-    return {
-      totalSamples: this._totalSamples,
-      calibrationCount: this._calibrationCount,
-      isCalibrating: this._totalSamples < this.calibrationSamples,
-      targetFPRate: this.targetFPRate,
-      categories: categoryStats
     };
   }
   /**
-   * Export calibration data for persistence.
+   * Get conversation risk summary.
    * @returns {object}
    */
-  export() {
-    const categories = {};
-    for (const [category, catData] of Object.entries(this._categories)) {
-      categories[category] = {
-        threshold: catData.threshold,
-        samples: catData.samples
-      };
-    }
+  getRiskSummary() {
+    const topicProgression = this.turns.map(t => t.topic);
+    const threatTurns = this.turns.filter(t => t.threats.length > 0).length;
+    const totalEscalation = this.turns.reduce((s, t) => s + t.escalationSignals, 0);
     return {
-      version: 1,
-      totalSamples: this._totalSamples,
-      calibrationCount: this._calibrationCount,
-      calibrationSamples: this.calibrationSamples,
-      adjustInterval: this.adjustInterval,
-      minConfidence: this.minConfidence,
-      maxConfidence: this.maxConfidence,
-      targetFPRate: this.targetFPRate,
-      categories,
-      exportedAt: Date.now()
+      totalTurns: this.turns.length,
+      threatTurns,
+      threatRate: this.turns.length > 0 ? threatTurns / this.turns.length : 0,
+      totalEscalationSignals: totalEscalation,
+      topicProgression: topicProgression.slice(-10),
+      alertCount: this.alerts.length,
+      recentAlerts: this.alerts.slice(-5),
+      riskLevel: this.alerts.some(a => a.severity === 'critical') ? 'critical' :
+                 this.alerts.length > 3 ? 'high' :
+                 this.alerts.length > 0 ? 'medium' : 'safe'
     };
   }
   /**
-   * Import calibration data from a previous export.
-   * @param {object} data - Previously exported calibration data
+   * Reset the conversation tracker.
    */
-  import(data) {
-    if (!data || typeof data !== 'object') {
-      console.error('[Agent Shield] Invalid calibration data for import');
-      return;
-    }
+  reset() {
+    this.turns = [];
+    this.alerts = [];
+    this.stats = { turnsProcessed: 0, alertsGenerated: 0, escalationSignals: 0, topicDrifts: 0 };
+  }
-    if (data.version !== 1) {
-      console.error('[Agent Shield] Unsupported calibration data version: ' + data.version);
-      return;
-    }
+  // -----------------------------------------------------------------------
+  // Private
+  // -----------------------------------------------------------------------
-    this._totalSamples = data.totalSamples || 0;
-    this._calibrationCount = data.calibrationCount || 0;
-    if (data.calibrationSamples !== undefined) this.calibrationSamples = data.calibrationSamples;
-    if (data.adjustInterval !== undefined) this.adjustInterval = data.adjustInterval;
-    if (data.minConfidence !== undefined) this.minConfidence = data.minConfidence;
-    if (data.maxConfidence !== undefined) this.maxConfidence = data.maxConfidence;
-    if (data.targetFPRate !== undefined) this.targetFPRate = data.targetFPRate;
-    if (data.categories) {
-      this._categories = {};
-      for (const [category, catData] of Object.entries(data.categories)) {
-        this._categories[category] = {
-          threshold: catData.threshold || this._defaultThreshold(),
-          samples: Array.isArray(catData.samples) ? catData.samples : []
-        };
-      }
+  /** @private */
+  _classifyTopic(text) {
+    for (const [level, pattern] of Object.entries(TOPIC_SENSITIVITY).reverse()) {
+      if (pattern.test(text)) return level;
     }
+    return 'safe';
+  }
-    // Ensure default category exists
-    if (!this._categories['default']) {
-      this._categories['default'] = this._createCategoryData();
+  /** @private */
+  _countEscalationSignals(text) {
+    let count = 0;
+    for (const pattern of ESCALATION_SIGNALS) {
+      if (pattern.test(text)) count++;
     }
-    console.log('[Agent Shield] Calibration data imported: ' + this._totalSamples + ' samples, ' + Object.keys(this._categories).length + ' categories');
+    return count;
   }
-  /**
-   * Create initial data structure for a category.
-   * @private
-   * @returns {object}
-   */
-  _createCategoryData() {
-    return {
-      threshold: this._defaultThreshold(),
-      samples: []
-    };
+  /** @private */
+  _detectTrustErosion(text) {
+    return TRUST_EROSION_PATTERNS.some(p => p.test(text));
   }
-  /**
-   * Get the default starting threshold.
-   * @private
-   * @returns {number}
-   */
-  _defaultThreshold() {
-    return 0.5;
+  /** @private */
+  _getRecentEscalationCount(windowSize) {
+    return this.turns.slice(-windowSize).reduce((s, t) => s + t.escalationSignals, 0);
   }
-  /**
-   * Calibrate a single category using the percentile-based approach.
-   * Finds the threshold that achieves the target FP rate on benign samples.
-   * @private
-   * @param {object} catData - Category data with samples array
-   * @returns {number} Calibrated threshold
-   */
-  _calibrateCategory(catData) {
-    const samples = catData.samples;
-    if (samples.length === 0) {
-      return catData.threshold;
-    }
-    // Separate benign and injection samples
-    const benignConfidences = [];
-    const injectionConfidences = [];
+  /** @private */
+  _measureTopicDrift() {
+    if (this.turns.length < 4) return { drifted: false };
-    for (const s of samples) {
-      // Use feedback if available, otherwise use isInjection flag
-      const actuallyBenign = s.isTruePositive === false || (!s.isInjection && s.isTruePositive === null);
-      const actuallyInjection = s.isTruePositive === true || (s.isInjection && s.isTruePositive === null);
+    const levels = { safe: 0, moderate: 1, sensitive: 2, dangerous: 3 };
+    const earlyTurns = this.turns.slice(0, Math.min(3, Math.floor(this.turns.length / 2)));
+    const recentTurns = this.turns.slice(-3);
-      if (actuallyBenign) {
-        benignConfidences.push(s.confidence);
-      } else if (actuallyInjection) {
-        injectionConfidences.push(s.confidence);
-      }
-    }
+    const earlyMax = Math.max(...earlyTurns.map(t => levels[t.topic] || 0));
+    const recentMax = Math.max(...recentTurns.map(t => levels[t.topic] || 0));
-    // If we have no benign samples, keep current threshold
-    if (benignConfidences.length === 0) {
-      return catData.threshold;
+    if (recentMax > earlyMax && recentMax >= 2) {
+      const fromLevel = Object.entries(levels).find(([, v]) => v === earlyMax)?.[0] || 'safe';
+      const toLevel = Object.entries(levels).find(([, v]) => v === recentMax)?.[0] || 'safe';
+      return {
+        drifted: true,
+        from: fromLevel,
+        to: toLevel,
+        fromLevel: earlyMax,
+        toLevel: recentMax,
+        overTurns: this.turns.length
+      };
     }
-    // Sort benign confidence scores ascending
-    benignConfidences.sort((a, b) => a - b);
-    // Find the threshold at the (1 - targetFPRate) percentile of benign samples
-    // This means only targetFPRate of benign samples would be above the threshold
-    const percentileIndex = Math.floor(benignConfidences.length * (1 - this.targetFPRate));
-    const clampedIndex = Math.min(percentileIndex, benignConfidences.length - 1);
-    let threshold = benignConfidences[clampedIndex];
-    // Clamp between min and max
-    threshold = Math.max(this.minConfidence, Math.min(this.maxConfidence, threshold));
-    return Math.round(threshold * 1000) / 1000;
+    return { drifted: false };
   }
 }
-// =========================================================================
-// UTILITY FUNCTIONS
-// =========================================================================
-/**
- * Map severity string to a numeric confidence value.
- * @param {string} severity - 'critical', 'high', 'medium', or 'low'
- * @returns {number} Confidence between 0 and 1
- * @private
- */
-function _severityToConfidence(severity) {
-  const map = {
-    critical: 0.95,
-    high: 0.8,
-    medium: 0.6,
-    low: 0.4
-  };
-  return map[severity] || 0.5;
-}
 // =========================================================================
 // EXPORTS
 // =========================================================================
 module.exports = {
-  CrossTurnTracker,
-  AdaptiveThresholdCalibrator
+  ConversationTracker,
+  ESCALATION_SIGNALS,
+  TRUST_EROSION_PATTERNS,
+  TOPIC_SENSITIVITY
 };