npm - principles-disciple - Versions diffs - 1.52.0 → 1.54.0 - Mend

principles-disciple 1.52.0 → 1.54.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/.planning/phases/01-basic-visualization/01-GAP-CLOSURE-VERIFICATION.md +113 -0
package/openclaw.plugin.json +1 -1
package/package.json +1 -1
package/src/core/bootstrap-rules.ts +43 -4
package/src/core/evolution-hook.ts +74 -0
package/src/core/file-storage-adapter.ts +203 -0
package/src/core/init.ts +29 -2
package/src/core/nocturnal-trinity.ts +230 -0
package/src/core/observability.ts +242 -0
package/src/core/pain-lifecycle.ts +38 -0
package/src/core/pain-signal-adapter.ts +42 -0
package/src/core/pain-signal.ts +139 -0
package/src/core/principle-injection.ts +208 -0
package/src/core/principle-injector.ts +84 -0
package/src/core/storage-adapter.ts +65 -0
package/src/core/telemetry-event.ts +109 -0
package/src/hooks/prompt.ts +18 -3
package/src/service/evolution-worker.ts +59 -2
package/tests/core/evolution-hook.test.ts +123 -0
package/tests/core/file-storage-adapter.test.ts +285 -0
package/tests/core/nocturnal-trinity.test.ts +236 -0
package/tests/core/observability.test.ts +383 -0
package/tests/core/pain-lifecycle.test.ts +37 -0
package/tests/core/pain-signal-adapter.test.ts +116 -0
package/tests/core/pain-signal.test.ts +190 -0
package/tests/core/principle-injection.test.ts +223 -0
package/tests/core/principle-injector.test.ts +90 -0
package/tests/core/storage-conformance.test.ts +429 -0
package/tests/core/telemetry-event.test.ts +119 -0
package/tests/integration/pain-lifecycle-e2e.test.ts +74 -0

package/src/core/nocturnal-trinity.ts CHANGED Viewed

@@ -2211,6 +2211,20 @@ export async function runTrinityAsync(options: RunTrinityOptions): Promise<Trini
       telemetry.eligibleCandidateCount = draftArtifact.telemetry.eligibleCandidateCount;
     }
+    // Hallucination detection (SDK-QUAL-02): validate extraction against snapshot
+    const hallucinationResult = validateExtraction(draftArtifact, snapshot);
+    if (!hallucinationResult.isGrounded) {
+      const reason = hallucinationResult.reason ?? 'Extraction not grounded in session evidence';
+      console.warn(`[Trinity] HALLUCINATION_DETECTED: ${reason}`);
+      telemetry.stageFailures.push(`Hallucination: ${reason}`);
+      return {
+        success: false,
+        telemetry,
+        failures: [{ stage: 'scribe', reason }],
+        fallbackOccurred: false,
+      };
+    }
     return {
       success: true,
       artifact: draftArtifact,
@@ -2339,6 +2353,20 @@ function runTrinityWithStubs(
     telemetry.eligibleCandidateCount = draftArtifact.telemetry.eligibleCandidateCount;
   }
+  // Hallucination detection (SDK-QUAL-02): validate extraction against snapshot
+  const hallucinationResult = validateExtraction(draftArtifact, snapshot);
+  if (!hallucinationResult.isGrounded) {
+    const reason = hallucinationResult.reason ?? 'Extraction not grounded in session evidence';
+    console.warn(`[Trinity] HALLUCINATION_DETECTED: ${reason}`);
+    telemetry.stageFailures.push(`Hallucination: ${reason}`);
+    return {
+      success: false,
+      telemetry,
+      failures: [{ stage: 'scribe', reason }],
+      fallbackOccurred: false,
+    };
+  }
   return {
     success: true,
     artifact: draftArtifact,
@@ -2405,6 +2433,208 @@ export function validateDraftArtifact(draft: TrinityDraftArtifact): DraftValidat
   };
 }
+// ---------------------------------------------------------------------------
+// Hallucination Detection (SDK-QUAL-02)
+// ---------------------------------------------------------------------------
+/**
+ * Result of hallucination validation against session snapshot evidence.
+ * Produced by validateExtraction().
+ */
+export interface HallucinationDetectionResult {
+  /**
+   * Whether the extraction is considered grounded in session evidence.
+   * Also true when the snapshot contains no evidence at all, because in that
+   * case nothing can be validated and the extraction is let through.
+   */
+  isGrounded: boolean;
+  /**
+   * Evidence categories present in the snapshot: 'tool_failures',
+   * 'pain_events', 'gate_blocks', 'user_corrections'. A category is listed
+   * whenever the snapshot contains it, whether or not it matched the badDecision.
+   */
+  evidenceTypes: string[];
+  /** Human-readable explanation; set only when isGrounded is false */
+  reason?: string;
+  /**
+   * Short previews of the snapshot evidence for telemetry. Tool, pain and gate
+   * entries are cut to 100 characters and at most the first 5 entries are
+   * returned. Entries are not filtered to the ones that matched.
+   */
+  evidencePreview: string[];
+}
+/**
+ * Validate that an extracted badDecision corresponds to actual events in the
+ * NocturnalSessionSnapshot. This catches hallucinated extractions where the
+ * Trinity chain produces a badDecision that has no grounding in real failures,
+ * pain events, or gate blocks.
+ *
+ * Evidence sources checked:
+ *  1. Failed tool calls (snapshot.toolCalls with outcome='failure')
+ *  2. Pain events (snapshot.painEvents with score >= 50)
+ *  3. Gate blocks (snapshot.gateBlocks)
+ *  4. User corrections (snapshot.userTurns with correctionDetected=true)
+ *
+ * Matching is a keyword-overlap heuristic. Every evidence string (tool names,
+ * file paths and their segments, error types, pain sources, and the words of
+ * error messages / pain reasons / gate reasons) is reduced with the SAME
+ * normalizer as the badDecision words (lowercase, non-alphanumerics removed),
+ * so identifiers such as `edit_file` or `src/core/init.ts` can actually match.
+ * badDecision words are de-duplicated before counting, so repeating one word
+ * cannot satisfy the overlap threshold on its own.
+ *
+ * The extraction is reported as grounded when any of the following holds:
+ *  - the snapshot contains no evidence at all (nothing to validate against);
+ *  - the number of distinct matching badDecision tokens (length > 4) is at
+ *    least max(2, ceil(15% of the distinct badDecision tokens));
+ *  - at least one matching token is longer than 8 characters.
+ *
+ * @param artifact The draft artifact produced by the Scribe stage
+ * @param snapshot The session snapshot used to generate the extraction
+ * @returns HallucinationDetectionResult indicating whether the extraction is grounded
+ */
+export function validateExtraction(
+  artifact: TrinityDraftArtifact,
+  snapshot: NocturnalSessionSnapshot
+): HallucinationDetectionResult {
+  const evidenceTypes: string[] = [];
+  const evidencePreview: string[] = [];
+  const evidenceTokens = new Set<string>();
+  // Generic words that must never count as evidence (checked AFTER normalization,
+  // so trailing punctuation such as "which," cannot bypass the filter)
+  const stopWords = new Set(['with', 'from', 'that', 'this', 'which', 'been', 'have', 'were', 'they', 'their']);
+  // Shared token normalizer: lowercase + strip everything except [a-z0-9]
+  const normalizeEvidenceToken = (value: string): string =>
+    value.toLowerCase().replace(/[^a-z0-9]/g, '');
+  // Register one identifier-like value (tool name, path segment, error type, source)
+  const addEvidenceToken = (value: string | undefined): void => {
+    if (!value) return;
+    const normalized = normalizeEvidenceToken(value);
+    if (normalized.length > 0) evidenceTokens.add(normalized);
+  };
+  // Register the meaningful words of a free-text field (message / reason)
+  const addEvidenceWords = (text: string | undefined): void => {
+    if (!text) return;
+    for (const word of text.split(/\s+/)) {
+      const normalized = normalizeEvidenceToken(word);
+      if (normalized.length > 3 && !stopWords.has(normalized)) evidenceTokens.add(normalized);
+    }
+  };
+  // 1. Failed tool calls
+  const failedToolCalls = (snapshot.toolCalls ?? []).filter(tc => tc.outcome === 'failure');
+  if (failedToolCalls.length > 0) {
+    evidenceTypes.push('tool_failures');
+    for (const tc of failedToolCalls) {
+      addEvidenceToken(tc.toolName);
+      if (tc.filePath) {
+        // The whole path plus each of its segments (split on / and \)
+        for (const part of [tc.filePath, ...tc.filePath.split(/[\\/]/)]) {
+          addEvidenceToken(part);
+        }
+      }
+      addEvidenceWords(tc.errorMessage);
+      addEvidenceToken(tc.errorType);
+      evidencePreview.push(`tool:${tc.toolName}${tc.filePath ? `@${tc.filePath}` : ''} -> ${tc.errorMessage ?? 'unknown'}`.slice(0, 100));
+    }
+  }
+  // 2. Pain events (score >= 50 indicates meaningful pain)
+  const significantPainEvents = (snapshot.painEvents ?? []).filter(pe => pe.score >= 50);
+  if (significantPainEvents.length > 0) {
+    evidenceTypes.push('pain_events');
+    for (const pe of significantPainEvents) {
+      addEvidenceToken(pe.source);
+      addEvidenceWords(pe.reason);
+      evidencePreview.push(`pain:${pe.score} [${pe.source}] ${pe.reason ?? ''}`.slice(0, 100));
+    }
+  }
+  // 3. Gate blocks (same null-guard for the length check and the iteration)
+  const gateBlocks = snapshot.gateBlocks ?? [];
+  if (gateBlocks.length > 0) {
+    evidenceTypes.push('gate_blocks');
+    evidenceTokens.add('gate');
+    evidenceTokens.add('blocked');
+    for (const gb of gateBlocks) {
+      addEvidenceToken(gb.toolName);
+      addEvidenceWords(gb.reason);
+      evidencePreview.push(`gate:${gb.toolName} -> ${gb.reason}`.slice(0, 100));
+    }
+  }
+  // 4. User corrections
+  const userCorrections = (snapshot.userTurns ?? []).filter(ut => ut.correctionDetected);
+  if (userCorrections.length > 0) {
+    evidenceTypes.push('user_corrections');
+    evidenceTokens.add('correction');
+    evidenceTokens.add('wrong');
+    evidenceTokens.add('incorrect');
+    evidencePreview.push(`corrections:${userCorrections.length}`);
+  }
+  // If no evidence exists at all in the snapshot, we cannot validate.
+  // Allow the extraction through — the pipeline already has guardrails for
+  // empty snapshots (Dreamer returns valid:false).
+  if (evidenceTypes.length === 0) {
+    return {
+      isGrounded: true,
+      evidenceTypes: [],
+      reason: undefined,
+      evidencePreview: [],
+    };
+  }
+  // Distinct meaningful badDecision tokens (length > 4), normalized like the evidence
+  const badDecisionTokens = Array.from(new Set(
+    artifact.badDecision.split(/\s+/)
+      .map(word => normalizeEvidenceToken(word))
+      .filter(token => token.length > 4)
+  ));
+  // Only evidence tokens longer than 4 characters may take part in partial matching
+  const partialCandidates = Array.from(evidenceTokens).filter(evToken => evToken.length > 4);
+  // A token matches on exact equality, or when it contains / is contained in
+  // one of the longer evidence tokens
+  const matchedTokens = badDecisionTokens.filter(token =>
+    evidenceTokens.has(token) ||
+    partialCandidates.some(evToken => evToken.includes(token) || token.includes(evToken))
+  );
+  const matchCount = matchedTokens.length;
+  // Threshold: at least 2 distinct matches, or 15% of the distinct tokens if that is larger
+  const minOverlap = badDecisionTokens.length > 0
+    ? Math.max(1, Math.ceil(badDecisionTokens.length * 0.15))
+    : 0;
+  const requiredMatches = Math.max(2, minOverlap);
+  // A single match is still accepted when the token is highly specific (length > 8)
+  const hasHighlySpecificMatch = matchedTokens.some(t => t.length > 8);
+  if (matchCount >= requiredMatches || hasHighlySpecificMatch) {
+    return {
+      isGrounded: true,
+      evidenceTypes,
+      evidencePreview: evidencePreview.slice(0, 5),
+    };
+  }
+  // Hallucination detected — badDecision has no grounding in snapshot evidence
+  const reason = `Hallucinated extraction: badDecision "${artifact.badDecision.slice(0, 80)}" has insufficient overlap with session evidence. ` +
+    `Evidence types available: [${evidenceTypes.join(', ')}]. Matched tokens: [${matchedTokens.join(', ')}] (needed >= ${requiredMatches}).`;
+  return {
+    isGrounded: false,
+    evidenceTypes,
+    reason,
+    evidencePreview: evidencePreview.slice(0, 5),
+  };
+}
 /**
  * Convert a TrinityDraftArtifact to a NocturnalArtifact-compatible structure.
  */

package/src/core/observability.ts ADDED Viewed

@@ -0,0 +1,242 @@
+/**
+ * Observability Baselines for the Evolution SDK.
+ *
+ * Provides calculateBaselines() which measures the current state of the
+ * principle evolution system across four dimensions:
+ *
+ * 1. Principle Stock: total count of principles in the ledger
+ * 2. Structure: average sub-principles (rules) and implementations per principle
+ * 3. Association Rate: principles created / total pain events recorded
+ * 4. Internalization Rate: internalized principles / total principles
+ *
+ * Results are logged via SystemLogger and persisted to .state/baselines.json.
+ */
+import * as fs from 'fs';
+import * as path from 'path';
+import { loadLedger } from './principle-tree-ledger.js';
+import { SystemLogger } from './system-logger.js';
+import { atomicWriteFileSync } from '../utils/io.js';
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+export interface ObservabilityBaselines {
+  /** ISO 8601 timestamp (Date#toISOString) taken when the baselines were calculated */
+  calculatedAt: string;
+  /** Principle Stock: number of entries in the ledger tree's principles map */
+  principleStock: number;
+  /** Number of entries in the ledger tree's rules map */
+  totalRules: number;
+  /** Number of entries in the ledger tree's implementations map */
+  totalImplementations: number;
+  /** Structure: totalRules / principleStock, rounded to 3 decimals (0 if no principles) */
+  avgRulesPerPrinciple: number;
+  /** Structure: totalImplementations / totalRules, rounded to 3 decimals (0 if no rules) */
+  avgImplementationsPerRule: number;
+  /** Row count of the pain_events table in trajectory.db (0 if the DB file is missing or the query fails) */
+  totalPainEvents: number;
+  /** Association Rate: principleStock / totalPainEvents, rounded to 3 decimals (0 if no pain events) */
+  associationRate: number;
+  /** Count of training-store entries with internalizationStatus = 'internalized' whose principleId still exists in the tree */
+  internalizedCount: number;
+  /** Internalization Rate: internalizedCount / principleStock, rounded to 3 decimals (0 if no principles) */
+  internalizationRate: number;
+  /** Number of principles per status value */
+  statusDistribution: Record<string, number>;
+  /** Number of principles per priority value */
+  priorityDistribution: Record<string, number>;
+  /** Number of training-store entries per internalizationStatus; counts ALL entries, including ones whose principle is no longer in the tree */
+  internalizationDistribution: Record<string, number>;
+}
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+const BASELINES_FILE = 'baselines.json'; // file name joined onto stateDir by persistBaselines()
+// ---------------------------------------------------------------------------
+// Implementation
+// ---------------------------------------------------------------------------
+/**
+ * Compute the observability baselines of the principle evolution system.
+ *
+ * Loads the principle ledger found under stateDir and derives the four
+ * baseline dimensions (Stock, Structure, Association, Internalization) plus
+ * the status / priority / internalization histograms. The summary line is
+ * sent to SystemLogger and the full result is handed to persistBaselines(),
+ * which writes it to baselines.json inside stateDir.
+ *
+ * All ratios fall back to 0 when their denominator is 0 and are rounded to
+ * three decimals in the returned object.
+ *
+ * @param stateDir - The .state directory containing the principle ledger
+ * @param workspaceDir - Optional workspace dir for SystemLogger routing
+ * @returns The computed baselines
+ */
+export function calculateBaselines(
+  stateDir: string,
+  workspaceDir?: string,
+): ObservabilityBaselines {
+  const { tree, trainingStore } = loadLedger(stateDir);
+  // Division that yields 0 instead of NaN/Infinity for an empty denominator
+  const ratio = (numerator: number, denominator: number): number =>
+    denominator > 0 ? numerator / denominator : 0;
+  // Histogram builder: occurrences per key, keys inserted in first-seen order
+  const tally = <T>(items: readonly T[], keyOf: (item: T) => string | number): Record<string, number> => {
+    const counts: Record<string, number> = {};
+    for (const item of items) {
+      const key = keyOf(item);
+      counts[key] = (counts[key] ?? 0) + 1;
+    }
+    return counts;
+  };
+  // Stock and structure
+  const principles = Object.values(tree.principles);
+  const principleStock = principles.length;
+  const totalRules = Object.values(tree.rules).length;
+  const totalImplementations = Object.values(tree.implementations).length;
+  // Association: principles created per recorded pain event
+  const totalPainEvents = countPainEvents(stateDir);
+  // Internalization: only training entries whose principle still exists in the
+  // tree are counted, so orphaned/deleted entries cannot inflate the ratio
+  const trainingEntries = Object.values(trainingStore);
+  const livePrincipleIds = new Set(Object.keys(tree.principles));
+  const internalizedCount = trainingEntries.filter(
+    (entry) =>
+      livePrincipleIds.has(entry.principleId) &&
+      entry.internalizationStatus === 'internalized',
+  ).length;
+  const baselines: ObservabilityBaselines = {
+    calculatedAt: new Date().toISOString(),
+    principleStock,
+    totalRules,
+    totalImplementations,
+    avgRulesPerPrinciple: roundTo3(ratio(totalRules, principleStock)),
+    avgImplementationsPerRule: roundTo3(ratio(totalImplementations, totalRules)),
+    totalPainEvents,
+    associationRate: roundTo3(ratio(principleStock, totalPainEvents)),
+    internalizedCount,
+    internalizationRate: roundTo3(ratio(internalizedCount, principleStock)),
+    statusDistribution: tally(principles, (p) => p.status),
+    priorityDistribution: tally(principles, (p) => p.priority),
+    // Covers every training-store entry, not just those with a live principle
+    internalizationDistribution: tally(trainingEntries, (entry) => entry.internalizationStatus),
+  };
+  // Emit the one-line summary, then persist the full object
+  SystemLogger.log(
+    workspaceDir,
+    'OBSERVABILITY_BASELINES',
+    formatBaselineSummary(baselines),
+  );
+  persistBaselines(stateDir, baselines);
+  return baselines;
+}
+// ---------------------------------------------------------------------------
+// Internal helpers
+// ---------------------------------------------------------------------------
+/** Round `n` to three decimal places using Math.round on the value scaled by 1000. */
+function roundTo3(n: number): number {
+  const scale = 1000;
+  const scaled = Math.round(n * scale);
+  return scaled / scale;
+}
+/** Render the four baseline dimensions as one ' | '-separated log line. */
+function formatBaselineSummary(b: ObservabilityBaselines): string {
+  const stock = `Principle Stock: ${b.principleStock}`;
+  const structure = `Structure: ${b.avgRulesPerPrinciple} rules/principle, ${b.avgImplementationsPerRule} impls/rule`;
+  const association = `Association Rate: ${b.associationRate} (${b.principleStock} principles / ${b.totalPainEvents} pain events)`;
+  const internalization = `Internalization Rate: ${b.internalizationRate} (${b.internalizedCount}/${b.principleStock})`;
+  return `${stock} | ${structure} | ${association} | ${internalization}`;
+}
+/**
+ * Count pain events from the trajectory SQLite database.
+ * Returns 0 if the database is unavailable or the table doesn't exist.
+ *
+ * The database file is `trajectory.db` inside stateDir and is opened
+ * read-only; the connection is always closed in the `finally` block.
+ * Any failure (module not loadable, open error, query error) is logged as
+ * OBSERVABILITY_SQL_ERROR and reported as 0 rather than thrown.
+ *
+ * NOTE(review): this file otherwise uses ES `import` syntax. If it is emitted
+ * as a native ES module, `require` would be undefined, so every call would
+ * land in the catch and return 0 — confirm against the build target.
+ * NOTE(review): the error is logged with stateDir as SystemLogger.log's first
+ * argument, whereas calculateBaselines passes workspaceDir there — confirm
+ * this is intended.
+ *
+ * @param stateDir - Directory that contains trajectory.db
+ * @returns Row count of the pain_events table, or 0 on any failure
+ */
+function countPainEvents(stateDir: string): number {
+  const dbPath = path.join(stateDir, 'trajectory.db');
+  if (!fs.existsSync(dbPath)) {
+    return 0;
+  }
+  try {
+    // Load better-sqlite3 lazily via a synchronous require() so it is not a hard
+    // dependency at module load time. If it cannot be loaded, return 0.
+    const Database = require('better-sqlite3') as typeof import('better-sqlite3');
+    const db = new Database(dbPath, { readonly: true });
+    try {
+      const row = db.prepare('SELECT COUNT(*) as count FROM pain_events').get() as { count: number } | undefined;
+      return row?.count ?? 0;
+    } finally {
+      db.close();
+    }
+  } catch (err) {
+    // better-sqlite3 not available, DB not openable, or table doesn't exist — log and return 0
+    SystemLogger.log(stateDir, 'OBSERVABILITY_SQL_ERROR', `countPainEvents failed: ${String(err)}`);
+    return 0;
+  }
+}
+/**
+ * Write the baselines as pretty-printed JSON to baselines.json inside
+ * stateDir, creating the directory first when it does not exist yet.
+ * The write goes through atomicWriteFileSync. Failures are swallowed on
+ * purpose: persistence is best-effort and must never break the caller.
+ */
+function persistBaselines(stateDir: string, baselines: ObservabilityBaselines): void {
+  try {
+    const target = path.join(stateDir, BASELINES_FILE);
+    const parentDir = path.dirname(target);
+    const parentMissing = !fs.existsSync(parentDir);
+    if (parentMissing) {
+      fs.mkdirSync(parentDir, { recursive: true });
+    }
+    const serialized = JSON.stringify(baselines, null, 2);
+    atomicWriteFileSync(target, serialized);
+  } catch {
+    // Intentionally ignored — the caller has already logged the values via
+    // SystemLogger before persisting, so nothing is lost if the write fails.
+  }
+}

package/src/core/pain-lifecycle.ts ADDED Viewed

@@ -0,0 +1,38 @@
+import * as fs from 'fs';
+import { resolvePdPath } from './paths.js';
+export const PAIN_FLAG_FILENAME = '.pain_flag';
+/**
+ * Removes the pain flag file resolved by resolvePdPath(workspaceDir, 'PAIN_FLAG').
+ * Called when a pain signal task completes (success, timeout, duplicate, or invalid)
+ * to prevent stale flags from triggering repeated processing.
+ *
+ * Optionally verifies the file content before deleting to avoid removing
+ * a concurrent new signal that was written between checkPainFlag reading the file and
+ * this deletion call (TOCTOU race). The check narrows that window; the read and the
+ * unlink are still two separate file-system calls.
+ *
+ * The id comparison is exact: the text `pain_event_id: <id>` must not be followed by
+ * another word character or hyphen, so an expected id of 12 does NOT match a flag
+ * that carries `pain_event_id: 123`.
+ *
+ * All file-system errors (including a missing file) are swallowed — this is
+ * best-effort cleanup and never throws for I/O reasons.
+ *
+ * @param workspaceDir - Workspace directory
+ * @param expectedPainEventId - If provided, only deletes the file if its pain_event_id matches.
+ *                              This prevents deleting a newly written signal during a race window.
+ */
+export function clearPainFlag(workspaceDir: string, expectedPainEventId?: number | string): void {
+    const painFlagPath = resolvePdPath(workspaceDir, 'PAIN_FLAG');
+    try {
+        // Guard against TOCTOU race: if expectedPainEventId is provided,
+        // re-read the file and verify the pain_event_id matches before deleting.
+        if (expectedPainEventId !== undefined) {
+            const content = fs.readFileSync(painFlagPath, 'utf8');
+            // Escape regex metacharacters so string ids are matched literally
+            const escapedId = String(expectedPainEventId).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+            // Negative lookahead rejects longer ids that merely start with the expected one
+            const idPattern = new RegExp(`pain_event_id: ${escapedId}(?![\\w-])`);
+            if (!idPattern.test(content)) {
+                // File was rewritten with a different signal — do not delete.
+                return;
+            }
+        }
+        fs.unlinkSync(painFlagPath);
+    } catch {
+        // Best-effort cleanup — ENOENT means already gone, other errors are ignored.
+    }
+}

package/src/core/pain-signal-adapter.ts ADDED Viewed

@@ -0,0 +1,42 @@
+/**
+ * PainSignalAdapter interface for the Evolution SDK.
+ *
+ * This interface decouples the evolution engine from specific AI agent
+ * frameworks (OpenClaw, Claude Code, etc.). All modules that need to
+ * capture pain signals from tool failures should depend on this interface
+ * rather than importing framework-specific event types directly.
+ *
+ * The interface uses a generic type parameter for the raw framework event,
+ * so each framework implementation provides its own concrete type.
+ */
+import type { PainSignal } from './pain-signal.js';
+// ---------------------------------------------------------------------------
+// PainSignalAdapter Interface
+// ---------------------------------------------------------------------------
+/**
+ * Framework-agnostic adapter for capturing pain signals.
+ *
+ * @typeParam TRawEvent - The framework-specific event type supplied by each
+ * implementation (e.g., PluginHookAfterToolCallEvent for OpenClaw)
+ */
+export interface PainSignalAdapter<TRawEvent> {
+  /**
+   * Translate a framework-specific event into a universal PainSignal.
+   *
+   * Returns null when the event does not produce a pain signal (e.g., the
+   * event type is not a failure, or the event lacks required fields).
+   *
+   * This method performs pure translation only. Trigger decision logic
+   * (e.g., GFI threshold checks, tool name filtering) stays in the
+   * framework-side hook logic. Per D-02, capture() only translates.
+   * (D-02 is not defined in this file — presumably a project design
+   * decision; confirm against the planning docs.)
+   *
+   * Translation failures (malformed events, missing required fields)
+   * return null rather than throwing. This keeps the adapter resilient.
+   *
+   * @param rawEvent - The framework-specific event to translate
+   * @returns A valid PainSignal, or null if the event does not produce one
+   */
+  capture(rawEvent: TRawEvent): PainSignal | null;
+}