npm - principles-disciple - Versions diffs - 1.104.0 → 1.104.2 - Mend

principles-disciple 1.104.0 → 1.104.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

package/openclaw.plugin.json +1 -1
package/package.json +1 -1
package/src/hooks/after-tool-call-helpers.ts +79 -89
package/src/hooks/after-tool-call-types.ts +2 -8
package/src/hooks/raw-observation-adapter.ts +231 -0
package/src/hooks/raw-observation-types.ts +77 -0
package/src/hooks/triage-adapter.ts +59 -52
package/src/hooks/trigger-cooldown-tracker.ts +82 -0
package/templates/langs/en/principles/THINKING_OS.md +12 -0
package/templates/langs/zh/principles/THINKING_OS.md +12 -0
package/tests/hooks/pain.test.ts +20 -14
package/tests/hooks/raw-observation-adapter.test.ts +312 -0
package/tests/hooks/single-gate-pain-admission.test.ts +258 -0
package/tests/integration/auto-entry-gate.test.ts +13 -5

package/src/hooks/triage-adapter.ts CHANGED Viewed

@@ -28,81 +28,54 @@ import {
 // ── Source Kind Resolution ───────────────────────────────────────────────────
 /**
- * Map after_tool_call hook context to SourceKind.
+ * Map RawObservation to SourceKind.
  *
- * Classifies based on:
- * - toolName: 'pain' or 'skill:pain' → agent_on_owner_request
- * - failureSource: 'dispatch_error' vs 'tool_failure'
- * - isRisky + score: only used for rulehost_block upgrade, not for kind resolution
+ * This is the unified entry point for source-kind classification.
+ * It replaces the scattered resolveSourceKindFrom* functions.
  */
-export function resolveSourceKindFromToolFailure(
-  toolName: string | undefined,
-  failureSource: 'tool_failure' | 'dispatch_error',
-  provenance?: 'openclaw_context_bound' | 'owner_reported_no_host_trace' | 'automatic_hook',
-): SourceKind {
-  // Manual pain via agent tool call
-  if (toolName === 'pain' || toolName === 'skill:pain') {
-    return provenance === 'openclaw_context_bound' ? 'agent_on_owner_request' : 'owner_reported';
-  }
-  // Dispatch errors (tool not found, unknown tool)
-  if (failureSource === 'dispatch_error') {
-    return 'dispatch_error';
-  }
+export { resolveSourceKind } from './raw-observation-adapter.js';
-  // Regular tool failure
-  return 'tool_failure';
-}
+/**
+ * Map after_tool_call hook context to SourceKind.
+ *
+ * @deprecated Use resolveSourceKind directly with RawObservation.
+ */
+export { resolveSourceKindFromToolFailure } from './raw-observation-adapter.js';
 /**
  * Map empathy/semantic detection context to SourceKind.
  *
- * Classifies based on detection source prefix:
- * - 'llm_paralysis' → llm_paralysis
- * - 'llm_*' (detection rule) → semantic
- * - 'user_empathy' or empathy keyword match → empathy_inferred
- * - GFI threshold crossed → gfi_threshold
+ * @deprecated Use resolveSourceKind directly with RawObservation.
  */
-export function resolveSourceKindFromLlmDetection(
-  detectionSource: string,
-  isGfiTriggered: boolean,
-): SourceKind {
-  if (isGfiTriggered) return 'gfi_threshold';
-  if (detectionSource === 'llm_paralysis') return 'llm_paralysis';
-  if (detectionSource.startsWith('llm_')) return 'semantic';
-  if (detectionSource === 'user_empathy') return 'empathy_inferred';
-  return 'unknown';
-}
+export { resolveSourceKindFromLlmDetection } from './raw-observation-adapter.js';
 /**
  * Map gate-block context to SourceKind.
+ *
+ * @deprecated Use resolveSourceKind directly with RawObservation.
  */
-export function resolveSourceKindFromGateBlock(): SourceKind {
-  return 'rulehost_block';
-}
+export { resolveSourceKindFromGateBlock } from './raw-observation-adapter.js';
 /**
  * Map /pd-pain command to SourceKind.
+ *
+ * @deprecated Use resolveSourceKind directly with RawObservation.
  */
-export function resolveSourceKindFromCommand(): SourceKind {
-  return 'owner_reported';
-}
+export { resolveSourceKindFromCommand } from './raw-observation-adapter.js';
 /**
  * Map provider/rate-limit failure to SourceKind.
+ *
+ * @deprecated Use resolveSourceKind directly with RawObservation.
  */
-export function resolveSourceKindFromProvider(
-  isRateLimit: boolean,
-): SourceKind {
-  return isRateLimit ? 'rate_limit' : 'provider_failure';
-}
+export { resolveSourceKindFromProvider } from './raw-observation-adapter.js';
 /**
  * Map subagent error to SourceKind.
+ *
+ * @deprecated Use resolveSourceKind directly with RawObservation.
  */
-export function resolveSourceKindFromSubagent(): SourceKind {
-  return 'subagent_error';
-}
+export { resolveSourceKindFromSubagent } from './raw-observation-adapter.js';
 // ── Triage Evaluation ───────────────────────────────────────────────────────
@@ -123,6 +96,8 @@ export function evaluateEvidenceTriage(
   options?: {
     isUnsafeHighConfidence?: boolean;
     provenance?: 'openclaw_context_bound' | 'owner_reported_no_host_trace' | 'automatic_hook';
+    consecutiveErrors?: number;
+    isRisky?: boolean;
   },
 ): TriageResult {
   const input: TriageInput = {
@@ -132,7 +107,39 @@ export function evaluateEvidenceTriage(
     provenance: options?.provenance,
   };
-  return evaluateTriage(input);
+  let result = evaluateTriage(input);
+  // PEAT-B1 upgrade logic: risky high-score overrides evidence_only
+  // Matches PainDiagnosticGate.risky_high_score: isRisky && score >= 70 → admit
+  if (
+    result.decision === 'evidence_only' &&
+    options?.isRisky === true &&
+    score >= 70
+  ) {
+    result = {
+      ...result,
+      decision: 'admit',
+      reason: 'Risky high-score operation overrides evidence-only decision. Immediate diagnosis required.',
+      nextAction: 'create_diagnostic_task',
+    };
+  }
+  // PEAT-B1 upgrade logic: repeated failures override evidence_only
+  // Threshold: 4 consecutive failures (matches PainDiagnosticGate.repeatedFailure)
+  if (
+    result.decision === 'evidence_only' &&
+    options?.consecutiveErrors !== undefined &&
+    options.consecutiveErrors >= 4
+  ) {
+    result = {
+      ...result,
+      decision: 'admit',
+      reason: 'Repeated failures override evidence-only decision. Pattern suggests systemic issue requiring diagnosis.',
+      nextAction: 'create_diagnostic_task',
+    };
+  }
+  return result;
 }
 // ── High-Confidence Unsafe Action Detection ──────────────────────────────────

package/src/hooks/trigger-cooldown-tracker.ts ADDED Viewed

@@ -0,0 +1,82 @@
+/**
+ * Trigger Cooldown Tracker — PRI-363
+ *
+ * Manages cooldown state for trigger controller decisions.
+ *
+ * This is a plugin-layer concern because:
+ * - Core (trigger-controller) is stateless and pure
+ * - Cooldown state needs to persist across tool calls
+ * - The map is scoped to the plugin's lifecycle
+ *
+ * EP-05: Loop State Freshness — each check reads fresh state from the map.
+ * ERR-001: No `as` casts on map access.
+ * ERR-002: Every rejected decision includes reason + nextAction.
+ */
+const DEFAULT_COOLDOWN_MS = 15 * 60 * 1000; // 15 minutes, expressed in milliseconds
+/**
+ * Build the map key that identifies one cooldown episode.
+ *
+ * Episode key format: sessionId:source:errorHash
+ *
+ * Both fallbacks use `||`, so any falsy value (undefined or the empty string)
+ * is replaced: sessionId becomes 'unknown' and errorHash becomes 'no-hash'.
+ * Calls that lack a session id therefore all share the 'unknown' prefix.
+ * The three parts are interpolated verbatim; a ':' inside a part is not escaped.
+ *
+ * @param source - Middle segment of the key, used as-is.
+ * @param sessionId - First segment; replaced by 'unknown' when falsy.
+ * @param errorHash - Last segment; replaced by 'no-hash' when falsy.
+ * @returns The composed key string.
+ */
+function buildEpisodeKey(
+  source: string,
+  sessionId: string | undefined,
+  errorHash: string | undefined,
+): string {
+  const sid = sessionId || 'unknown';
+  const hash = errorHash || 'no-hash';
+  return `${sid}:${source}:${hash}`;
+}
+/**
+ * Check whether cooldown is currently active for a given episode.
+ *
+ * Builds the episode key with buildEpisodeKey, reads the stored timestamp
+ * from cooldownMap, and compares its age against DEFAULT_COOLDOWN_MS.
+ * The map is only read here (it is typed ReadonlyMap).
+ *
+ * @param source - Forwarded to buildEpisodeKey.
+ * @param sessionId - Forwarded to buildEpisodeKey.
+ * @param errorHash - Forwarded to buildEpisodeKey.
+ * @param cooldownMap - Episode key → timestamp. The value is subtracted from
+ *   Date.now(), so it is expected to be epoch milliseconds.
+ * @returns false when the episode has no entry; otherwise true while the
+ *   elapsed time since the stored timestamp is strictly less than
+ *   DEFAULT_COOLDOWN_MS.
+ */
+export function isCooldownActive(
+  source: string,
+  sessionId: string | undefined,
+  errorHash: string | undefined,
+  cooldownMap: ReadonlyMap<string, number>,
+): boolean {
+  const episodeKey = buildEpisodeKey(source, sessionId, errorHash);
+  const lastDiagnosedAt = cooldownMap.get(episodeKey);
+  if (lastDiagnosedAt === undefined) {
+    return false;
+  }
+  const now = Date.now();
+  return now - lastDiagnosedAt < DEFAULT_COOLDOWN_MS; // strict: inactive once exactly DEFAULT_COOLDOWN_MS has elapsed
+}
+/**
+ * Mark an episode as diagnosed (set cooldown timestamp).
+ *
+ * Stores Date.now() under the episode key built by buildEpisodeKey,
+ * overwriting any timestamp already stored for that key. The passed-in
+ * map is mutated in place; nothing is returned.
+ *
+ * @param source - Forwarded to buildEpisodeKey.
+ * @param sessionId - Forwarded to buildEpisodeKey.
+ * @param errorHash - Forwarded to buildEpisodeKey.
+ * @param cooldownMap - Episode key → timestamp map to update.
+ */
+export function markEpisodeAsDiagnosed(
+  source: string,
+  sessionId: string | undefined,
+  errorHash: string | undefined,
+  cooldownMap: Map<string, number>,
+): void {
+  const episodeKey = buildEpisodeKey(source, sessionId, errorHash);
+  cooldownMap.set(episodeKey, Date.now());
+}
+/**
+ * Clear all cooldown state (for tests).
+ *
+ * Removes every entry from the given map in place.
+ *
+ * @param cooldownMap - Episode key → timestamp map to empty.
+ */
+export function clearCooldownState(cooldownMap: Map<string, number>): void {
+  cooldownMap.clear();
+}
+/**
+ * Get the cooldown timestamp for a given episode (for tests).
+ *
+ * Returns the raw stored value without checking whether the cooldown
+ * window has already elapsed. The map is only read.
+ *
+ * @param source - Forwarded to buildEpisodeKey.
+ * @param sessionId - Forwarded to buildEpisodeKey.
+ * @param errorHash - Forwarded to buildEpisodeKey.
+ * @param cooldownMap - Episode key → timestamp map to read from.
+ * @returns The stored timestamp, or undefined when the episode has no entry.
+ */
+export function getCooldownTimestamp(
+  source: string,
+  sessionId: string | undefined,
+  errorHash: string | undefined,
+  cooldownMap: ReadonlyMap<string, number>,
+): number | undefined {
+  const episodeKey = buildEpisodeKey(source, sessionId, errorHash);
+  return cooldownMap.get(episodeKey);
+}

package/templates/langs/en/principles/THINKING_OS.md CHANGED Viewed

@@ -62,4 +62,16 @@ LLMs are highly sensitive to XML tags; this structure is designed to boost instr
     <must>Maintain extreme digital cleanliness. The project root is SACRED. Use strict `kebab-case` for all naming. Clean up all test scripts and debug artifacts after the task.</must>
     <forbidden>Creating arbitrary temporary files (e.g., `test.txt`, `temp.md`, `debug.log`) in the project root directory.</forbidden>
   </directive>
+  <directive id="T-09" name="DIVIDE_AND_CONQUER">
+    <trigger>When facing a complex task, multi-step change, or an operation that can be decomposed.</trigger>
+    <must>Break the task into smaller, manageable phases before execution. Execute one phase at a time and verify each phase's result before proceeding.</must>
+    <forbidden>Attempting to execute a large, complex change in a single step, or proceeding without a decomposition plan.</forbidden>
+  </directive>
+  <directive id="T-10" name="MEMORY_EXTERNALIZATION">
+    <trigger>When reaching a significant conclusion, making a decision, or planning next steps across sessions.</trigger>
+    <must>Write intermediate conclusions, decisions, and plans to persistent files (e.g., plan.md, scratchpad) so they survive context compression and session boundaries.</must>
+    <forbidden>Relying solely on conversation context to retain important state that will be lost when the context window shifts.</forbidden>
+  </directive>
 </thinking_os_core_directives>

package/templates/langs/zh/principles/THINKING_OS.md CHANGED Viewed

@@ -62,4 +62,16 @@
     <must>保持极致的数字洁癖。项目根目录是神圣的。所有命名必须严格使用 `kebab-case`。任务结束后清理所有的测试脚本和 Debug 遗留物。</must>
     <forbidden>在项目根目录下随意创建临时文件（如 `test.txt`、`temp.md`、`debug.log`）。</forbidden>
   </directive>
+  <directive id="T-09" name="DIVIDE_AND_CONQUER">
+    <trigger>当面对复杂任务、多步骤变更或可分解的操作时。</trigger>
+    <must>在执行前将任务拆分为更小的、可管理的阶段。逐阶段执行，并在进入下一阶段前验证当前阶段的结果。</must>
+    <forbidden>试图在单一步骤中执行大型复杂变更，或在没有分解计划的情况下直接推进。</forbidden>
+  </directive>
+  <directive id="T-10" name="MEMORY_EXTERNALIZATION">
+    <trigger>当得出重要结论、做出决策或规划跨会话的后续步骤时。</trigger>
+    <must>将中间结论、决策和计划写入持久化文件（如 plan.md、scratchpad），使其在上下文压缩和会话边界后仍然可用。</must>
+    <forbidden>仅依赖对话上下文来保持重要状态，这些状态在上下文窗口切换时将丢失。</forbidden>
+  </directive>
 </thinking_os_core_directives>

package/tests/hooks/pain.test.ts CHANGED Viewed

@@ -7,7 +7,7 @@ import * as ioUtils from '../../src/utils/io.js';
 import { WorkspaceContext } from '../../src/core/workspace-context.js';
 import { EventLogService } from '../../src/core/event-log.js';
 import { setInjectedProbationIds, clearSession } from '../../src/core/session-tracker.js';
-import { resetPainDiagnosticGateForTest } from '../../src/core/pain-diagnostic-gate.js';
+import { resetTriggerCooldownForTest } from '../../src/hooks/after-tool-call-helpers.js';
 import { loadFeatureFlagFromConfig } from '../../src/core/pd-config-loader.js';
 vi.mock('fs');
@@ -140,12 +140,12 @@ describe('Post-Write Checks & Pain Hook', () => {
     mockEmitSync.mockReset();
     mockRecordProbationFeedback.mockReset();
     mockUpdatePrincipleValueMetrics.mockReset();
-    vi.spyOn(WorkspaceContext, 'fromHookContext').mockReturnValue(mockWctx as any);
+    vi.spyOn(WorkspaceContext, 'fromHookContextExplicit').mockReturnValue(mockWctx as any);
     vi.spyOn(EventLogService, 'get').mockReturnValue(mockEventLog as any);
     clearSession('s-success');
     clearSession('s-low-value-failure');
     clearSession('s-repeated-failure');
-    resetPainDiagnosticGateForTest();
+    resetTriggerCooldownForTest();
   });
   afterEach(() => {
@@ -158,7 +158,7 @@ describe('Post-Write Checks & Pain Hook', () => {
     handleAfterToolCall(mockEvent as any, mockCtx as any);
     // Should still create context
-    expect(WorkspaceContext.fromHookContext).toHaveBeenCalled();
+    expect(WorkspaceContext.fromHookContextExplicit).toHaveBeenCalled();
     expect(fs.writeFileSync).not.toHaveBeenCalled();
     expect(mockEmitSync).not.toHaveBeenCalled();
   });
@@ -178,7 +178,7 @@ describe('Post-Write Checks & Pain Hook', () => {
     handleAfterToolCall(mockEvent as any, mockCtx as any, mockApi as any);
-    expect(WorkspaceContext.fromHookContext).not.toHaveBeenCalled();
+    expect(WorkspaceContext.fromHookContextExplicit).not.toHaveBeenCalled();
     expect(mockEmitSync).not.toHaveBeenCalled();
   });
@@ -221,6 +221,13 @@ describe('Post-Write Checks & Pain Hook', () => {
     vi.mocked(ioUtils.isRisky).mockReturnValue(false);
     vi.mocked(fs.existsSync).mockReturnValue(false);
+    // PRI-363: trigger controller requires consecutiveErrors >= 4 for upgrade
+    handleAfterToolCall(mockEvent as any, mockCtx as any);
+    expect(mockEmitSync).not.toHaveBeenCalled();
+    handleAfterToolCall(mockEvent as any, mockCtx as any);
+    expect(mockEmitSync).not.toHaveBeenCalled();
     handleAfterToolCall(mockEvent as any, mockCtx as any);
     expect(mockEmitSync).not.toHaveBeenCalled();
@@ -231,7 +238,6 @@ describe('Post-Write Checks & Pain Hook', () => {
       data: expect.objectContaining({
         painType: 'tool_failure',
         source: 'write',
-        reason: expect.stringContaining('diagnosticGate=high_gfi'),
       }),
     }));
     expect(mockWctx.trajectory.recordToolCall).toHaveBeenCalledWith(expect.objectContaining({
@@ -309,6 +315,8 @@ describe('Post-Write Checks & Pain Hook', () => {
       contextTags: ['write'],
     });
+    // PRI-363: risky high-score write triggers admission via trigger controller
+    // (isRisky=true + score >= 70 → risky_high_score upgrade → admit)
     handleAfterToolCall(mockEvent as any, mockCtx as any);
     expect(mockUpdatePrincipleValueMetrics).toHaveBeenCalledWith(
@@ -652,7 +660,7 @@ describe('PRI-326: evaluatePainAdmissionForToolCall', () => {
   beforeEach(() => {
     vi.clearAllMocks();
-    resetPainDiagnosticGateForTest();
+    resetTriggerCooldownForTest();
     vi.mocked(loadFeatureFlagFromConfig).mockReturnValue({ enabled: false, source: 'test' });
   });
@@ -672,29 +680,27 @@ describe('PRI-326: evaluatePainAdmissionForToolCall', () => {
     expect(result.stage).toBe('not_applicable');
   });
-  it('returns triage_evidence_only when feature flag on and tool_failure triage rejects', () => {
+  it('returns trigger_rejected when tool_failure triage rejects', () => {
     vi.mocked(loadFeatureFlagFromConfig).mockReturnValue({ enabled: true, source: 'test' });
     const result = evaluatePainAdmissionForToolCall(
       { toolName: 'write' } as any, baseObservation, baseOutcome, undefined, undefined, 's1', workspaceDir, mockConfig
     );
-    expect(result.stage).toBe('triage_evidence_only');
+    expect(result.stage).toBe('trigger_rejected');
     expect(result.admitted).toBe(false);
     expect(result.reason).toBeTruthy();
   });
-  it('returns gate_admitted when consecutive errors exceed repeatedFailure threshold', () => {
+  it('returns trigger_admitted when consecutive errors exceed repeatedFailure threshold', () => {
     vi.mocked(loadFeatureFlagFromConfig).mockReturnValue({ enabled: false, source: 'test' });
-    // consecutiveErrors=5 >= default repeatedFailure threshold of 4 → gate admits via repeated_failure
+    // consecutiveErrors=5 >= default repeatedFailure threshold of 4 → trigger admits via repeated_failure
     const highConsecutiveState = { currentGfi: 0, consecutiveErrors: 5, lastErrorHash: 'abc123' } as any;
     const result = evaluatePainAdmissionForToolCall(
       { toolName: 'write' } as any, baseObservation, baseOutcome, highConsecutiveState, undefined, 's-gate-admitted-test', workspaceDir, mockConfig
     );
-    expect(result.stage).toBe('gate_admitted');
+    expect(result.stage).toBe('trigger_admitted');
     expect(result.admitted).toBe(true);
-    expect(result.gateResult?.shouldDiagnose).toBe(true);
-    expect(result.gateResult?.reason).toBe('repeated_failure');
   });
   it('includes reason and detail in every decision', () => {