npm - principles-disciple - Versions diffs - 1.27.0 → 1.28.1 - Mend

principles-disciple 1.27.0 → 1.28.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/openclaw.plugin.json +4 -4
package/package.json +4 -4
package/scripts/diagnose-nocturnal.mjs +139 -2
package/scripts/seed-nocturnal-scenarios.mjs +377 -0
package/scripts/validate-live-path.ts +18 -18
package/src/commands/nocturnal-train.ts +4 -6
package/src/commands/pain.ts +8 -11
package/src/commands/pd-reflect.ts +1 -1
package/src/core/bootstrap-rules.ts +3 -3
package/src/core/merge-gate-audit.ts +1 -1
package/src/core/nocturnal-candidate-scoring.ts +131 -0
package/src/core/nocturnal-reasoning-deriver.ts +337 -0
package/src/core/nocturnal-trinity.ts +462 -25
package/src/core/pain-context-extractor.ts +1 -3
package/src/core/principle-tree-migration.ts +2 -4
package/src/core/thinking-os-parser.ts +3 -3
package/src/hooks/bash-risk.ts +1 -1
package/src/hooks/gfi-gate.ts +1 -1
package/src/hooks/pain.ts +1 -1
package/src/hooks/prompt.ts +36 -2
package/src/hooks/subagent.ts +1 -1
package/src/index.ts +3 -1
package/src/service/evolution-worker.ts +138 -44
package/src/service/health-query-service.ts +15 -6
package/src/service/subagent-workflow/nocturnal-workflow-manager.ts +0 -1
package/src/tools/write-pain-flag.ts +191 -0
package/templates/langs/en/skills/pd-pain-signal/SKILL.md +34 -20
package/templates/langs/zh/skills/pd-pain-signal/SKILL.md +34 -20
package/tests/core/nocturnal-candidate-scoring.test.ts +132 -0
package/tests/core/nocturnal-e2e.test.ts +224 -0
package/tests/core/nocturnal-reasoning-deriver.test.ts +372 -0
package/tests/core/nocturnal-trinity.test.ts +791 -0
package/tests/tools/write-pain-flag.test.ts +240 -0

package/src/tools/write-pain-flag.ts ADDED Viewed

@@ -0,0 +1,191 @@
+import type { OpenClawPluginApi } from '../openclaw-sdk.js';
+import { Type } from '@sinclair/typebox';
+import { buildPainFlag, writePainFlag } from '../core/pain.js';
+import { resolveWorkspaceDirFromApi } from '../core/path-resolver.js';
+import * as fs from 'fs';
+import * as path from 'path';
+// Pain flag contract required fields
+const PAIN_FLAG_REQUIRED_FIELDS = ['source', 'score', 'time', 'reason'] as const;
+/**
+ * Atomically writes `content` to `filePath`.
+ *
+ * The content is first written to a sibling temp file (suffix built from the
+ * current timestamp and process id) and then renamed over the target, so a
+ * crash mid-write never leaves a half-written target file.
+ *
+ * @param filePath - Destination path; its parent directory is created if missing.
+ * @param content - Text to write, encoded as UTF-8.
+ * @throws Re-throws any error from the write or rename after attempting to
+ *         remove the temp file.
+ */
+function writePainFlagAtomic(filePath: string, content: string): void {
+    // `recursive: true` makes mkdirSync a no-op when the directory already
+    // exists, so no separate existsSync check (and its race window) is needed.
+    fs.mkdirSync(path.dirname(filePath), { recursive: true });
+    const tmpPath = `${filePath}.tmp.${Date.now()}.${process.pid}`;
+    try {
+        fs.writeFileSync(tmpPath, content, 'utf-8');
+        fs.renameSync(tmpPath, filePath);
+    } catch (err) {
+        // Do not leave orphaned temp files behind when the write or rename fails.
+        try {
+            fs.unlinkSync(tmpPath);
+        } catch {
+            // Best effort: the temp file may never have been created.
+        }
+        throw err;
+    }
+}
+/**
+ * Creates the `write_pain_flag` tool.
+ *
+ * This tool allows the agent to record a pain signal when it recognizes
+ * that it made a mistake, violated a principle, or needs to flag an issue
+ * for later reflection.
+ *
+ * The tool builds the record with `buildPainFlag`, serializes it with
+ * `serializeKvLines`, and writes it with `writePainFlagAtomic` to
+ * `<workspace>/.state/.pain_flag`, aiming for:
+ * - Correct KV format serialization (never [object Object] corruption)
+ * - Atomic writes (temp file + rename, crash-safe)
+ * - Full contract compliance (source, score, time, reason)
+ *
+ * The agent should NEVER write to .pain_flag directly.
+ *
+ * @param api - Plugin API; used to resolve the workspace directory and for
+ *              optional logging (`api.logger` may be absent).
+ * @returns A tool definition whose `execute` never throws: every outcome,
+ *          including failures, is reported as a single text content item.
+ */
+export function createWritePainFlagTool(api: OpenClawPluginApi) {
+    return {
+        name: 'write_pain_flag',
+        description:
+            'Record a pain signal to flag mistakes, principle violations, or issues for later reflection. ' +
+            'Use this tool INSTEAD of writing .pain_flag directly. ' +
+            'Pain signals are processed by the evolution system on the next heartbeat cycle.',
+        parameters: Type.Object({
+            reason: Type.String({
+                description:
+                    'Describe specifically what went wrong. ' +
+                    'Include the error, the violated principle, or the issue. ' +
+                    'Be concrete: "I edited config.ts without reading it first, breaking the export" ' +
+                    'is better than "I made a mistake".',
+            }),
+            score: Type.Optional(Type.Number({
+                description:
+                    'Pain severity score (0-100). Default: 80. ' +
+                    'Guidelines: 30-50 (minor issue), 50-70 (moderate error), ' +
+                    '70-100 (severe principle violation or data loss risk).',
+                minimum: 0,
+                maximum: 100,
+            })),
+            source: Type.Optional(Type.String({
+                description:
+                    'Source of the pain signal. ' +
+                    'Values: manual (user flagged), tool_failure (tool error), ' +
+                    'user_empathy (user frustration), principle_violation (principle broken), ' +
+                    'human_intervention (user manually intervened). ' +
+                    'Default: manual.',
+            })),
+            session_id: Type.Optional(Type.String({
+                description:
+                    'Session ID where the pain occurred. ' +
+                    'If not provided, the system will use the current session.',
+            })),
+            is_risky: Type.Optional(Type.Boolean({
+                description:
+                    'Whether this involves a high-risk operation (e.g., writing to sensitive files). ' +
+                    'Default: false.',
+            })),
+        }),
+        /**
+         * Validates the raw parameters, builds the pain flag and writes it.
+         *
+         * @param _toolCallId - Unused.
+         * @param rawParams - Untrusted parameters; every field is type-checked
+         *                    here rather than relying on the schema.
+         */
+        async execute(
+            _toolCallId: string,
+            rawParams: Record<string, unknown>
+        ): Promise<{ content: { type: string; text: string }[] }> {
+            const reason = typeof rawParams.reason === 'string' ? rawParams.reason.trim() : '';
+            // NaN is `typeof 'number'` but survives round/min/max unchanged and
+            // would be serialized as "NaN"; treat it like a missing score.
+            const rawScore = rawParams.score;
+            const score = typeof rawScore === 'number' && !Number.isNaN(rawScore)
+                ? Math.max(0, Math.min(100, Math.round(rawScore)))
+                : 80;
+            const source = typeof rawParams.source === 'string' && rawParams.source.trim() ? rawParams.source.trim() : 'manual';
+            const sessionId = typeof rawParams.session_id === 'string' ? rawParams.session_id.trim() : '';
+            const isRisky = rawParams.is_risky === true;
+            // ── Validate required fields ──
+            if (!reason) {
+                api.logger?.warn?.('[PD:write_pain_flag] Missing required field: reason');
+                return {
+                    content: [{
+                        type: 'text',
+                        text: '❌ Error: The `reason` parameter is required.\n' +
+                            'Describe specifically what went wrong. Example:\n' +
+                            '"I edited config.ts without reading it first, breaking the export"',
+                    }],
+                };
+            }
+            // ── Resolve workspace ──
+            const workspaceDir = resolveWorkspaceDirFromApi(api);
+            if (!workspaceDir) {
+                api.logger?.error?.('[PD:write_pain_flag] Cannot resolve workspace directory');
+                return {
+                    content: [{
+                        type: 'text',
+                        text: '❌ Error: Cannot determine the workspace directory. ' +
+                            'Please ensure you are in an active workspace.',
+                    }],
+                };
+            }
+            try {
+                // ── Build pain flag data (KV format) ──
+                // NOTE(review): `reason` may contain newlines; confirm that
+                // serializeKvLines escapes them so a multi-line reason cannot
+                // inject extra KV entries.
+                const painData = buildPainFlag({
+                    source,
+                    score: String(score),
+                    reason,
+                    session_id: sessionId,
+                    is_risky: isRisky,
+                });
+                // ── Validate contract compliance ──
+                const missingFields: string[] = [];
+                for (const field of PAIN_FLAG_REQUIRED_FIELDS) {
+                    if (!painData[field] || painData[field].trim() === '') {
+                        missingFields.push(field);
+                    }
+                }
+                if (missingFields.length > 0) {
+                    api.logger?.error?.(`[PD:write_pain_flag] Pain flag missing required fields: ${missingFields.join(', ')}`);
+                    return {
+                        content: [{
+                            type: 'text',
+                            text: `❌ Error: Pain flag is missing required fields: ${missingFields.join(', ')}. ` +
+                                'This is an internal error — please report it.',
+                        }],
+                    };
+                }
+                // ── Atomic write (temp file + rename) ──
+                const painFlagPath = path.join(workspaceDir, '.state', '.pain_flag');
+                const { serializeKvLines } = await import('../utils/io.js');
+                const content = serializeKvLines(painData);
+                writePainFlagAtomic(painFlagPath, content);
+                // ── Log success ──
+                // The reason is truncated to 80 characters; the ellipsis goes
+                // inside the closing quote so the message stays well-formed.
+                api.logger?.info?.(
+                    `[PD:write_pain_flag] Pain signal recorded: source=${source}, score=${score}, ` +
+                    `reason="${reason.slice(0, 80)}${reason.length > 80 ? '...' : ''}"`
+                );
+                // ── Agent feedback ──
+                return {
+                    content: [{
+                        type: 'text',
+                        text: `✅ Pain signal recorded successfully.\n\n` +
+                            `- **Reason**: ${reason}\n` +
+                            `- **Score**: ${score}/100\n` +
+                            `- **Source**: ${source}\n` +
+                            `- **Risk**: ${isRisky ? 'Yes' : 'No'}\n` +
+                            `- **Session**: ${sessionId || '(current)'}\n\n` +
+                            `The evolution system will process this signal on the next heartbeat cycle ` +
+                            `(typically within 60 seconds).`,
+                    }],
+                };
+            } catch (err) {
+                // ── Log failure with stack trace ──
+                // Only the first three stack lines are kept to keep the log compact.
+                const errorMsg = err instanceof Error ? err.message : String(err);
+                const stack = err instanceof Error ? err.stack?.split('\n').slice(0, 3).join(' → ') : '';
+                api.logger?.error?.(
+                    `[PD:write_pain_flag] Failed to write pain flag: ${errorMsg}` +
+                    (stack ? `\n  Stack: ${stack}` : '')
+                );
+                return {
+                    content: [{
+                        type: 'text',
+                        text: `❌ Failed to record pain signal: ${errorMsg}\n\n` +
+                            'The error has been logged. Please try again or report this issue.',
+                    }],
+                };
+            }
+        },
+    };
+}

package/templates/langs/en/skills/pd-pain-signal/SKILL.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: pd-pain-signal
-description: Manually inject a pain signal into the evolution system by writing to .state/.pain_flag. TRIGGER CONDITIONS: (1) User reports agent stuck/looping/unresponsive (2) User says "record this issue", "force reflection", "trigger pain" (3) Tool failure with no follow-up action (4) User provides human intervention feedback.
+description: Manually inject a pain signal into the evolution system. TRIGGER CONDITIONS: (1) User reports agent stuck/looping/unresponsive (2) User says "record this issue", "force reflection", "trigger pain" (3) Tool failure with no follow-up action (4) User provides human intervention feedback.
 disable-model-invocation: true
 ---
@@ -9,30 +9,44 @@ disable-model-invocation: true
 You are now the "Manual Intervention Pain" component.
 **Task**:
-1. Write the user's feedback `$ARGUMENTS` as a **high-priority** pain signal to `.state/.pain_flag`.
+1. Record the user's feedback `$ARGUMENTS` as a **high-priority** pain signal.
 2. Inform the user that the signal has been injected, and suggest waiting for the next Hook trigger (e.g., Stop or PreCompact) or manually running `/reflection-log`.
-**Write Format** (must use this KV format, fields sorted alphabetically):
+**⚠️ Write Rules (MUST follow)**
+**The ONLY correct way**: Use the `write_pain_flag` tool.
 ```
-agent_id: <current agent ID, e.g., main/builder/diagnostician>
-is_risky: false
-reason: <user's feedback verbatim>
-score: 80
-session_id: <current session ID>
-source: human_intervention
-time: <ISO 8601 timestamp>
+write_pain_flag({
+  reason: "User feedback or error description",
+  score: 80,
+  source: "human_intervention",
+  is_risky: false
+})
 ```
-**Required fields** (4):
-- `source`: Fixed as `human_intervention`
-- `score`: Default `80` for manual intervention (high priority)
-- `time`: ISO 8601 timestamp
-- `reason`: User's feedback verbatim
+**Absolutely forbidden**:
+- ❌ Writing to `.state/.pain_flag` directly (any method)
+- ❌ Using bash heredoc (`cat <<EOF > .pain_flag`)
+- ❌ Using `echo "..." > .pain_flag`
+- ❌ Using `node -e` to call `writePainFlag` or `buildPainFlag`
+- ❌ Any method that `toString()` a JavaScript object to the file
+**Why use the tool?**
+The `write_pain_flag` tool encapsulates correct KV-format serialization, ensuring `.pain_flag` is never corrupted. Historically, direct file writes caused `[object Object]` corruption multiple times.
-**Optional fields** (auto-filled by system, but must be provided for manual injection):
-- `agent_id`: Current agent ID (e.g., main/builder/diagnostician)
-- `session_id`: Current session ID (from context)
-- `is_risky`: Fixed as `false`
+**Parameters**:
+- `reason` (required): The reason for the pain signal — describe what went wrong
+- `score` (optional): Pain score 0-100, default 80 (manual intervention)
+- `source` (optional): Source; the tool itself defaults to `manual`, so pass `human_intervention` explicitly for this skill
+- `is_risky` (optional): Whether this is a high-risk action, default false
-**Note**: `trace_id` and `trigger_text_preview` are auto-generated by the system — do NOT include them when manually injecting pain signals.
+**Example**:
+```
+write_pain_flag({
+  reason: "Agent edited a file without reading it first, breaking existing logic",
+  score: 85,
+  source: "human_intervention",
+  is_risky: false
+})
+```

package/templates/langs/zh/skills/pd-pain-signal/SKILL.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: pd-pain-signal
-description: 手动注入痛苦信号到进化系统，写入 .state/.pain_flag。TRIGGER CONDITIONS: (1) 用户报告 agent 卡住/循环/无响应 (2) 用户说"记录这个问题"、"强制反思"、"触发痛觉" (3) 工具失败后 agent 没有后续动作 (4) 用户提供人工干预反馈。
+description: 手动注入痛苦信号到进化系统。TRIGGER CONDITIONS: (1) 用户报告 agent 卡住/循环/无响应 (2) 用户说"记录这个问题"、"强制反思"、"触发痛觉" (3) 工具失败后 agent 没有后续动作 (4) 用户提供人工干预反馈。
 disable-model-invocation: true
 ---
@@ -9,30 +9,44 @@ disable-model-invocation: true
 你现在是"人工干预痛觉"组件。
 **任务**:
-1. 将用户的反馈 `$ARGUMENTS` 作为一条**高优先级**的痛苦信号，写入 `.state/.pain_flag`。
+1. 将用户的反馈 `$ARGUMENTS` 作为一条**高优先级**的痛苦信号记录下来。
 2. 告知用户信号已注入，并建议其等待下一个 Hook 触发（如 Stop 或 PreCompact）或手动运行 `/reflection-log`。
-**写入格式**（必须使用以下 KV 格式，字段按字母排序）:
+**⚠️ 写入规则（必须遵守）**
+**唯一正确的方式**: 使用 `write_pain_flag` 工具。
 ```
-agent_id: <当前 agent ID，如 main/builder/diagnostician>
-is_risky: false
-reason: <用户反馈的原文>
-score: 80
-session_id: <当前 session ID>
-source: human_intervention
-time: <ISO 8601 时间>
+write_pain_flag({
+  reason: "用户反馈原文或错误描述",
+  score: 80,
+  source: "human_intervention",
+  is_risky: false
+})
 ```
-**必填字段**（4 个）:
-- `source`: 固定为 `human_intervention`
-- `score`: 人工干预信号默认设为 `80`（高优先级）
-- `time`: ISO 8601 时间戳
-- `reason`: 用户反馈的原文
+**绝对禁止**:
+- ❌ 直接写 `.state/.pain_flag` 文件（任何方式都不行）
+- ❌ 使用 bash heredoc（`cat <<EOF > .pain_flag`）
+- ❌ 使用 `echo "..." > .pain_flag`
+- ❌ 使用 `node -e` 调用 `writePainFlag` 或 `buildPainFlag`
+- ❌ 任何将 JavaScript 对象 `toString()` 写入文件的方式
+**为什么必须用工具？**
+`write_pain_flag` 工具封装了正确的序列化逻辑（KV 格式），确保 `.pain_flag` 文件不会被写坏。历史上多次因为直接写文件导致 `[object Object]` 损坏。
-**可选字段**（自动写入时由系统填充，人工注入时必须填写）:
-- `agent_id`: 当前智能体 ID（如 main/builder/diagnostician）
-- `session_id`: 当前会话 ID（从上下文中获取）
-- `is_risky`: 固定为 `false`
+**参数说明**:
+- `reason` (必填): 痛苦的原因，描述具体发生了什么
+- `score` (可选): 痛苦分数 0-100，默认 80（人工干预）
+- `source` (可选): 来源；工具本身的默认值为 `manual`，本技能需显式传入 `human_intervention`
+- `is_risky` (可选): 是否高风险，默认 false
-**注意**: `trace_id` 和 `trigger_text_preview` 由系统自动生成，人工注入时**不需要**写这两个字段。
+**示例**:
+```
+write_pain_flag({
+  reason: "Agent 没有读取文件就直接编辑，导致现有逻辑被破坏",
+  score: 85,
+  source: "human_intervention",
+  is_risky: false
+})
+```

package/tests/core/nocturnal-candidate-scoring.test.ts CHANGED Viewed

@@ -5,6 +5,7 @@ import {
   rankCandidates,
   runTournament,
   DEFAULT_SCORING_WEIGHTS,
+  validateCandidateDiversity,
 } from '../../src/core/nocturnal-candidate-scoring.js';
 import type { DreamerCandidate, PhilosopherJudgment } from '../../src/core/nocturnal-trinity.js';
 import type { ThresholdValues } from '../../src/core/adaptive-thresholds.js';
@@ -398,3 +399,134 @@ describe('DEFAULT_SCORING_WEIGHTS', () => {
     }
   });
 });
+// ---------------------------------------------------------------------------
+// Tests: validateCandidateDiversity
+// Covers the risk-level diversity flag, the keyword-overlap flag and score,
+// the empty/single-candidate shortcuts, and tolerance of malformed input.
+// ---------------------------------------------------------------------------
+describe('validateCandidateDiversity', () => {
+  it('passes when candidates have 2+ distinct risk levels and low keyword overlap', () => {
+    const pool: DreamerCandidate[] = [
+      makeCandidate({ candidateIndex: 0, riskLevel: 'low', betterDecision: 'Read config.json to verify settings' }),
+      makeCandidate({ candidateIndex: 1, riskLevel: 'high', betterDecision: 'Refactor the entire authentication module from scratch' }),
+    ];
+    const verdict = validateCandidateDiversity(pool);
+    const { diversityCheckPassed, riskLevelDiversity, keywordOverlapPassed } = verdict;
+    expect(diversityCheckPassed).toBe(true);
+    expect(riskLevelDiversity).toBe(true);
+    expect(keywordOverlapPassed).toBe(true);
+  });
+  it('fails when all candidates have the same risk level', () => {
+    const pool: DreamerCandidate[] = [
+      makeCandidate({ candidateIndex: 0, riskLevel: 'low', betterDecision: 'Read file A to check settings' }),
+      makeCandidate({ candidateIndex: 1, riskLevel: 'low', betterDecision: 'Review file completely different approach' }),
+      makeCandidate({ candidateIndex: 2, riskLevel: 'low', betterDecision: 'Inspect another unique diagnostic method' }),
+    ];
+    const verdict = validateCandidateDiversity(pool);
+    const { diversityCheckPassed, riskLevelDiversity } = verdict;
+    expect(diversityCheckPassed).toBe(false);
+    expect(riskLevelDiversity).toBe(false);
+  });
+  it('fails when candidate pair has keyword overlap > 0.8', () => {
+    // Identical decision text on both candidates; only the risk level differs.
+    const pool: DreamerCandidate[] = [
+      makeCandidate({ candidateIndex: 0, riskLevel: 'low', betterDecision: 'Review the authentication configuration file before making any changes to the system' }),
+      makeCandidate({ candidateIndex: 1, riskLevel: 'high', betterDecision: 'Review the authentication configuration file before making any changes to the system' }),
+    ];
+    const verdict = validateCandidateDiversity(pool);
+    const { diversityCheckPassed, keywordOverlapPassed, maxOverlapScore } = verdict;
+    expect(diversityCheckPassed).toBe(false);
+    expect(keywordOverlapPassed).toBe(false);
+    expect(maxOverlapScore).toBeGreaterThan(0.8);
+  });
+  it('passes for single candidate', () => {
+    const loneCandidate = makeCandidate({ candidateIndex: 0, riskLevel: 'low' });
+    const pool: DreamerCandidate[] = [loneCandidate];
+    const verdict = validateCandidateDiversity(pool);
+    const { diversityCheckPassed, details } = verdict;
+    expect(diversityCheckPassed).toBe(true);
+    expect(details).toContain('Single candidate');
+  });
+  it('passes for empty array', () => {
+    const verdict = validateCandidateDiversity([]);
+    const { diversityCheckPassed, details } = verdict;
+    expect(diversityCheckPassed).toBe(true);
+    expect(details).toContain('No candidates');
+  });
+  it('passes when candidates lack riskLevel (graceful degradation)', () => {
+    // Neither candidate is given a riskLevel override, so the expectation is
+    // that the risk-level check has nothing to compare and passes.
+    const pool: DreamerCandidate[] = [
+      makeCandidate({ candidateIndex: 0, betterDecision: 'Read config.json to verify settings' }),
+      makeCandidate({ candidateIndex: 1, betterDecision: 'Refactor the entire authentication module from scratch' }),
+    ];
+    const verdict = validateCandidateDiversity(pool);
+    const { diversityCheckPassed, riskLevelDiversity } = verdict;
+    expect(diversityCheckPassed).toBe(true);
+    expect(riskLevelDiversity).toBe(true);
+  });
+  it('fails when some candidates have riskLevel but fewer than 2 distinct values', () => {
+    // Only the first candidate is given a riskLevel, leaving a single distinct value.
+    const pool: DreamerCandidate[] = [
+      makeCandidate({ candidateIndex: 0, riskLevel: 'medium', betterDecision: 'Read config.json to verify settings' }),
+      makeCandidate({ candidateIndex: 1, betterDecision: 'Refactor the entire authentication module from scratch' }),
+    ];
+    const verdict = validateCandidateDiversity(pool);
+    const { diversityCheckPassed, riskLevelDiversity } = verdict;
+    expect(diversityCheckPassed).toBe(false);
+    expect(riskLevelDiversity).toBe(false);
+  });
+  it('uses max(|A|, |B|) as denominator for keyword overlap', () => {
+    // A is a short text and B a long one that starts with the same words.
+    // Expected keyword sets (words longer than 3 characters):
+    //   A = {review, authentication, configuration}                      -> 3
+    //   B = A + {before, proceeding, with, changes, deployment,
+    //            pipeline, infrastructure}                               -> 10
+    // intersection = 3, max(3, 10) = 10, so overlap = 3/10 = 0.3.
+    const pool: DreamerCandidate[] = [
+      makeCandidate({ candidateIndex: 0, riskLevel: 'low', betterDecision: 'review authentication configuration' }),
+      makeCandidate({ candidateIndex: 1, riskLevel: 'high', betterDecision: 'review authentication configuration before proceeding with changes to the deployment pipeline infrastructure' }),
+    ];
+    const { maxOverlapScore } = validateCandidateDiversity(pool);
+    expect(maxOverlapScore).toBeLessThanOrEqual(0.4);
+  });
+  it('ignores words <= 3 characters in keyword overlap', () => {
+    // Every word is at most 3 characters, so no keywords should be extracted
+    // and the overlap should come out as 0 despite identical text.
+    const pool: DreamerCandidate[] = [
+      makeCandidate({ candidateIndex: 0, riskLevel: 'low', betterDecision: 'the and but for' }),
+      makeCandidate({ candidateIndex: 1, riskLevel: 'high', betterDecision: 'the and but for' }),
+    ];
+    const verdict = validateCandidateDiversity(pool);
+    const { keywordOverlapPassed, maxOverlapScore } = verdict;
+    expect(keywordOverlapPassed).toBe(true);
+    expect(maxOverlapScore).toBe(0);
+  });
+  it('never throws on malformed input', () => {
+    // Each builder runs inside the guarded callback, so a failure while
+    // constructing the input is also caught by `not.toThrow()`.
+    const malformedInputs: Array<() => DreamerCandidate[]> = [
+      // Undefined candidates
+      () => undefined as unknown as DreamerCandidate[],
+      // Null candidates
+      () => null as unknown as DreamerCandidate[],
+      // Candidates with undefined fields
+      () => [
+        { candidateIndex: 0 } as DreamerCandidate,
+      ],
+      // Mixed valid and malformed
+      () => [
+        makeCandidate({ candidateIndex: 0, riskLevel: 'low' }),
+        { candidateIndex: 1 } as DreamerCandidate,
+      ],
+    ];
+    for (const buildInput of malformedInputs) {
+      expect(() => validateCandidateDiversity(buildInput())).not.toThrow();
+    }
+  });
+  it('returns correct maxOverlapScore rounded to 2 decimal places', () => {
+    const pool: DreamerCandidate[] = [
+      makeCandidate({ candidateIndex: 0, riskLevel: 'low', betterDecision: 'Review configuration settings before deployment' }),
+      makeCandidate({ candidateIndex: 1, riskLevel: 'high', betterDecision: 'Review configuration settings before deployment testing' }),
+    ];
+    const verdict = validateCandidateDiversity(pool);
+    // The fractional part, when present, must have at most 2 digits.
+    const [, fraction] = verdict.maxOverlapScore.toString().split('.');
+    if (fraction) {
+      expect(fraction.length).toBeLessThanOrEqual(2);
+    }
+    expect(typeof verdict.maxOverlapScore).toBe('number');
+  });
+});