principles-disciple 1.73.0 → 1.75.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/INSTALL.md +1 -3
  2. package/openclaw.plugin.json +1 -1
  3. package/package.json +1 -1
  4. package/src/core/event-log.ts +0 -9
  5. package/src/core/migration.ts +0 -1
  6. package/src/core/path-resolver.ts +0 -1
  7. package/src/core/paths.ts +0 -1
  8. package/src/core/workspace-guidance-migrator.ts +179 -0
  9. package/src/hooks/gate-block-helper.ts +25 -20
  10. package/src/hooks/gate.ts +13 -61
  11. package/src/hooks/prompt.ts +1 -61
  12. package/src/index.ts +8 -12
  13. package/src/types/event-types.ts +0 -1
  14. package/src/utils/io.ts +0 -22
  15. package/templates/langs/en/core/AGENTS.md +5 -5
  16. package/templates/langs/en/core/BOOTSTRAP.md +1 -1
  17. package/templates/langs/en/principles/THINKING_OS.md +4 -3
  18. package/templates/langs/en/skills/admin/SKILL.md +2 -2
  19. package/templates/langs/en/skills/ai-sprint-orchestration/runtime/.gitignore +2 -2
  20. package/templates/langs/en/skills/evolve-task/SKILL.md +2 -2
  21. package/templates/langs/en/skills/pd-grooming/SKILL.md +1 -1
  22. package/templates/langs/en/skills/pd-mentor/SKILL.md +1 -2
  23. package/templates/langs/en/skills/reflection/SKILL.md +2 -2
  24. package/templates/langs/en/skills/report/SKILL.md +1 -1
  25. package/templates/langs/zh/core/AGENTS.md +5 -5
  26. package/templates/langs/zh/core/BOOTSTRAP.md +1 -1
  27. package/templates/langs/zh/principles/THINKING_OS.md +4 -3
  28. package/templates/langs/zh/skills/admin/SKILL.md +2 -2
  29. package/templates/langs/zh/skills/ai-sprint-orchestration/runtime/.gitignore +2 -2
  30. package/templates/langs/zh/skills/evolve-task/SKILL.md +2 -2
  31. package/templates/langs/zh/skills/pd-grooming/SKILL.md +1 -1
  32. package/templates/langs/zh/skills/pd-mentor/SKILL.md +1 -2
  33. package/templates/langs/zh/skills/reflection/SKILL.md +2 -2
  34. package/templates/langs/zh/skills/report/SKILL.md +1 -1
  35. package/tests/core/migration.test.ts +7 -7
  36. package/tests/core/path-resolver.test.ts +1 -1
  37. package/tests/core/paths-refactor.test.ts +0 -22
  38. package/tests/core/workspace-context.test.ts +2 -2
  39. package/tests/core-anti-growth.test.ts +1 -1
  40. package/tests/hooks/confirm-first-removal.test.ts +188 -0
  41. package/tests/hooks/gate-no-path-write-tool.test.ts +172 -0
  42. package/src/core/confirm-first-gate.ts +0 -255
  43. package/templates/langs/en/skills/plan-script/SKILL.md +0 -32
  44. package/templates/langs/zh/skills/plan-script/SKILL.md +0 -32
  45. package/templates/workspace/PLAN.md +0 -2
  46. package/tests/hooks/confirm-first-gate.test.ts +0 -333
@@ -0,0 +1,172 @@
1
+ /**
2
+ * Regression test: write tools without file_path must still go through RuleHost.
3
+ *
4
+ * PRI-286 P1: After removing confirm-first gate, write tools (apply_patch, patch, etc.)
5
+ * that have no file_path/path/file/target param must NOT be silently allowed.
6
+ * They must use a synthetic path `<tool:${toolName}>` and still evaluate via RuleHost.
7
+ *
8
+ * Uses vi.hoisted + mock of WorkspaceContext to avoid isolation issues in full suite.
9
+ * WorkspaceContext is the key — in full suite, other test files initialize the real
10
+ * context which caches a real EventLogService that doesn't have our mock methods.
11
+ */
12
+
13
+ import { describe, it, expect, vi, beforeEach } from 'vitest';
14
+
15
+ // vi.hoisted ensures these are available to vi.mock factories at hoist time
16
+ const { mockEvaluate, mockEventLog, mockEvolution } = vi.hoisted(() => {
17
+ const mockEvaluate = vi.fn().mockReturnValue(undefined);
18
+ const mockEventLog = {
19
+ recordRuleHostEvaluated: vi.fn(),
20
+ recordRuleEnforced: vi.fn(),
21
+ recordRuleHostBlocked: vi.fn(),
22
+ recordRuleHostRequireApproval: vi.fn(),
23
+ recordRuleHostAutoCorrectProposed: vi.fn(),
24
+ recordRuleHostAutoCorrectApplied: vi.fn(),
25
+ recordGateBlock: vi.fn(),
26
+ recordSession: vi.fn(),
27
+ };
28
+ const mockEvolution = {
29
+ getTier: vi.fn().mockReturnValue(3),
30
+ getPoints: vi.fn().mockReturnValue(200),
31
+ };
32
+ return { mockEvaluate, mockEventLog, mockEvolution };
33
+ });
34
+
35
+ vi.mock('../../src/core/session-tracker.js', () => ({
36
+ getSession: vi.fn(() => ({ currentGfi: 0 })),
37
+ trackBlock: vi.fn(),
38
+ hasRecentThinking: vi.fn(() => false),
39
+ }));
40
+
41
+ vi.mock('../../src/core/evolution-engine.js', () => ({
42
+ getEvolutionEngine: vi.fn(() => mockEvolution),
43
+ }));
44
+
45
+ vi.mock('../../src/core/event-log.js', () => ({
46
+ EventLogService: { get: vi.fn(() => mockEventLog) },
47
+ }));
48
+
49
+ vi.mock('../../src/core/rule-host.js', () => ({
50
+ RuleHost: vi.fn(function(this: any, _stateDir: string, _logger: any) {
51
+ this.evaluate = mockEvaluate;
52
+ }),
53
+ }));
54
+
55
+ vi.mock('../../src/core/principle-tree-ledger.js', () => ({
56
+ loadLedger: vi.fn(),
57
+ listImplementationsByLifecycleState: vi.fn(() => []),
58
+ }));
59
+
60
+ // Mock WorkspaceContext to return a controlled instance with our mockEventLog.
61
+ // This prevents full-suite caching of real WorkspaceContext instances.
62
+ vi.mock('../../src/core/workspace-context.js', () => {
63
+ return {
64
+ WorkspaceContext: {
65
+ fromHookContext: vi.fn((ctx: any) => ({
66
+ workspaceDir: ctx.workspaceDir,
67
+ stateDir: ctx.workspaceDir + '/.state',
68
+ eventLog: mockEventLog,
69
+ trajectory: {
70
+ recordGateBlock: vi.fn(),
71
+ recordPainEvent: vi.fn(),
72
+ recordSession: vi.fn(),
73
+ },
74
+ config: {
75
+ get: vi.fn().mockReturnValue(undefined),
76
+ },
77
+ })),
78
+ },
79
+ };
80
+ });
81
+
82
+ // Dynamic import AFTER mocks are set up
83
+ const { handleBeforeToolCall } = await import('../../src/hooks/gate.js');
84
+
85
+ const workspaceDir = '/mock/workspace';
86
+ const sessionId = 'test-no-path';
87
+
88
+ describe('Write tools without file_path must go through RuleHost', () => {
89
+ beforeEach(() => {
90
+ vi.clearAllMocks();
91
+ mockEvaluate.mockReturnValue(undefined);
92
+ });
93
+
94
+ it('apply_patch with no path triggers RuleHost evaluate', () => {
95
+ mockEvaluate.mockReturnValue(undefined); // allow
96
+
97
+ const result = handleBeforeToolCall(
98
+ { toolName: 'apply_patch', params: { patch: 'some diff content' } } as any,
99
+ { workspaceDir, sessionId } as any,
100
+ );
101
+
102
+ // Should not be blocked (RuleHost returned undefined = allow)
103
+ expect(result).toBeUndefined();
104
+ // But RuleHost MUST have been called
105
+ expect(mockEvaluate).toHaveBeenCalledTimes(1);
106
+ // Verify synthetic path was used
107
+ const input = mockEvaluate.mock.calls[0][0];
108
+ expect(input.action.normalizedPath).toBe('<tool:apply_patch>');
109
+ });
110
+
111
+ it('apply_patch with no path: RuleHost block must return block', () => {
112
+ mockEvaluate.mockReturnValue({
113
+ decision: 'block',
114
+ matched: true,
115
+ reason: 'Test block: write tool without path',
116
+ ruleId: 'R_TEST',
117
+ principleId: 'P_TEST',
118
+ });
119
+
120
+ const result = handleBeforeToolCall(
121
+ { toolName: 'apply_patch', params: { patch: 'dangerous content' } } as any,
122
+ { workspaceDir, sessionId } as any,
123
+ );
124
+
125
+ expect(result).toBeDefined();
126
+ expect(result?.block).toBe(true);
127
+ expect(result?.blockReason).toContain('Test block: write tool without path');
128
+ expect(mockEvaluate).toHaveBeenCalledTimes(1);
129
+ expect(mockEvaluate.mock.calls[0][0].action.normalizedPath).toBe('<tool:apply_patch>');
130
+ });
131
+
132
+ it('patch tool with no path triggers RuleHost evaluate', () => {
133
+ mockEvaluate.mockReturnValue(undefined); // allow
134
+
135
+ const result = handleBeforeToolCall(
136
+ { toolName: 'patch', params: {} } as any,
137
+ { workspaceDir, sessionId } as any,
138
+ );
139
+
140
+ expect(result).toBeUndefined();
141
+ expect(mockEvaluate).toHaveBeenCalledTimes(1);
142
+ expect(mockEvaluate.mock.calls[0][0].action.normalizedPath).toBe('<tool:patch>');
143
+ });
144
+
145
+ it('Write tool with valid file_path still uses real path', () => {
146
+ mockEvaluate.mockReturnValue(undefined); // allow
147
+
148
+ const result = handleBeforeToolCall(
149
+ { toolName: 'write', params: { file_path: '/mock/workspace/src/app.ts', content: 'x' } } as any,
150
+ { workspaceDir, sessionId } as any,
151
+ );
152
+
153
+ expect(result).toBeUndefined();
154
+ expect(mockEvaluate).toHaveBeenCalledTimes(1);
155
+ expect(mockEvaluate.mock.calls[0][0].action.normalizedPath).toBe('src/app.ts');
156
+ });
157
+
158
+ it('bash with no file target still goes through RuleHost (existing behavior)', () => {
159
+ mockEvaluate.mockReturnValue(undefined); // allow
160
+
161
+ const result = handleBeforeToolCall(
162
+ { toolName: 'bash', params: { command: 'echo hello' } } as any,
163
+ { workspaceDir, sessionId } as any,
164
+ );
165
+
166
+ expect(result).toBeUndefined();
167
+ expect(mockEvaluate).toHaveBeenCalledTimes(1);
168
+ // Bash without file target uses the full command as path (existing heuristic)
169
+ const input = mockEvaluate.mock.calls[0][0];
170
+ expect(input.action.normalizedPath).toContain('echo hello');
171
+ });
172
+ });
@@ -1,255 +0,0 @@
1
- /**
2
- * Confirm-First Gate
3
- *
4
- * Hard enforcement for confirm-first Runtime V2 prompt activations.
5
- * When an owner-approved activation requires confirmation before coding,
6
- * this gate blocks mutating tools until the session has explicit owner approval.
7
- *
8
- * This is NOT a replacement for prompt injection — it's a hard fallback
9
- * for models that don't follow system prompt behavioral directives.
10
- *
11
- * Flow:
12
- * 1. Prompt hook (before_prompt_build) detects confirm-first directive and caches state
13
- * 2. Prompt hook detects user approval language and marks session approved
14
- * 3. Gate hook (before_tool_call) checks cached state synchronously
15
- */
16
-
17
- import { BASH_TOOLS_SET, WRITE_TOOLS } from '../constants/tools.js';
18
- import { SqliteConfirmFirstStateStore } from '@principles/core/runtime-v2';
19
-
20
- /** Per-session confirm-first state */
21
- interface ConfirmFirstSessionState {
22
- active: boolean;
23
- principleId?: string;
24
- }
25
-
26
- /** Size cap to prevent memory leaks from abandoned sessions */
27
- const MAX_SESSION_ENTRIES = 500;
28
-
29
- // TODO(PRI-268): stale directive cleanup
30
- const sessionDirectiveState = new Map<string, ConfirmFirstSessionState>();
31
- // TODO(PRI-267): per-task approval scope
32
- const sessionApprovalState = new Map<string, boolean>();
33
-
34
- let confirmFirstStore: SqliteConfirmFirstStateStore | null = null;
35
-
36
- export function setConfirmFirstStore(store: SqliteConfirmFirstStateStore | null): void {
37
- confirmFirstStore = store;
38
- }
39
-
40
- function evictOldestIfFull(map: Map<string, unknown>): void {
41
- if (map.size >= MAX_SESSION_ENTRIES) {
42
- const firstKey = map.keys().next().value;
43
- if (firstKey !== undefined) map.delete(firstKey);
44
- }
45
- }
46
-
47
- export interface ConfirmFirstGateResult {
48
- action: 'allow' | 'block' | 'skip';
49
- reason?: string;
50
- nextAction?: string;
51
- principleId?: string;
52
- }
53
-
54
- /**
55
- * Check if a tool is mutating (write, edit, delete, or mutating exec).
56
- */
57
- function isMutatingTool(toolName: string, params?: Record<string, unknown>): boolean {
58
- // Direct write/edit/delete tools are always mutating
59
- if (WRITE_TOOLS.has(toolName)) return true;
60
-
61
- // For exec/bash, only mutating if the command content is mutating
62
- if (BASH_TOOLS_SET.has(toolName)) {
63
- const command = String(params?.command || params?.args || '');
64
- if (!command) return false;
65
- return />\s*|>>\s*|\brm\b|\bmv\b|\bmkdir\b|\btouch\b|\bcp\s|\bsed\s+-i|\bchmod\b|\bchown\b|\bdel\s|\bRemove-Item\b|\bSet-Content\b|\bOut-File\b|\bNew-Item\b/.test(command);
66
- }
67
-
68
- return false;
69
- }
70
-
71
- /**
72
- * Detect if user message contains clear approval language.
73
- * Rejects negated forms (e.g., "don't proceed", "不同意", "确认一下").
74
- */
75
- export function detectApprovalMarker(message: string): boolean {
76
- const trimmed = message.trim();
77
-
78
- // Negation prefixes — reject if present before approval keywords
79
- const zhNegation = /不|别|暂不|先不|无法|不能|没准备好|还没|尚未/;
80
- const enNegation = /don'?t|not\s+ready|can'?t|won'?t|stop|hold|cannot|isn'?t|aren'?t|haven'?t|shouldn'?t/i;
81
-
82
- // Single-word Chinese markers require exact match (the word alone, not embedded in a sentence)
83
- const zhExactMarkers = /^(?:确认|批准|同意|执行吧|开始执行)$/;
84
- // Multi-word Chinese markers
85
- const zhPhraseMarkers = /按计划执行|可以执行|就这么做|去执行|照.*做|没问题.*执行/;
86
-
87
- // English markers — unambiguous single-word approvals only
88
- const enMarkers = /\bapproved\b|\bgo\s*ahead\b|\blgtm\b/i;
89
- // English phrase markers — require explicit approval context
90
- const enPhraseMarkers = /\byes,?\s*(do\s+it|proceed|execute)\b|\bdo\s+it\b|\bproceed\s+with\s+the\s+plan\b|\bexecute\s+the\s+plan\b|\bplease\s+proceed\s+with\s+the\s+plan\b/i;
91
-
92
- // Check Chinese
93
- if (zhExactMarkers.test(trimmed) || zhPhraseMarkers.test(trimmed)) {
94
- // Reject if negation prefix present
95
- if (zhNegation.test(trimmed)) return false;
96
- return true;
97
- }
98
-
99
- // Check English
100
- if (enMarkers.test(trimmed) || enPhraseMarkers.test(trimmed)) {
101
- if (enNegation.test(trimmed)) return false;
102
- return true;
103
- }
104
-
105
- return false;
106
- }
107
-
108
- /**
109
- * Set confirm-first directive state for a session (called from prompt hook).
110
- */
111
- export function setConfirmFirstDirective(
112
- sessionId: string,
113
- active: boolean,
114
- principleId?: string,
115
- ): void {
116
- evictOldestIfFull(sessionDirectiveState);
117
- sessionDirectiveState.set(sessionId, { active, principleId });
118
- if (confirmFirstStore) {
119
- try {
120
- confirmFirstStore.upsertDirective(sessionId, active, principleId ?? null);
121
- } catch (storeErr) {
122
- console.warn(`[PD:ConfirmFirst] Store write failed for directive (session=${sessionId}), degraded to cache-only: ${String(storeErr)}`);
123
- }
124
- }
125
- }
126
-
127
- /**
128
- * Mark a session as approved (called from prompt hook when approval detected).
129
- */
130
- export function setConfirmFirstApproval(sessionId: string): void {
131
- evictOldestIfFull(sessionApprovalState);
132
- sessionApprovalState.set(sessionId, true);
133
- if (confirmFirstStore) {
134
- try {
135
- confirmFirstStore.upsertApproval(sessionId);
136
- } catch (storeErr) {
137
- console.warn(`[PD:ConfirmFirst] Store write failed for approval (session=${sessionId}), degraded to cache-only: ${String(storeErr)}`);
138
- }
139
- }
140
- }
141
-
142
- /**
143
- * Synchronous gate evaluation — checks cached state only.
144
- * Called from before_tool_call hook (must be synchronous).
145
- */
146
- export function evaluateConfirmFirstGateSync(
147
- sessionId: string | undefined,
148
- toolName: string,
149
- params: Record<string, unknown> | undefined,
150
- ): ConfirmFirstGateResult {
151
- if (!sessionId) return { action: 'skip' };
152
-
153
- // 1. Check if session is already approved
154
- if (sessionApprovalState.get(sessionId)) {
155
- return { action: 'allow' };
156
- }
157
-
158
- // 2. Check if confirm-first directive is active for this session
159
- const directive = sessionDirectiveState.get(sessionId);
160
- if (!directive?.active) {
161
- return { action: 'skip' };
162
- }
163
-
164
- // 3. Check if tool is mutating
165
- if (!isMutatingTool(toolName, params)) {
166
- return { action: 'allow' };
167
- }
168
-
169
- // 4. Block: mutating tool with active confirm-first and no approval
170
- return {
171
- action: 'block',
172
- reason: 'confirm_first_required',
173
- nextAction:
174
- 'Summarize requirements, list ambiguities, propose a plan, and wait for explicit owner approval before mutating files.',
175
- principleId: directive.principleId,
176
- };
177
- }
178
-
179
- /**
180
- * Reset state for a session (e.g., on /reset).
181
- */
182
- export function resetConfirmFirst(sessionId: string): void {
183
- sessionDirectiveState.delete(sessionId);
184
- sessionApprovalState.delete(sessionId);
185
- if (confirmFirstStore) {
186
- try {
187
- confirmFirstStore.deleteState(sessionId);
188
- } catch (storeErr) {
189
- console.warn(`[PD:ConfirmFirst] Store delete failed for session=${sessionId}: ${String(storeErr)}`);
190
- }
191
- }
192
- }
193
-
194
- /**
195
- * Check if a session has been approved (for testing).
196
- */
197
- export function isSessionApproved(sessionId: string): boolean {
198
- return sessionApprovalState.get(sessionId) === true;
199
- }
200
-
201
- /**
202
- * Check if a session has an active directive (for testing).
203
- */
204
- export function hasActiveDirective(sessionId: string): boolean {
205
- return sessionDirectiveState.get(sessionId)?.active === true;
206
- }
207
-
208
- /**
209
- * Clear all state (for testing).
210
- */
211
- export function clearAllConfirmFirstState(): void {
212
- sessionDirectiveState.clear();
213
- sessionApprovalState.clear();
214
- if (confirmFirstStore) {
215
- try {
216
- confirmFirstStore.deleteAllState();
217
- } catch (storeErr) {
218
- console.warn(`[PD:ConfirmFirst] Store clearAll failed: ${String(storeErr)}`);
219
- }
220
- }
221
- }
222
-
223
- export function hydrateFromStore(sessionId: string): void {
224
- if (!confirmFirstStore) return;
225
- if (sessionDirectiveState.has(sessionId)) return;
226
-
227
- try {
228
- const record = confirmFirstStore.getState(sessionId);
229
- if (!record) return;
230
-
231
- evictOldestIfFull(sessionDirectiveState);
232
- sessionDirectiveState.set(sessionId, {
233
- active: record.directiveActive,
234
- principleId: record.directivePrincipleId ?? undefined,
235
- });
236
-
237
- if (record.approvalActive) {
238
- evictOldestIfFull(sessionApprovalState);
239
- sessionApprovalState.set(sessionId, true);
240
- }
241
- } catch (storeErr) {
242
- console.warn(`[PD:ConfirmFirst] Store hydration failed for session=${sessionId}: ${String(storeErr)}`);
243
- }
244
- }
245
-
246
- export function pruneStoreStaleRows(): number {
247
- if (!confirmFirstStore) return 0;
248
- try {
249
- return confirmFirstStore.pruneStaleRows();
250
- } catch (storeErr) {
251
- console.warn(`[PD:ConfirmFirst] Store pruning failed: ${String(storeErr)}`);
252
- return 0;
253
- }
254
- }
255
-
@@ -1,32 +0,0 @@
1
- ---
2
- name: plan-script
3
- description: Create a step-by-step movie-script style execution plan. Includes target files, verification metrics, and rollback strategy.
4
- disable-model-invocation: true
5
- ---
6
-
7
- # Plan Script
8
-
9
- **Goal**: Produce a "foolproof" executable plan to ensure controlled execution.
10
-
11
- Please generate plan in the following structure:
12
-
13
- ## 1. Target Files (Authorization List)
14
- - List file paths **uniquely authorized** for modification in this plan.
15
- - Format: `- path/to/file`
16
-
17
- ## 2. Steps (Execution Steps)
18
- 1. Operations specific to filenames and tool calls.
19
- 2. Each step includes expected intermediate state.
20
-
21
- ## 3. Metrics (Verification Metrics)
22
- - How to quantitatively prove this plan succeeded? (e.g., tests pass, command returns 0, specific string appears in logs).
23
-
24
- ## 4. Active Mental Models
25
- - Select exactly **2** meta-cognitive models from `.principles/THINKING_OS.md` that are most relevant to the current task.
26
- - Format: `- [T-0X] Model Name: Why is it needed for this specific task?`
27
-
28
- ## 5. Rollback (Rollback Strategy)
29
- - If step 2 fails, how to one-click restore to safe state?
30
-
31
- ---
32
- **Action**: Update above content to `PLAN.md` and set `STATUS: READY`.
@@ -1,32 +0,0 @@
1
- ---
2
- name: plan-script
3
- description: Create a step-by-step movie-script style execution plan. Includes target files, verification metrics, and rollback strategy.
4
- disable-model-invocation: true
5
- ---
6
-
7
- # Plan Script (计划编排)
8
-
9
- **目标**: 产生一份“傻瓜式”可执行计划,确保执行过程受控。
10
-
11
- 请按以下结构生成计划:
12
-
13
- ## 1. Target Files (授权清单)
14
- - 列出本次计划**唯一授权**修改的文件路径。
15
- - 格式:`- path/to/file`
16
-
17
- ## 2. Steps (执行步骤)
18
- 1. 具体到文件名和工具调用的操作。
19
- 2. 每个步骤包含预期的中间状态。
20
-
21
- ## 3. Metrics (验证指标)
22
- - 如何量化证明本计划成功了?(如:测试通过、命令返回 0、日志出现特定字符串)。
23
-
24
- ## 4. Active Mental Models (激活的思维模型)
25
- - 从 `.principles/THINKING_OS.md` 中挑选 **2 个** 最适合当前任务的元认知模型。
26
- - 格式:`- [T-0X] 模型名称:为什么在这个任务中需要它?`
27
-
28
- ## 5. Rollback (回滚方案)
29
- - 如果步骤 2 失败,如何一键恢复到安全状态?
30
-
31
- ---
32
- **动作**: 请将以上内容更新至 `PLAN.md`,并设置 `STATUS: READY`。
@@ -1,2 +0,0 @@
1
- STATUS: DRAFT
2
- Steps...