principles-disciple 1.49.0 → 1.51.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
- package/src/config/defaults/runtime.ts +1 -4
- package/src/core/correction-cue-learner.ts +9 -6
- package/src/core/init.ts +12 -0
- package/src/core/nocturnal-arbiter.ts +15 -10
- package/src/core/nocturnal-trinity.ts +52 -31
- package/src/core/pain.ts +56 -0
- package/src/core/principle-compiler/ledger-registrar.ts +1 -1
- package/src/core/thinking-models.ts +43 -0
- package/src/hooks/gate-block-helper.ts +2 -1
- package/src/hooks/lifecycle.ts +10 -3
- package/src/hooks/llm.ts +10 -3
- package/src/service/evolution-pain-context.ts +2 -2
- package/src/service/evolution-worker.ts +3 -2
- package/src/service/keyword-optimization-service.ts +1 -1
- package/src/service/nocturnal-runtime.ts +86 -6
- package/src/service/nocturnal-target-selector.ts +1 -1
- package/src/service/queue-io.ts +2 -2
- package/tests/core/nocturnal-arbiter.test.ts +10 -2
- package/tests/tools/write-pain-flag.test.ts +103 -1
package/openclaw.plugin.json
CHANGED
package/package.json
CHANGED
|
@@ -79,9 +79,6 @@ export const EVOLUTION_WORKER_POLL_INTERVAL_MS = 15 * ONE_MINUTE_MS;
|
|
|
79
79
|
/** Evolution queue batch size */
|
|
80
80
|
export const EVOLUTION_QUEUE_BATCH_SIZE = 10;
|
|
81
81
|
|
|
82
|
-
/** Pain queue dedup window (30 minutes) */
|
|
83
|
-
export const PAIN_QUEUE_DEDUP_WINDOW_MS = 30 * ONE_MINUTE_MS;
|
|
84
|
-
|
|
85
82
|
// ── Session Tracker Settings ───────────────────────────────────────────────────
|
|
86
83
|
|
|
87
84
|
export const SESSION_TOKEN_WARNING_THRESHOLD = 8000;
|
|
@@ -90,7 +87,7 @@ export const SESSION_MAX_IDLE_MS = 30 * ONE_MINUTE_MS;
|
|
|
90
87
|
// ── Event Log Buffer Settings ───────────────────────────────────────────────────
|
|
91
88
|
|
|
92
89
|
export const EVENT_LOG_BUFFER_SIZE = 20;
|
|
93
|
-
export const EVENT_LOG_FLUSH_INTERVAL_MS = 30 *
|
|
90
|
+
export const EVENT_LOG_FLUSH_INTERVAL_MS = 30 * MS_PER_SECOND;
|
|
94
91
|
|
|
95
92
|
// ── Default Busy Timeout ───────────────────────────────────────────────────────
|
|
96
93
|
|
|
@@ -21,7 +21,7 @@ import {
|
|
|
21
21
|
CORRECTION_SEED_KEYWORDS,
|
|
22
22
|
MAX_CORRECTION_KEYWORDS,
|
|
23
23
|
} from './correction-types.js';
|
|
24
|
-
import {
|
|
24
|
+
import { checkKeywordOptCooldown, recordKeywordOptRun } from '../service/nocturnal-runtime.js';
|
|
25
25
|
import { atomicWriteFileSync } from '../utils/io.js';
|
|
26
26
|
|
|
27
27
|
const KEYWORD_STORE_FILE = 'correction_keywords.json';
|
|
@@ -266,21 +266,24 @@ export class CorrectionCueLearner {
|
|
|
266
266
|
*/
|
|
267
267
|
canRunKeywordOptimization(): boolean {
|
|
268
268
|
// D-39-12, D-39-13: Per-workspace throttle, 4 calls/day
|
|
269
|
-
|
|
269
|
+
// Uses dedicated keywordOptRunTimestamps array to avoid pollution from regular nocturnal runs (#321)
|
|
270
|
+
const cooldown = checkKeywordOptCooldown(this.stateDir, {
|
|
270
271
|
maxRunsPerWindow: 4,
|
|
271
272
|
quotaWindowMs: 24 * 60 * 60 * 1000,
|
|
272
273
|
});
|
|
273
|
-
return !cooldown.
|
|
274
|
+
return !cooldown.quotaExhausted;
|
|
274
275
|
}
|
|
275
276
|
|
|
276
277
|
/**
|
|
277
278
|
* Records that an optimization was performed.
|
|
278
|
-
* Updates lastOptimizedAt for the store
|
|
279
|
-
*
|
|
279
|
+
* Updates lastOptimizedAt for the store and records the run in the
|
|
280
|
+
* keyword-optimization quota (kept separate from the regular nocturnal quota).
|
|
281
|
+
* @throws Error if quota recording fails — caller should propagate
|
|
280
282
|
*/
|
|
281
|
-
recordOptimizationPerformed(): void {
|
|
283
|
+
async recordOptimizationPerformed(): Promise<void> {
|
|
282
284
|
this.store.lastOptimizedAt = new Date().toISOString();
|
|
283
285
|
this.flush();
|
|
286
|
+
await recordKeywordOptRun(this.stateDir);
|
|
284
287
|
}
|
|
285
288
|
|
|
286
289
|
/**
|
package/src/core/init.ts
CHANGED
|
@@ -6,6 +6,7 @@ import { PD_DIRS } from './paths.js';
|
|
|
6
6
|
import { defaultContextConfig } from '../types.js';
|
|
7
7
|
import { loadStore, setPrincipleState, type PrincipleTrainingState } from './principle-training-state.js';
|
|
8
8
|
import { atomicWriteFileSync } from '../utils/io.js';
|
|
9
|
+
import { createDefaultKeywordStore, saveKeywordStore } from './empathy-keyword-matcher.js';
|
|
9
10
|
|
|
10
11
|
/**
|
|
11
12
|
* Default PROFILE.json content
|
|
@@ -245,6 +246,17 @@ export function ensureStateTemplates(ctx: { logger: PluginLogger }, stateDir: st
|
|
|
245
246
|
fs.copyFileSync(dictTemplate, dictDest);
|
|
246
247
|
ctx.logger.info(`[PD] Initialized pain dictionary in stateDir: ${dictDest} (Lang: ${language})`);
|
|
247
248
|
}
|
|
249
|
+
|
|
250
|
+
// 3. Initialize empathy keyword store for new users
|
|
251
|
+
// loadKeywordStore() creates the file if missing, but we create it explicitly
|
|
252
|
+
// here so the file exists before any agent/workflow tries to read it
|
|
253
|
+
const empathyFile = path.join(stateDir, 'empathy_keywords.json');
|
|
254
|
+
if (!fs.existsSync(empathyFile)) {
|
|
255
|
+
const lang = language === 'zh' || language === 'en' ? language : 'en';
|
|
256
|
+
const store = createDefaultKeywordStore(lang);
|
|
257
|
+
saveKeywordStore(stateDir, store);
|
|
258
|
+
ctx.logger.info(`[PD] Initialized empathy keyword store in stateDir: ${empathyFile}`);
|
|
259
|
+
}
|
|
248
260
|
} catch (err) {
|
|
249
261
|
ctx.logger.error(`[PD] Failed to initialize state templates: ${String(err)}`);
|
|
250
262
|
}
|
|
@@ -128,19 +128,24 @@ function containsPlaceholder(val: string): boolean {
|
|
|
128
128
|
|
|
129
129
|
/**
 * Patterns that identify raw source code or credential literals.
 *
 * Hoisted to module scope so the regexes are built once rather than on every
 * call. Design notes:
 * - Code keywords are anchored with `\b` so they do not fire inside longer
 *   words (e.g. "subclass hierarchy").
 * - `import` must be followed by a quoted module specifier and `export` by a
 *   declaration keyword, `{` or `*`, so ordinary prose such as
 *   "export the report" or "import the data from the CSV" is not flagged.
 * - Credential names use a "not preceded by a letter or digit" lookbehind
 *   instead of `\b`: `\b` treats `_` as a word character and therefore misses
 *   snake_case names such as `client_secret=` or `access_token=`.
 */
const RAW_CONTENT_PATTERNS: readonly RegExp[] = [
  /\bfunction\s+\w+\s*\(/, // function definitions: "function foo("
  /\bclass\s+\w+/, // class definitions: "class Foo"
  /\bconst\s+\w+\s*=/, // const declarations: "const x ="
  /\blet\s+\w+\s*=/, // let declarations: "let x ="
  /\bimport\s+.+\s+from\s*['"]/, // import statements: "import ... from '...'"
  /\bexport\s+(?:(?:default|const|let|var|function|async|class|abstract|type|interface|enum|namespace|declare)\b|[{*])/, // export statements
  /(?<![a-z0-9])api[_-]?key\s*=\s*\S+/i, // api_key= or api-key= with a value
  /(?<![a-z0-9])password\s*=\s*\S+/i, // password= with a value
  /(?<![a-z0-9])secret\s*=\s*\S+/i, // secret= with a value (incl. client_secret=)
  /(?<![a-z0-9])token\s*=\s*\S+/i, // token= with a value (incl. access_token=)
];

/**
 * Check if a string contains raw/private content patterns.
 * Only matches actual code syntax or credential literals — not
 * incidental words that happen to appear in code context.
 *
 * @param val - Text to inspect.
 * @returns `true` when at least one raw-code or credential pattern matches.
 */
function containsRawContent(val: string): boolean {
  return RAW_CONTENT_PATTERNS.some((pattern) => pattern.test(val));
}
|
|
145
150
|
|
|
146
151
|
// ---------------------------------------------------------------------------
|
|
@@ -704,6 +704,10 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
704
704
|
return /timeout/i.test(detail) ? 'runtime_timeout' : 'runtime_run_failed';
|
|
705
705
|
}
|
|
706
706
|
|
|
707
|
+
/** Returns a promise that resolves after `ms` milliseconds (via setTimeout); awaited between Scribe retry attempts. */
private sleep(ms: number): Promise<void> {
|
|
708
|
+
return new Promise(resolve => setTimeout(resolve, ms));
|
|
709
|
+
}
|
|
710
|
+
|
|
707
711
|
async invokeDreamer(
|
|
708
712
|
snapshot: NocturnalSessionSnapshot,
|
|
709
713
|
principleId: string,
|
|
@@ -808,45 +812,60 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
808
812
|
_config: TrinityConfig
|
|
809
813
|
): Promise<TrinityDraftArtifact | null> {
|
|
810
814
|
this.lastFailureReason = null;
|
|
811
|
-
const runId = `scribe-${randomUUID()}`;
|
|
812
|
-
const sessionFile = this.createSessionFile('scribe');
|
|
813
815
|
const prompt = this.buildScribePrompt(dreamerOutput, philosopherOutput, snapshot, principleId);
|
|
814
816
|
const model = this.resolveModel();
|
|
815
817
|
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
extraSystemPrompt: NOCTURNAL_SCRIBE_PROMPT,
|
|
822
|
-
config: this.loadFullConfig(),
|
|
823
|
-
provider: model.provider,
|
|
824
|
-
model: model.model,
|
|
825
|
-
timeoutMs: this.stageTimeoutMs,
|
|
826
|
-
runId,
|
|
827
|
-
disableTools: true,
|
|
828
|
-
});
|
|
818
|
+
// Retry up to 2 times on JSON parse / missing-field errors (common LLM output issues)
|
|
819
|
+
const maxAttempts = 3;
|
|
820
|
+
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
|
|
821
|
+
const runId = `scribe-${randomUUID()}`;
|
|
822
|
+
const sessionFile = this.createSessionFile('scribe');
|
|
829
823
|
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
824
|
+
try {
|
|
825
|
+
const result = await this.api.runtime.agent.runEmbeddedPiAgent({
|
|
826
|
+
sessionId: runId,
|
|
827
|
+
sessionFile,
|
|
828
|
+
prompt,
|
|
829
|
+
extraSystemPrompt: NOCTURNAL_SCRIBE_PROMPT,
|
|
830
|
+
config: this.loadFullConfig(),
|
|
831
|
+
provider: model.provider,
|
|
832
|
+
model: model.model,
|
|
833
|
+
timeoutMs: this.stageTimeoutMs,
|
|
834
|
+
runId,
|
|
835
|
+
disableTools: true,
|
|
836
|
+
});
|
|
837
|
+
|
|
838
|
+
const outputText = this.extractPayloadText(result);
|
|
839
|
+
if (!outputText) {
|
|
840
|
+
this.recordFailure('runtime_session_read_failed', 'Scribe returned empty response');
|
|
841
|
+
if (attempt < maxAttempts) { await this.sleep(1000); continue; }
|
|
842
|
+
return null;
|
|
843
|
+
}
|
|
835
844
|
|
|
836
|
-
|
|
837
|
-
|
|
845
|
+
// DEBUG: Log Scribe's actual output
|
|
846
|
+
this.api.logger?.info(`[Trinity:Scribe] Output preview (attempt ${attempt}): ${outputText.slice(0, 800)}`);
|
|
838
847
|
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
848
|
+
const artifact = this.parseScribeOutput(outputText, snapshot, principleId, telemetry);
|
|
849
|
+
if (artifact) return artifact;
|
|
850
|
+
|
|
851
|
+
// JSON parse or missing-field error — retry
|
|
852
|
+
if (attempt < maxAttempts) {
|
|
853
|
+
await this.sleep(1500);
|
|
854
|
+
continue;
|
|
855
|
+
}
|
|
856
|
+
return null;
|
|
857
|
+
} catch (err) {
|
|
858
|
+
this.recordFailure(this.classifyRuntimeError(err), err);
|
|
859
|
+
if (attempt < maxAttempts) { await this.sleep(2000); continue; }
|
|
860
|
+
return null;
|
|
861
|
+
} finally {
|
|
862
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
863
|
+
try { fs.unlinkSync(sessionFile); } catch (err) { this.api.logger?.warn?.(`[Trinity] Failed to delete session file: ${sessionFile}`); }
|
|
864
|
+
}
|
|
846
865
|
}
|
|
866
|
+
return null;
|
|
847
867
|
}
|
|
848
868
|
|
|
849
|
-
|
|
850
869
|
async close(): Promise<void> {
|
|
851
870
|
// Clean up temp directory
|
|
852
871
|
try {
|
|
@@ -857,7 +876,9 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
857
876
|
}
|
|
858
877
|
fs.rmSync(this.tempDir, { recursive: true, force: true });
|
|
859
878
|
}
|
|
860
|
-
} catch {
|
|
879
|
+
} catch (err) {
|
|
880
|
+
this.api.logger?.warn?.(`[Trinity] Session cleanup failed: ${String(err)}`);
|
|
881
|
+
}
|
|
861
882
|
}
|
|
862
883
|
|
|
863
884
|
// ---------------------------------------------------------------------------
|
package/src/core/pain.ts
CHANGED
|
@@ -162,6 +162,62 @@ export function writePainFlag(projectDir: string, painData: PainFlagData): void
|
|
|
162
162
|
atomicWriteFileSync(painFlagPath, serializeKvLines(painData));
|
|
163
163
|
}
|
|
164
164
|
|
|
165
|
+
/**
 * Combined trajectory record + pain flag write.
 *
 * Records the pain event to trajectory first to obtain its ID, then builds
 * and writes the pain flag with that ID embedded as `pain_event_id`, so the
 * pain→principle→compile chain can reference the exact matching event.
 *
 * Error handling:
 * - Trajectory recording is best-effort: if `recordPainEvent` throws, is
 *   unavailable, or returns anything other than a non-negative number, the
 *   flag is still written, just without `pain_event_id`.
 * - If building or writing the flag fails, the error propagates to the caller.
 *
 * @param wctx - Workspace handle; `trajectory` may be absent/null, `workspaceDir`
 *   is where the pain flag is written.
 * @param trajectoryParams - Fields forwarded to `trajectory.recordPainEvent`
 *   (missing nullable fields are sent as `null`).
 * @param painFlagParams - Fields forwarded to `buildPainFlag`.
 * @throws Whatever `buildPainFlag` / `writePainFlag` throw.
 */
export function recordAndWritePainFlag(
  wctx: {
    trajectory?: { recordPainEvent(input: { sessionId: string; source: string; score: number; reason?: string | null; severity?: string | null; origin?: string | null; confidence?: number | null; text?: string }): number } | null;
    workspaceDir: string;
  },
  trajectoryParams: {
    sessionId: string;
    source: string;
    score: number;
    reason?: string | null;
    severity?: string | null;
    origin?: string | null;
    confidence?: number | null;
    text?: string;
  },
  painFlagParams: {
    source: string;
    score: string;
    reason: string;
    session_id?: string;
    agent_id?: string;
    is_risky?: boolean;
    trace_id?: string;
    trigger_text_preview?: string;
  }
): void {
  let trajectoryPainId: number | undefined;
  try {
    trajectoryPainId = wctx.trajectory?.recordPainEvent({
      sessionId: trajectoryParams.sessionId,
      source: trajectoryParams.source,
      score: trajectoryParams.score,
      reason: trajectoryParams.reason ?? null,
      severity: trajectoryParams.severity ?? null,
      origin: trajectoryParams.origin ?? null,
      confidence: trajectoryParams.confidence ?? null,
      text: trajectoryParams.text,
    });
  } catch {
    // Deliberate best-effort: a trajectory failure must not prevent the pain
    // flag from being written; the flag simply carries no pain_event_id.
    trajectoryPainId = undefined;
  }

  // The typeof guard also rejects a runtime `null`, which would otherwise pass
  // `>= 0` and be serialized as the string "null".
  const hasValidId = typeof trajectoryPainId === 'number' && trajectoryPainId >= 0;

  const painData = buildPainFlag({
    ...painFlagParams,
    pain_event_id: hasValidId ? String(trajectoryPainId) : undefined,
  });

  writePainFlag(wctx.workspaceDir, painData);
}
|
|
220
|
+
|
|
165
221
|
/**
|
|
166
222
|
* Converts a JSON pain flag object to KV format.
|
|
167
223
|
*/
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Registers a compiled rule into the principle tree ledger:
|
|
5
5
|
* 1. Creates a LedgerRule with type 'gate', enforcement 'block', status 'proposed'
|
|
6
|
-
* 2. Creates an Implementation with type 'code', lifecycleState '
|
|
6
|
+
* 2. Creates an Implementation with type 'code', lifecycleState 'active'
|
|
7
7
|
*
|
|
8
8
|
* IDEMPOTENCY: If the rule already exists, returns existing registration.
|
|
9
9
|
* ROLLBACK: If implementation creation fails after rule creation, attempts cleanup.
|
|
@@ -64,91 +64,134 @@ const BUILTIN_PATTERNS: BuiltinPatternEntry[] = [
|
|
|
64
64
|
{
|
|
65
65
|
id: 'T-01',
|
|
66
66
|
patterns: [
|
|
67
|
+
// Thinking process language (AI reasoning output)
|
|
67
68
|
/let me (first )?(understand|map|outline|survey|review the (structure|architecture|dependencies))/i,
|
|
68
69
|
/before (changing|editing|touching) anything/i,
|
|
69
70
|
/让我先(梳理|理解|看看|盘点).*(结构|架构|依赖|全貌)/i,
|
|
70
71
|
/在执行任何.*/i,
|
|
72
|
+
// Decision directive language (nocturnal artifact betterDecision)
|
|
73
|
+
/review (the |)(docs|architecture|structure|requirements|constraints|dependencies|authorization)/i,
|
|
74
|
+
/read (the |)(docs|architecture|requirements|constraints|logs?|error|stack)/i,
|
|
75
|
+
/check (the |)(docs|architecture|source|authorization|requirements|error)/i,
|
|
76
|
+
/understand (the |)(structure|architecture|requirements|constraints|full (context|picture))/i,
|
|
77
|
+
/first (to|and|,) (verify|check|review|read|understand|confirm|diagnose)/i,
|
|
78
|
+
/(verify|check|review|read|confirm) .+ first/i,
|
|
71
79
|
],
|
|
72
80
|
baselineScenarios: ['exploration', 'discovery'],
|
|
73
81
|
},
|
|
74
82
|
{
|
|
75
83
|
id: 'T-02',
|
|
76
84
|
patterns: [
|
|
85
|
+
// Thinking process language
|
|
77
86
|
/(type|test|contract|schema|interface) (constraint|requirement|check|validation)/i,
|
|
78
87
|
/(必须|需要).*(遵守|符合|满足).*(类型|测试|契约|接口|规范)/i,
|
|
88
|
+
// Decision directive language
|
|
89
|
+
/(trust|follow|respect|adhere to|meet) (the |)(constraint|requirement|contract|interface|specification|schema|type)/i,
|
|
90
|
+
/(check|verify|validate|ensure) (the |)(constraint|requirement|contract|interface|specification|type|schema)/i,
|
|
79
91
|
],
|
|
80
92
|
baselineScenarios: ['constraint-check', 'contract-verification'],
|
|
81
93
|
},
|
|
82
94
|
{
|
|
83
95
|
id: 'T-03',
|
|
84
96
|
patterns: [
|
|
97
|
+
// Thinking process language
|
|
85
98
|
/based on (the |this )?(evidence|logs?|output|error|stack trace|test result)/i,
|
|
86
99
|
/let me (check|verify|confirm|read|look at) (the |)(actual|source|code|file|log)/i,
|
|
87
100
|
/根据(日志|证据|输出|报错|堆栈|测试结果)/i,
|
|
101
|
+
// Decision directive language (evidence-based decision)
|
|
102
|
+
/(verify|check|confirm) (the |)(error|error code|error message|actual|evidence|logs?|output|stack)/i,
|
|
103
|
+
/(error|failure) (logs?|output|stack trace|message|code) (before|then)/i,
|
|
104
|
+
/(look at|read) (the |)(error|logs?|output|stack|actual|evidence|file|code)/i,
|
|
88
105
|
],
|
|
89
106
|
baselineScenarios: ['evidence-gathering', 'verification'],
|
|
90
107
|
},
|
|
91
108
|
{
|
|
92
109
|
id: 'T-04',
|
|
93
110
|
patterns: [
|
|
111
|
+
// Thinking process language
|
|
94
112
|
/this (is|would be) (irreversible|destructive|permanent|not easily undone)/i,
|
|
95
113
|
/(reversible|can be undone|safely roll back)/i,
|
|
96
114
|
/(不可逆|破坏性|无法回滚|可以回滚|安全撤销)/i,
|
|
115
|
+
// Decision directive language
|
|
116
|
+
/(irreversible|destructive|permanent|high impact) (action|change|operation)/i,
|
|
117
|
+
/(must be|need to be|should be) (reviewed|confirmed|verified|checked) (first|before)/i,
|
|
118
|
+
/(get|obtain|ask for) (confirmation|authorization|approval|permission) (first|before)/i,
|
|
97
119
|
],
|
|
98
120
|
baselineScenarios: ['risk-management', 'reversibility'],
|
|
99
121
|
},
|
|
100
122
|
{
|
|
101
123
|
id: 'T-05',
|
|
102
124
|
patterns: [
|
|
125
|
+
// Thinking process language
|
|
103
126
|
/we (must|should) (not|never|avoid|prevent|ensure we don't)/i,
|
|
104
127
|
/(critical|important) (not to|that we don't|to avoid)/i,
|
|
105
128
|
/(绝不能|必须避免|不可|禁止|确保不会)/i,
|
|
129
|
+
// Decision directive language
|
|
130
|
+
/(must|should|need to) (not |never |avoid )?(check|verify|review|read|confirm|ask)/i,
|
|
131
|
+
/(safety|safe) (check|gate|checklist|guard)/i,
|
|
106
132
|
],
|
|
107
133
|
baselineScenarios: ['guardrails', 'safety-rails'],
|
|
108
134
|
},
|
|
109
135
|
{
|
|
110
136
|
id: 'T-06',
|
|
111
137
|
patterns: [
|
|
138
|
+
// Thinking process language
|
|
112
139
|
/(simpl(er|est|ify)|minimal|straightforward|lean) (approach|solution|fix|implementation)/i,
|
|
113
140
|
/(simple is better|keep it simple|no need to over)/i,
|
|
114
141
|
/(最简|更简单|精简|没必要过度设计)/i,
|
|
142
|
+
// Decision directive language
|
|
143
|
+
/(minimal|smallest|narrowest) (change|modification|impact|approach)/i,
|
|
144
|
+
/(only|just) (change|modify|touch|edit|affect) (the |what)/i,
|
|
115
145
|
],
|
|
116
146
|
baselineScenarios: ['simplification', 'pragmatism'],
|
|
117
147
|
},
|
|
118
148
|
{
|
|
119
149
|
id: 'T-07',
|
|
120
150
|
patterns: [
|
|
151
|
+
// Thinking process language
|
|
121
152
|
/(minimal|smallest|narrowest|least) (change|diff|modification|impact)/i,
|
|
122
153
|
/only (change|modify|touch|edit) (the |what)/i,
|
|
123
154
|
/(最小改动|最小变更|只改|只动必要部分)/i,
|
|
155
|
+
// Decision directive language
|
|
156
|
+
/(limit|narrow) (the |)(change|impact|scope|blast radius)/i,
|
|
124
157
|
],
|
|
125
158
|
baselineScenarios: ['minimal-diff', 'blast-radius-control'],
|
|
126
159
|
},
|
|
127
160
|
{
|
|
128
161
|
id: 'T-08',
|
|
129
162
|
patterns: [
|
|
163
|
+
// Thinking process language
|
|
130
164
|
/this (error|failure|issue) (tells us|indicates|signals|suggests|means)/i,
|
|
131
165
|
/let me (stop|pause|step back|reconsider|rethink)/i,
|
|
132
166
|
/这个(错误|失败|问题).*(说明|意味着|提示)/i,
|
|
133
167
|
/让我(停下|暂停|退一步|重新考虑|重新审视)/i,
|
|
168
|
+
// Decision directive language
|
|
169
|
+
/(diagnose|analyze|investigate) (the |)(root |)(cause|reason|error|failure) (first|before)/i,
|
|
170
|
+
/(understand|diagnose|analyze) (the |)(error|failure|issue|problem) (first|before)/i,
|
|
134
171
|
],
|
|
135
172
|
baselineScenarios: ['reflection', 'pain-response'],
|
|
136
173
|
},
|
|
137
174
|
{
|
|
138
175
|
id: 'T-09',
|
|
139
176
|
patterns: [
|
|
177
|
+
// Thinking process language
|
|
140
178
|
/(break|split|decompose|divide) (this |the task |it )?(into|down)/i,
|
|
141
179
|
/(step 1|first,? (we|i|let's)|phase 1)/i,
|
|
142
180
|
/(拆分|分解|分步|分阶段|第一步)/i,
|
|
181
|
+
// Decision directive language
|
|
182
|
+
/(break|split|decompose|divide) (the |)(task|problem|action) (into|into steps)/i,
|
|
143
183
|
],
|
|
144
184
|
baselineScenarios: ['decomposition', 'phased-execution'],
|
|
145
185
|
},
|
|
146
186
|
{
|
|
147
187
|
id: 'T-10',
|
|
148
188
|
patterns: [
|
|
189
|
+
// Thinking process language
|
|
149
190
|
/let me (write|save|record|note down|document)/i,
|
|
150
191
|
/memory.*scratchpad|write.*plan\.md|write.*memory|memory.*persist/i,
|
|
151
192
|
/(让我.*写入|写入.*memory|记录.*scratchpad)/i,
|
|
193
|
+
// Decision directive language
|
|
194
|
+
/(write|save|record|note) (the |)(conclusion|decision|plan|next step) (to|in) (file|plan\.md|scratchpad)/i,
|
|
152
195
|
],
|
|
153
196
|
baselineScenarios: ['memory-persistence', 'state-externalization'],
|
|
154
197
|
},
|
|
@@ -105,7 +105,7 @@ export function recordGateBlockAndReturn(
|
|
|
105
105
|
if (sessionId) {
|
|
106
106
|
const GATE_BLOCK_PAIN_SCORE = 30; // Moderate — not a failure but a blocked intent
|
|
107
107
|
try {
|
|
108
|
-
wctx.trajectory?.recordPainEvent?.({
|
|
108
|
+
const trajectoryPainId = wctx.trajectory?.recordPainEvent?.({
|
|
109
109
|
sessionId,
|
|
110
110
|
source: 'gate_blocked',
|
|
111
111
|
score: GATE_BLOCK_PAIN_SCORE,
|
|
@@ -135,6 +135,7 @@ export function recordGateBlockAndReturn(
|
|
|
135
135
|
session_id: sessionId,
|
|
136
136
|
agent_id: 'main',
|
|
137
137
|
is_risky: false,
|
|
138
|
+
pain_event_id: trajectoryPainId !== undefined && trajectoryPainId >= 0 ? String(trajectoryPainId) : undefined,
|
|
138
139
|
});
|
|
139
140
|
writePainFlag(workspaceDir, flag);
|
|
140
141
|
}
|
package/src/hooks/lifecycle.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import * as fs from 'fs';
|
|
2
2
|
import * as path from 'path';
|
|
3
3
|
import * as readline from 'readline';
|
|
4
|
-
import {
|
|
4
|
+
import { recordAndWritePainFlag } from '../core/pain.js';
|
|
5
5
|
import { atomicWriteFileSync } from '../utils/io.js';
|
|
6
6
|
import { WorkspaceContext } from '../core/workspace-context.js';
|
|
7
7
|
import { PD_DIRS } from '../core/paths.js';
|
|
@@ -154,7 +154,14 @@ export async function extractPainFromSessionFile(sessionFile: string, ctx: Plugi
|
|
|
154
154
|
|
|
155
155
|
const hasFatal = painPoints.some(p => p.includes('[FATAL INTERCEPT]'));
|
|
156
156
|
if (hasFatal) {
|
|
157
|
-
|
|
157
|
+
recordAndWritePainFlag(wctx, {
|
|
158
|
+
sessionId: ctx.sessionId || 'unknown',
|
|
159
|
+
source: 'intercept_extraction',
|
|
160
|
+
score: 100,
|
|
161
|
+
reason: 'Hard intercept detected in session history compaction.',
|
|
162
|
+
severity: 'severe',
|
|
163
|
+
origin: 'system_infer',
|
|
164
|
+
}, {
|
|
158
165
|
source: 'intercept_extraction',
|
|
159
166
|
score: '100',
|
|
160
167
|
reason: 'Hard intercept detected in session history compaction.',
|
|
@@ -162,7 +169,7 @@ export async function extractPainFromSessionFile(sessionFile: string, ctx: Plugi
|
|
|
162
169
|
trigger_text_preview: painPoints.find(p => p.includes('[FATAL INTERCEPT]'))?.substring(0, 150) || 'Fatal intercept',
|
|
163
170
|
session_id: ctx.sessionId || '',
|
|
164
171
|
agent_id: ctx.agentId || '',
|
|
165
|
-
})
|
|
172
|
+
});
|
|
166
173
|
}
|
|
167
174
|
} catch (err) {
|
|
168
175
|
ctx.logger?.error?.(`[PD:Lifecycle] Failed to write pain signals: ${String(err)}`);
|
package/src/hooks/llm.ts
CHANGED
|
@@ -2,7 +2,7 @@ import * as fs from 'fs';
|
|
|
2
2
|
import * as path from 'path';
|
|
3
3
|
import type { PluginHookLlmOutputEvent, PluginHookAgentContext } from '../openclaw-sdk.js';
|
|
4
4
|
import { trackLlmOutput, recordThinkingCheckpoint, resetFriction } from '../core/session-tracker.js';
|
|
5
|
-
import {
|
|
5
|
+
import { recordAndWritePainFlag } from '../core/pain.js';
|
|
6
6
|
import { normalizeSeverity } from '../core/empathy-types.js';
|
|
7
7
|
import { ControlUiDatabase } from '../core/control-ui-db.js';
|
|
8
8
|
import { DetectionService } from '../core/detection-service.js';
|
|
@@ -234,7 +234,14 @@ export function handleLlmOutput(
|
|
|
234
234
|
const snippet = text.length > 200 ? text.substring(0, 100) + '...' + text.substring(text.length - 100) : text;
|
|
235
235
|
|
|
236
236
|
try {
|
|
237
|
-
|
|
237
|
+
recordAndWritePainFlag(wctx, {
|
|
238
|
+
sessionId: ctx.sessionId || 'unknown',
|
|
239
|
+
source,
|
|
240
|
+
score: painScore,
|
|
241
|
+
reason: matchedReason,
|
|
242
|
+
severity: painScore >= 70 ? 'severe' : painScore >= 40 ? 'moderate' : 'mild',
|
|
243
|
+
origin: 'system_infer',
|
|
244
|
+
}, {
|
|
238
245
|
source,
|
|
239
246
|
score: String(painScore),
|
|
240
247
|
reason: matchedReason,
|
|
@@ -242,7 +249,7 @@ export function handleLlmOutput(
|
|
|
242
249
|
trigger_text_preview: snippet,
|
|
243
250
|
session_id: ctx.sessionId || '',
|
|
244
251
|
agent_id: ctx.agentId || '',
|
|
245
|
-
})
|
|
252
|
+
});
|
|
246
253
|
} catch (e) {
|
|
247
254
|
ctx.logger?.warn?.(`[PD:LLM] Failed to write pain flag: ${String(e)}`);
|
|
248
255
|
}
|
|
@@ -9,6 +9,7 @@ import type { WorkspaceContext } from '../core/workspace-context.js';
|
|
|
9
9
|
import { readPainFlagContract } from '../core/pain.js';
|
|
10
10
|
import type { EvolutionQueueItem } from './evolution-queue-migration.js';
|
|
11
11
|
import type { RecentPainContext } from './evolution-queue-migration.js';
|
|
12
|
+
import { SLEEP_REFLECTION_DEDUP_WINDOW_MS } from './queue-io.js';
|
|
12
13
|
|
|
13
14
|
/**
|
|
14
15
|
* Read recent pain context from PAIN_FLAG file.
|
|
@@ -63,14 +64,13 @@ export function hasRecentSimilarReflection(
|
|
|
63
64
|
painSourceKey: string,
|
|
64
65
|
now: number,
|
|
65
66
|
): EvolutionQueueItem | null {
|
|
66
|
-
const DEDUP_WINDOW_MS = 4 * 60 * 60 * 1000; // 4 hours
|
|
67
67
|
return queue.find((t) => {
|
|
68
68
|
if (t.taskKind !== 'sleep_reflection') return false;
|
|
69
69
|
// Only match completed tasks (exclude failed to allow retries)
|
|
70
70
|
if (t.status !== 'completed') return false;
|
|
71
71
|
if (!t.completed_at) return false;
|
|
72
72
|
const age = now - new Date(t.completed_at).getTime();
|
|
73
|
-
if (age >
|
|
73
|
+
if (age > SLEEP_REFLECTION_DEDUP_WINDOW_MS) return false;
|
|
74
74
|
const taskPainKey = buildPainSourceKey(t.recentPainContext ?? { mostRecent: null, recentPainCount: 0, recentMaxPainScore: 0 });
|
|
75
75
|
// If either side has no pain context, they don't match
|
|
76
76
|
if (!taskPainKey) return false;
|
|
@@ -7,7 +7,7 @@ import * as path from 'path';
|
|
|
7
7
|
import type { OpenClawPluginServiceContext, OpenClawPluginApi, PluginLogger } from '../openclaw-sdk.js';
|
|
8
8
|
import { DictionaryService } from '../core/dictionary-service.js';
|
|
9
9
|
import { DetectionService } from '../core/detection-service.js';
|
|
10
|
-
import { ensureStateTemplates } from '../core/init.js';
|
|
10
|
+
import { ensureStateTemplates, ensureCorePrinciples } from '../core/init.js';
|
|
11
11
|
import { SystemLogger } from '../core/system-logger.js';
|
|
12
12
|
import { WorkspaceContext } from '../core/workspace-context.js';
|
|
13
13
|
import type { EventLog } from '../core/event-log.js';
|
|
@@ -1747,7 +1747,7 @@ async function processEvolutionQueue(wctx: WorkspaceContext, logger: PluginLogge
|
|
|
1747
1747
|
|
|
1748
1748
|
if (parsedResult?.updated) {
|
|
1749
1749
|
koService.applyResult(parsedResult);
|
|
1750
|
-
learner.recordOptimizationPerformed();
|
|
1750
|
+
await learner.recordOptimizationPerformed();
|
|
1751
1751
|
logger?.info?.(`[PD:EvolutionWorker] keyword_optimization applied mutations: ${parsedResult.summary}`);
|
|
1752
1752
|
} else {
|
|
1753
1753
|
logger?.info?.(`[PD:EvolutionWorker] keyword_optimization completed with no updates`);
|
|
@@ -2080,6 +2080,7 @@ export const EvolutionWorkerService: ExtendedEvolutionWorkerService = {
|
|
|
2080
2080
|
const {config} = wctx;
|
|
2081
2081
|
const language = config.get('language') || 'en';
|
|
2082
2082
|
ensureStateTemplates({ logger }, wctx.stateDir, language);
|
|
2083
|
+
ensureCorePrinciples(wctx.stateDir, logger);
|
|
2083
2084
|
|
|
2084
2085
|
const initialDelay = 5000;
|
|
2085
2086
|
const interval = config.get('intervals.worker_poll_ms') || (15 * 60 * 1000);
|
|
@@ -92,7 +92,7 @@ export class KeywordOptimizationService {
|
|
|
92
92
|
learner.recordFalsePositive(term);
|
|
93
93
|
this.logger?.info?.(`[KeywordOptimizationService] FP recorded for term="${term}" (weight x0.8)`);
|
|
94
94
|
} catch (fpErr) {
|
|
95
|
-
this.logger?.warn?.(`[KeywordOptimizationService
|
|
95
|
+
this.logger?.warn?.(`[KeywordOptimizationService] recordFalsePositive failed for term="${term}": ${String(fpErr)}`);
|
|
96
96
|
}
|
|
97
97
|
}
|
|
98
98
|
}
|
|
@@ -24,6 +24,7 @@ import type { SessionState } from '../core/session-tracker.js';
|
|
|
24
24
|
import { listSessions } from '../core/session-tracker.js';
|
|
25
25
|
import { withLockAsync } from '../utils/file-lock.js';
|
|
26
26
|
import { atomicWriteFileSync } from '../utils/io.js';
|
|
27
|
+
import { DEFAULT_IDLE_THRESHOLD_MS, DEFAULT_QUOTA_WINDOW_MS } from '../config/defaults/runtime.js';
|
|
27
28
|
|
|
28
29
|
// ---------------------------------------------------------------------------
|
|
29
30
|
// System Session Detection
|
|
@@ -86,9 +87,6 @@ function isSystemSession(state: SessionState): boolean {
|
|
|
86
87
|
/** File name for nocturnal runtime bookkeeping */
|
|
87
88
|
export const NOCTURNAL_RUNTIME_FILE = 'nocturnal-runtime.json';
|
|
88
89
|
|
|
89
|
-
/** Default idle threshold: workspace is considered idle if no activity for this duration (ms) */
|
|
90
|
-
export const DEFAULT_IDLE_THRESHOLD_MS = 30 * 60 * 1000; // 30 minutes
|
|
91
|
-
|
|
92
90
|
/** Default cooldown between nocturnal runs (ms) */
|
|
93
91
|
export const DEFAULT_GLOBAL_COOLDOWN_MS = 60 * 60 * 1000; // 1 hour
|
|
94
92
|
|
|
@@ -98,9 +96,6 @@ export const DEFAULT_PRINCIPLE_COOLDOWN_MS = 6 * 60 * 60 * 1000; // 6 hours
|
|
|
98
96
|
/** Default maximum nocturnal runs per quota window */
|
|
99
97
|
export const DEFAULT_MAX_RUNS_PER_WINDOW = 3;
|
|
100
98
|
|
|
101
|
-
/** Default quota window size (ms) */
|
|
102
|
-
export const DEFAULT_QUOTA_WINDOW_MS = 24 * 60 * 60 * 1000; // 24 hours
|
|
103
|
-
|
|
104
99
|
/** Abandoned session threshold: sessions inactive for longer than this are ignored (ms) */
|
|
105
100
|
export const DEFAULT_ABANDONED_THRESHOLD_MS = 2 * 60 * 60 * 1000; // 2 hours
|
|
106
101
|
|
|
@@ -144,6 +139,12 @@ export interface NocturnalRuntimeState {
|
|
|
144
139
|
*/
|
|
145
140
|
recentRunTimestamps: string[];
|
|
146
141
|
|
|
142
|
+
/**
|
|
143
|
+
* Sliding window of keyword optimization run timestamps.
|
|
144
|
+
* Separate from regular nocturnal runs to avoid quota pollution (fixes #321).
|
|
145
|
+
*/
|
|
146
|
+
keywordOptRunTimestamps: string[];
|
|
147
|
+
|
|
147
148
|
/** Metadata about last run (for debugging) */
|
|
148
149
|
lastRunMeta?: {
|
|
149
150
|
targetPrincipleId?: string;
|
|
@@ -206,6 +207,7 @@ function createDefaultState(): NocturnalRuntimeState {
|
|
|
206
207
|
return {
|
|
207
208
|
principleCooldowns: {},
|
|
208
209
|
recentRunTimestamps: [],
|
|
210
|
+
keywordOptRunTimestamps: [],
|
|
209
211
|
};
|
|
210
212
|
}
|
|
211
213
|
|
|
@@ -225,6 +227,7 @@ export async function readState(stateDir: string): Promise<NocturnalRuntimeState
|
|
|
225
227
|
return {
|
|
226
228
|
principleCooldowns: parsed.principleCooldowns ?? {},
|
|
227
229
|
recentRunTimestamps: parsed.recentRunTimestamps ?? [],
|
|
230
|
+
keywordOptRunTimestamps: parsed.keywordOptRunTimestamps ?? [],
|
|
228
231
|
lastRunAt: parsed.lastRunAt,
|
|
229
232
|
lastSuccessfulRunAt: parsed.lastSuccessfulRunAt,
|
|
230
233
|
globalCooldownUntil: parsed.globalCooldownUntil,
|
|
@@ -248,6 +251,7 @@ export function readStateSync(stateDir: string): NocturnalRuntimeState {
|
|
|
248
251
|
return {
|
|
249
252
|
principleCooldowns: parsed.principleCooldowns ?? {},
|
|
250
253
|
recentRunTimestamps: parsed.recentRunTimestamps ?? [],
|
|
254
|
+
keywordOptRunTimestamps: parsed.keywordOptRunTimestamps ?? [],
|
|
251
255
|
lastRunAt: parsed.lastRunAt,
|
|
252
256
|
lastSuccessfulRunAt: parsed.lastSuccessfulRunAt,
|
|
253
257
|
globalCooldownUntil: parsed.globalCooldownUntil,
|
|
@@ -447,6 +451,81 @@ export function checkCooldown(
|
|
|
447
451
|
};
|
|
448
452
|
}
|
|
449
453
|
|
|
454
|
+
/**
 * Evaluate the keyword-optimization quota from its own timestamp list.
 *
 * Fixes #321: keyword optimization is budgeted separately so that regular
 * nocturnal runs cannot use up (or be used up by) its quota.
 *
 * Only the quota fields of the result carry information; the global and
 * principle cooldown fields are always reported as inactive because those
 * cooldowns are not consulted here.
 *
 * @param stateDir - State directory the runtime state is read from
 * @param options - Quota options; `maxRunsPerWindow` defaults to 4 and
 *   `quotaWindowMs` defaults to 24 hours
 * @returns Cooldown result with `quotaExhausted` and `runsRemaining` derived
 *   from the keyword-optimization runs recorded inside the window
 */
export function checkKeywordOptCooldown(
  stateDir: string,
  options: {
    maxRunsPerWindow?: number;
    quotaWindowMs?: number;
  } = {}
): CooldownCheckResult {
  const runLimit = options.maxRunsPerWindow === undefined ? 4 : options.maxRunsPerWindow;
  const windowMs =
    options.quotaWindowMs === undefined ? 24 * 60 * 60 * 1000 : options.quotaWindowMs;

  // Anything stamped at or before this instant has aged out of the window.
  const cutoff = Date.now() - windowMs;
  const { keywordOptRunTimestamps } = readStateSync(stateDir);

  let runsInWindow = 0;
  for (const stamp of keywordOptRunTimestamps) {
    const epochMs = new Date(stamp).getTime();
    if (Number.isNaN(epochMs)) {
      // Unparseable entries are reported and never counted against the quota.
      console.warn(`[NocturnalRuntime] Malformed timestamp in keywordOptRunTimestamps: "${stamp}"`);
    } else if (epochMs > cutoff) {
      runsInWindow += 1;
    }
  }

  // Keyword optimization draws on keywordOptRunTimestamps only; the regular
  // nocturnal quota (recentRunTimestamps) and the global/principle cooldowns
  // set by regular nocturnal runs do NOT apply here.
  return {
    globalCooldownActive: false,
    globalCooldownUntil: null,
    globalCooldownRemainingMs: 0,
    principleCooldownActive: false,
    principleCooldownUntil: null,
    principleCooldownRemainingMs: 0,
    quotaExhausted: runsInWindow >= runLimit,
    runsRemaining: Math.max(0, runLimit - runsInWindow),
  };
}
|
|
503
|
+
|
|
504
|
+
/**
 * Append the current time to the keyword-optimization timestamp list and
 * drop every entry that has fallen out of the quota window.
 *
 * Only `keywordOptRunTimestamps` is modified, so regular nocturnal quota
 * tracking is unaffected.
 *
 * @param stateDir - State directory the runtime state is read from and written to
 * @param quotaWindowMs - Window size in ms (default: 24 hours)
 */
export async function recordKeywordOptRun(
  stateDir: string,
  quotaWindowMs: number = 24 * 60 * 60 * 1000
): Promise<void> {
  const state = await readState(stateDir);
  state.keywordOptRunTimestamps.push(new Date().toISOString());

  const cutoff = Date.now() - quotaWindowMs;
  const retained: string[] = [];
  for (const stamp of state.keywordOptRunTimestamps) {
    const epochMs = new Date(stamp).getTime();
    // An unparseable stamp yields NaN, which fails this comparison and is pruned.
    if (epochMs > cutoff) {
      // Survivors are re-serialised, so they are stored in canonical ISO form.
      retained.push(new Date(epochMs).toISOString());
    }
  }
  state.keywordOptRunTimestamps = retained;

  await writeState(stateDir, state);
}
|
|
528
|
+
|
|
450
529
|
/**
|
|
451
530
|
* Records a cooldown event for quota tracking (keyword_optimization etc.).
|
|
452
531
|
* Adds a timestamp to recentRunTimestamps and prunes entries outside the window.
|
|
@@ -567,6 +646,7 @@ export async function clearAllCooldowns(stateDir: string): Promise<void> {
|
|
|
567
646
|
state.globalCooldownUntil = undefined;
|
|
568
647
|
state.principleCooldowns = {};
|
|
569
648
|
state.recentRunTimestamps = [];
|
|
649
|
+
state.keywordOptRunTimestamps = [];
|
|
570
650
|
state.lastRunMeta = undefined;
|
|
571
651
|
await writeState(stateDir, state);
|
|
572
652
|
}
|
|
@@ -47,9 +47,9 @@ import {
|
|
|
47
47
|
import {
|
|
48
48
|
checkWorkspaceIdle,
|
|
49
49
|
checkCooldown,
|
|
50
|
-
DEFAULT_IDLE_THRESHOLD_MS,
|
|
51
50
|
type IdleCheckResult,
|
|
52
51
|
} from './nocturnal-runtime.js';
|
|
52
|
+
import { DEFAULT_IDLE_THRESHOLD_MS } from '../config/defaults/runtime.js';
|
|
53
53
|
import { detectViolation } from '../core/nocturnal-compliance.js';
|
|
54
54
|
|
|
55
55
|
// ---------------------------------------------------------------------------
|
package/src/service/queue-io.ts
CHANGED
|
@@ -34,7 +34,7 @@ export const LOCK_MAX_RETRIES = 50;
|
|
|
34
34
|
export const LOCK_RETRY_DELAY_MS = 50;
|
|
35
35
|
export const LOCK_STALE_MS = 30_000;
|
|
36
36
|
|
|
37
|
-
export const
|
|
37
|
+
export const SLEEP_REFLECTION_DEDUP_WINDOW_MS = 4 * 60 * 60 * 1000; // 4 hours
|
|
38
38
|
|
|
39
39
|
// ---------------------------------------------------------------------------
|
|
40
40
|
// requireQueueLock — thin wrapper that adds LockUnavailableError
|
|
@@ -129,7 +129,7 @@ function hasRecentSimilarReflection(
|
|
|
129
129
|
if (t.status !== 'completed') return false;
|
|
130
130
|
if (!t.completed_at) return false;
|
|
131
131
|
const age = now - new Date(t.completed_at).getTime();
|
|
132
|
-
if (age >
|
|
132
|
+
if (age > SLEEP_REFLECTION_DEDUP_WINDOW_MS) return false;
|
|
133
133
|
const taskPainKey = buildPainSourceKey(t.recentPainContext ?? { mostRecent: null, recentPainCount: 0, recentMaxPainScore: 0 });
|
|
134
134
|
if (!taskPainKey) return false;
|
|
135
135
|
return taskPainKey === painSourceKey;
|
|
@@ -349,13 +349,21 @@ describe('Nocturnal Arbiter', () => {
|
|
|
349
349
|
expect(result.failures.some(f => f.reason.includes('raw/private content'))).toBe(true);
|
|
350
350
|
});
|
|
351
351
|
|
|
352
|
-
it('rejects betterDecision containing credential pattern', () => {
|
|
353
|
-
|
|
352
|
+
it('rejects betterDecision containing credential assignment pattern', () => {
|
|
353
|
+
// api_key= with an actual value is a credential leak
|
|
354
|
+
const artifact = makeValidArtifact({ betterDecision: 'Check the api_key=ABC123 before proceeding' });
|
|
354
355
|
const result = validateArtifact(artifact);
|
|
355
356
|
expect(result.passed).toBe(false);
|
|
356
357
|
expect(result.failures.some(f => f.reason.includes('raw/private content'))).toBe(true);
|
|
357
358
|
});
|
|
358
359
|
|
|
360
|
+
it('accepts credential-like words without assignment (not raw content)', () => {
|
|
361
|
+
// "api_key" as a word in natural decision text is not a credential leak
|
|
362
|
+
const artifact = makeValidArtifact({ betterDecision: 'Check the api_key before proceeding' });
|
|
363
|
+
const result = validateArtifact(artifact);
|
|
364
|
+
expect(result.passed).toBe(true);
|
|
365
|
+
});
|
|
366
|
+
|
|
359
367
|
it('accepts text without raw content patterns', () => {
|
|
360
368
|
const artifact = makeValidArtifact({
|
|
361
369
|
badDecision: 'Proceeded with editing without reading the file first',
|
|
@@ -1,9 +1,28 @@
|
|
|
1
|
-
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
1
|
+
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
|
2
2
|
import * as fs from 'fs';
|
|
3
3
|
import * as os from 'os';
|
|
4
4
|
import * as path from 'path';
|
|
5
5
|
import { createWritePainFlagTool } from '../../src/tools/write-pain-flag.js';
|
|
6
6
|
|
|
7
|
+
// ─── Mock state shared between isolated tests ────────────────────────────────
|
|
8
|
+
type MockFn = (...args: unknown[]) => unknown;
|
|
9
|
+
let mockRecordPainEventFn: MockFn | undefined;
|
|
10
|
+
let mockRecordPainEventThrows = false;
|
|
11
|
+
|
|
12
|
+
/**
 * Build a stand-in trajectory object whose `recordPainEvent` spy is driven by
 * the module-level mock switches:
 * - throws when `mockRecordPainEventThrows` is set,
 * - otherwise delegates to `mockRecordPainEventFn` when one is installed,
 * - otherwise returns 1.
 */
function makeTrajectoryMock() {
  const recordPainEvent = vi.fn((...args: unknown[]) => {
    if (mockRecordPainEventThrows) {
      throw new Error('trajectory DB write error');
    }
    if (mockRecordPainEventFn) {
      return mockRecordPainEventFn(...args);
    }
    return 1;
  });
  return { recordPainEvent };
}
|
|
19
|
+
|
|
20
|
+
vi.mock('../../src/core/trajectory.js', () => ({
|
|
21
|
+
TrajectoryRegistry: {
|
|
22
|
+
get: vi.fn(() => makeTrajectoryMock()),
|
|
23
|
+
},
|
|
24
|
+
}));
|
|
25
|
+
|
|
7
26
|
/**
 * Best-effort recursive removal of a directory.
 * Any error raised by the removal is deliberately swallowed.
 */
function safeRmDir(dir: string): void {
  try {
    fs.rmSync(dir, { force: true, recursive: true });
  } catch {
    /* ignore */
  }
}
|
|
@@ -106,6 +125,8 @@ describe('write_pain_flag tool', () => {
|
|
|
106
125
|
expect(fields.score).toBe('85');
|
|
107
126
|
expect(fields.reason).toBe('Agent forgot to read file before editing');
|
|
108
127
|
expect(fields.time).toBeDefined();
|
|
128
|
+
expect(fields.pain_event_id).toBeDefined();
|
|
129
|
+
expect(fields.pain_event_id).toMatch(/^\d+$/);
|
|
109
130
|
});
|
|
110
131
|
|
|
111
132
|
// ─────────────────────────────────────────────────────────
|
|
@@ -253,4 +274,85 @@ describe('write_pain_flag tool', () => {
|
|
|
253
274
|
expect(text).toContain('manual'); // default source
|
|
254
275
|
expect(text).toContain('No'); // default is_risky
|
|
255
276
|
});
|
|
277
|
+
|
|
278
|
+
// ─────────────────────────────────────────────────────────
|
|
279
|
+
// Graceful degradation: trajectory write failure still writes flag
|
|
280
|
+
// ─────────────────────────────────────────────────────────
|
|
281
|
+
it('writes pain flag even when recordPainEvent throws', async () => {
  // Configure the module-level mock to throw
  mockRecordPainEventThrows = true;

  try {
    const api = createMockApi(workspaceDir) as any;
    const tool = createWritePainFlagTool(api);

    const result = await tool.execute('test-degradation', {
      reason: 'Pain signal with failing trajectory',
      score: 60,
      source: 'user_empathy',
    });

    // Should still succeed (graceful degradation)
    expect(result.content[0].text).toContain('✅');
    expect(result.content[0].text).toContain('60');

    // Flag file should still exist
    const painFlagPath = path.join(stateDir, '.pain_flag');
    expect(fs.existsSync(painFlagPath)).toBe(true);
    const content = fs.readFileSync(painFlagPath, 'utf-8');
    expect(content).toContain('Pain signal with failing trajectory');

    // pain_event_id should NOT be present since trajectory failed
    expect(content).not.toContain('pain_event_id:');

    // Should have logged a warning
    expect(api._logs.some((l: any) => l.level === 'warn')).toBe(true);
  } finally {
    // Reset in `finally` so a failed assertion above cannot leave the shared
    // mock throwing for every test that runs afterwards.
    mockRecordPainEventThrows = false;
  }
});
|
|
313
|
+
|
|
314
|
+
// ─────────────────────────────────────────────────────────
|
|
315
|
+
// recordPainEvent called with correct arguments
|
|
316
|
+
// ─────────────────────────────────────────────────────────
|
|
317
|
+
it('calls recordPainEvent with correct arguments from tool params', async () => {
  // Set up the spy result via the module-level mock factory
  mockRecordPainEventFn = vi.fn(() => 42);

  try {
    const api = createMockApi(workspaceDir) as any;
    const tool = createWritePainFlagTool(api);

    await tool.execute('test-spy', {
      reason: 'Spy test reason',
      score: 55,
      source: 'tool_failure',
      session_id: 'session-spy-001',
      is_risky: true,
    });

    // Access the spy through the TrajectoryRegistry.get mock
    const { TrajectoryRegistry } = await import('../../src/core/trajectory.js');
    const mockGet = TrajectoryRegistry.get as ReturnType<typeof vi.fn>;

    // Fail with a readable message (instead of a TypeError on `undefined`)
    // if the tool never asked the registry for a trajectory.
    const lastGetResult = mockGet.mock.results.at(-1);
    if (!lastGetResult) {
      throw new Error('TrajectoryRegistry.get was never called');
    }
    const mockTraj = lastGetResult.value as { recordPainEvent: ReturnType<typeof vi.fn> };

    expect(mockTraj.recordPainEvent).toHaveBeenCalledOnce();
    expect(mockTraj.recordPainEvent).toHaveBeenCalledWith(
      expect.objectContaining({
        sessionId: 'session-spy-001',
        source: 'tool_failure',
        score: 55,
        reason: 'Spy test reason',
        severity: null,
        origin: 'manual',
        confidence: null,
      })
    );

    // Also verify pain_event_id: 42 appears in the flag file
    const painFlagPath = path.join(stateDir, '.pain_flag');
    const content = fs.readFileSync(painFlagPath, 'utf-8');
    expect(content).toContain('pain_event_id: 42');
  } finally {
    // Reset in `finally` so a failed assertion above cannot leak the custom
    // spy into tests that run afterwards.
    mockRecordPainEventFn = undefined;
  }
});
|
|
256
358
|
});
|