principles-disciple 1.17.0 → 1.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
- package/src/core/nocturnal-compliance.ts +1 -0
- package/src/core/nocturnal-trinity.ts +463 -140
- package/src/service/evolution-worker.ts +13 -6
- package/src/service/nocturnal-target-selector.ts +9 -2
- package/templates/langs/zh/skills/ai-sprint-orchestration/references/specs/nocturnal-trinity-quality-enhancement.json +111 -0
- package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/lib/task-specs.mjs +1 -1
- package/templates/langs/zh/skills/ai-sprint-orchestration/scripts/run.mjs +1 -1
- package/tests/service/evolution-worker.nocturnal.test.ts +0 -547
|
@@ -23,10 +23,16 @@
|
|
|
23
23
|
* RUNTIME ADAPTER:
|
|
24
24
|
* - useStubs=true: uses synchronous stub implementations (no external calls)
|
|
25
25
|
* - useStubs=false: requires a TrinityRuntimeAdapter for real subagent execution
|
|
26
|
-
* - Adapter uses
|
|
26
|
+
* - Adapter uses api.runtime.agent.runEmbeddedPiAgent() which works in background contexts
|
|
27
|
+
* (unlike api.runtime.subagent.* which requires gateway request scope)
|
|
28
|
+
* - IMPORTANT: provider and model must be passed explicitly — runEmbeddedPiAgent does NOT
|
|
29
|
+
* read config.agents.defaults.model and falls back to openai/gpt-5.4 if not specified
|
|
27
30
|
*/
|
|
28
31
|
|
|
29
32
|
import { randomUUID } from 'crypto';
|
|
33
|
+
import * as fs from 'fs';
|
|
34
|
+
import * as os from 'os';
|
|
35
|
+
import * as path from 'path';
|
|
30
36
|
import type { NocturnalSessionSnapshot } from './nocturnal-trajectory-extractor.js';
|
|
31
37
|
import { computeThinkingModelDelta } from './nocturnal-trajectory-extractor.js';
|
|
32
38
|
import type { TrinityArtificerContext } from './nocturnal-artificer.js';
|
|
@@ -42,6 +48,13 @@ import {
|
|
|
42
48
|
type ThresholdValues,
|
|
43
49
|
} from './adaptive-thresholds.js';
|
|
44
50
|
|
|
51
|
+
// ---------------------------------------------------------------------------
|
|
52
|
+
// Configurable Model Fallback (avoid hardcoded strings deep in adapters)
|
|
53
|
+
// ---------------------------------------------------------------------------
|
|
54
|
+
|
|
55
|
+
const FALLBACK_PROVIDER = process.env.OPENCLAW_DEFAULT_PROVIDER || 'minimax-portal';
|
|
56
|
+
const FALLBACK_MODEL = process.env.OPENCLAW_DEFAULT_MODEL || 'MiniMax-M2.7';
|
|
57
|
+
|
|
45
58
|
// ---------------------------------------------------------------------------
|
|
46
59
|
// Embedded Role Prompts
|
|
47
60
|
// ---------------------------------------------------------------------------
|
|
@@ -106,6 +119,13 @@ You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no
|
|
|
106
119
|
- Provide a principle-grounded rationale (explicitly references the principle)
|
|
107
120
|
- Include a confidence score (0.0-1.0, higher = more confident)
|
|
108
121
|
|
|
122
|
+
### betterDecision FORMAT — Must be executable:
|
|
123
|
+
- MUST start with a concrete action verb: read, check, verify, edit, write, create, delete, search, grep, find, list, review, examine, inspect, test, run, execute, analyze, diagnose, debug
|
|
124
|
+
- MUST reference a specific, concrete target (file, command, config, etc.)
|
|
125
|
+
- MUST describe a bounded, executable action — not a vague principle
|
|
126
|
+
- Examples: "Read the file before editing to verify current content", "Check user permissions before executing privileged commands"
|
|
127
|
+
- Anti-examples: "Per T-01, pause all tasks..." (starts with "Per"), "Be more careful" (vague verb "be")
|
|
128
|
+
|
|
109
129
|
### Candidates should DIFFER from each other:
|
|
110
130
|
- Different candidates should represent genuinely different approaches
|
|
111
131
|
- Do not generate candidates with identical betterDecisions
|
|
@@ -177,13 +197,23 @@ You MUST respond with ONLY a valid JSON object. No markdown, no explanation, no
|
|
|
177
197
|
## Evaluation Criteria
|
|
178
198
|
|
|
179
199
|
### Score Components (0-1 scale each):
|
|
180
|
-
1. **Principle Alignment** (weight: 0.
|
|
181
|
-
2. **Specificity** (weight: 0.
|
|
182
|
-
3. **Actionability** (weight: 0.
|
|
200
|
+
1. **Principle Alignment** (weight: 0.35) — Does the betterDecision properly reflect the target principle?
|
|
201
|
+
2. **Specificity** (weight: 0.25) — Is badDecision specific? Is betterDecision actionable?
|
|
202
|
+
3. **Actionability** (weight: 0.25) — Does betterDecision describe a specific next step?
|
|
203
|
+
4. **Executability** (weight: 0.15) — Does betterDecision start with a bounded verb (read, check, verify, edit, write, etc.) and reference a concrete target?
|
|
204
|
+
|
|
205
|
+
### Executability Check:
|
|
206
|
+
A betterDecision is executable if it:
|
|
207
|
+
- STARTS with a concrete action verb: read, check, verify, edit, write, create, delete, search, grep, find, list, review, examine, inspect, test, run, execute, analyze, diagnose, debug
|
|
208
|
+
- References a specific, concrete target (file, command, config, etc.)
|
|
209
|
+
- Describes a bounded, executable action — not a vague principle
|
|
210
|
+
- Examples that PASS: "Read the file before editing", "Check user permissions before executing"
|
|
211
|
+
- Examples that FAIL: "Per T-01, pause all tasks..." (starts with "Per"), "Be more careful" (vague)
|
|
183
212
|
|
|
184
213
|
### Ranking Rules:
|
|
185
214
|
- Candidates are ranked by score (highest = rank 1)
|
|
186
|
-
- Ties broken by: higher principle alignment, then lower candidateIndex
|
|
215
|
+
- Ties broken by: higher executability, then higher principle alignment, then lower candidateIndex
|
|
216
|
+
- If a candidate's betterDecision is NOT executable, penalize its score by 0.2
|
|
187
217
|
|
|
188
218
|
## Validation
|
|
189
219
|
|
|
@@ -294,11 +324,13 @@ export interface TrinityRuntimeAdapter {
|
|
|
294
324
|
* Invoke the Philosopher stage.
|
|
295
325
|
* @param dreamerOutput Dreamer's output
|
|
296
326
|
* @param principleId Target principle ID
|
|
327
|
+
* @param snapshot Session snapshot (for violation evidence)
|
|
297
328
|
* @returns Philosopher output JSON
|
|
298
329
|
*/
|
|
299
330
|
invokePhilosopher(
|
|
300
331
|
_dreamerOutput: DreamerOutput,
|
|
301
|
-
_principleId: string
|
|
332
|
+
_principleId: string,
|
|
333
|
+
_snapshot: NocturnalSessionSnapshot
|
|
302
334
|
): Promise<PhilosopherOutput>;
|
|
303
335
|
|
|
304
336
|
/**
|
|
@@ -334,40 +366,40 @@ export interface TrinityRuntimeAdapter {
|
|
|
334
366
|
|
|
335
367
|
/**
|
|
336
368
|
* OpenClaw-backed Trinity runtime adapter.
|
|
337
|
-
* Uses
|
|
338
|
-
*
|
|
369
|
+
* Uses api.runtime.agent.runEmbeddedPiAgent() which works in background contexts
|
|
370
|
+
* (unlike api.runtime.subagent.* which requires gateway request scope).
|
|
339
371
|
*/
|
|
340
372
|
export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
341
|
-
|
|
373
|
+
|
|
342
374
|
private readonly api: {
|
|
343
375
|
runtime: {
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
376
|
+
agent: {
|
|
377
|
+
runEmbeddedPiAgent: (_opts: {
|
|
378
|
+
sessionId: string;
|
|
379
|
+
sessionFile: string;
|
|
380
|
+
prompt: string;
|
|
348
381
|
extraSystemPrompt?: string;
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
getSessionMessages: (_opts: {
|
|
356
|
-
sessionKey: string;
|
|
357
|
-
limit: number;
|
|
382
|
+
config?: unknown;
|
|
383
|
+
provider?: string;
|
|
384
|
+
model?: string;
|
|
385
|
+
timeoutMs: number;
|
|
386
|
+
runId: string;
|
|
387
|
+
disableTools?: boolean;
|
|
358
388
|
}) => Promise<{
|
|
359
|
-
|
|
389
|
+
payloads?: { isError?: boolean; text?: string }[];
|
|
360
390
|
}>;
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
}) => Promise<void>;
|
|
391
|
+
};
|
|
392
|
+
config?: {
|
|
393
|
+
loadConfig?: () => unknown;
|
|
365
394
|
};
|
|
366
395
|
};
|
|
396
|
+
config?: unknown;
|
|
397
|
+
logger?: { info: (msg: string) => void; warn: (msg: string) => void; error: (msg: string) => void };
|
|
367
398
|
};
|
|
368
399
|
|
|
369
400
|
|
|
370
401
|
private readonly stageTimeoutMs: number;
|
|
402
|
+
private readonly tempDir: string;
|
|
371
403
|
|
|
372
404
|
constructor(
|
|
373
405
|
api: OpenClawTrinityRuntimeAdapter['api'],
|
|
@@ -375,6 +407,106 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
375
407
|
) {
|
|
376
408
|
this.api = api;
|
|
377
409
|
this.stageTimeoutMs = stageTimeoutMs;
|
|
410
|
+
// Cross-platform temp directory for session files
|
|
411
|
+
this.tempDir = path.join(os.tmpdir(), `pd-trinity-${process.pid}`);
|
|
412
|
+
// Clean up any stale temp files from previous crashed runs
|
|
413
|
+
this.cleanupStaleTempDirs();
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
/**
 * Remove `pd-trinity-<pid>` directories that earlier runs left behind in the
 * OS temp directory (for example after a crash that skipped close()).
 *
 * A directory is treated as stale only when its suffix is a numeric pid and
 * no process with that pid is currently running. The directory of this
 * process and the directories of other live processes are left untouched, so
 * concurrently running instances never delete each other's session files.
 * If a pid has been reused by an unrelated process the directory is kept;
 * that is the safe direction to err in.
 *
 * Best-effort: every filesystem error is swallowed, because failing to tidy
 * up must never prevent the adapter from being constructed.
 */
private cleanupStaleTempDirs(): void {
  const prefix = 'pd-trinity-';
  const ownDirName = path.basename(this.tempDir);

  // Signal 0 delivers nothing; it only performs the existence/permission check.
  const isProcessAlive = (pid: number): boolean => {
    try {
      process.kill(pid, 0);
      return true;
    } catch (err) {
      // EPERM means the process exists but belongs to someone else: still alive.
      const code =
        typeof err === 'object' && err !== null ? (err as { code?: unknown }).code : undefined;
      return code === 'EPERM';
    }
  };

  try {
    const osTempDir = os.tmpdir();
    if (!fs.existsSync(osTempDir)) return;

    for (const entry of fs.readdirSync(osTempDir)) {
      if (!entry.startsWith(prefix) || entry === ownDirName) continue;

      // Only touch names this adapter could have produced: prefix + decimal pid.
      const pidText = entry.slice(prefix.length);
      if (!/^\d+$/.test(pidText) || isProcessAlive(Number(pidText))) continue;

      try {
        fs.rmSync(path.join(osTempDir, entry), { recursive: true, force: true });
      } catch {
        // One undeletable directory must not stop the rest of the sweep.
      }
    }
  } catch {
    // Non-fatal: anything left behind is retried by the next adapter instance.
  }
}
|
|
435
|
+
|
|
436
|
+
/**
 * Load the full OpenClaw config (including models.providers).
 *
 * Why: `this.api.config` is the plugin config, not the full OpenClaw config.
 * It does NOT contain `models.providers`, which is needed to resolve provider
 * model definitions. `api.runtime.config.loadConfig()` returns the full config.
 *
 * Fallback: if loadConfig() is unavailable or throws, the plugin config is
 * returned instead. resolveModel() copes with the missing model settings by
 * using the FALLBACK_PROVIDER / FALLBACK_MODEL constants.
 *
 * @returns The full config when it can be loaded, otherwise the plugin config
 *          (which may itself be undefined).
 */
private loadFullConfig(): Record<string, unknown> | undefined {
  // Try runtime.config.loadConfig() first (available in native plugin context)
  const runtimeConfig = this.api.runtime?.config;
  if (typeof runtimeConfig?.loadConfig === 'function') {
    try {
      // Call through the owning object so loadConfig keeps its `this` binding;
      // pulling the function into a local first would invoke it detached.
      return runtimeConfig.loadConfig() as Record<string, unknown> | undefined;
    } catch (err) {
      this.api.logger?.warn?.(`[Trinity] loadConfig() failed, falling back to plugin config: ${err instanceof Error ? err.message : String(err)}`);
    }
  }

  // Fallback: plugin config (limited — won't have models.providers)
  return this.api.config as Record<string, unknown> | undefined;
}
|
|
460
|
+
|
|
461
|
+
/**
 * Resolve the provider and model from the OpenClaw config.
 *
 * runEmbeddedPiAgent does NOT read config.agents.defaults.model — it requires
 * explicit params.provider and params.model, so they are derived here.
 *
 * `agents.defaults.model` may be either a "provider/model" string or an object
 * whose `primary` field holds such a string. The reference is split on the
 * FIRST slash, so model ids that contain slashes themselves are preserved.
 * A reference with an empty provider or empty model part is rejected.
 *
 * @returns The configured provider/model pair, or FALLBACK_PROVIDER /
 *          FALLBACK_MODEL (with a warning) when nothing usable is configured.
 */
private resolveModel(): { provider: string; model: string } {
  const config = this.loadFullConfig();
  const agents = config?.agents as Record<string, unknown> | undefined;
  const defaults = agents?.defaults as Record<string, unknown> | undefined;
  const modelConfig = defaults?.model;

  // Normalise both supported shapes to a single candidate reference.
  let modelRef: unknown;
  if (typeof modelConfig === 'string') {
    modelRef = modelConfig;
  } else if (modelConfig !== null && typeof modelConfig === 'object') {
    modelRef = (modelConfig as Record<string, unknown>).primary;
  }

  // Narrow at runtime instead of casting: `primary` comes from user config
  // and is not guaranteed to be a string.
  if (typeof modelRef === 'string') {
    const slash = modelRef.indexOf('/');
    if (slash > 0 && slash < modelRef.length - 1) {
      return { provider: modelRef.slice(0, slash), model: modelRef.slice(slash + 1) };
    }
  }

  // Last resort fallback — constants are env-configurable to avoid hardcoded strings here
  this.api.logger?.warn?.(`[Trinity] Could not resolve model from config, using fallback: ${FALLBACK_PROVIDER}/${FALLBACK_MODEL}`);
  return { provider: FALLBACK_PROVIDER, model: FALLBACK_MODEL };
}
|
|
490
|
+
|
|
491
|
+
/**
 * Reserve a unique JSONL session-file path for one runEmbeddedPiAgent call.
 *
 * Ensures the per-process temp directory exists and returns a fresh path
 * inside it. The file itself is NOT created here; only its path is produced.
 *
 * @param stage Stage label used as the file-name prefix (e.g. "dreamer").
 * @returns Path of the form `<tempDir>/<stage>-<uuid>.jsonl`.
 */
private createSessionFile(stage: string): string {
  // Recursive mkdir is a no-op when the directory already exists, so no
  // separate (and racy) existence check is needed.
  fs.mkdirSync(this.tempDir, { recursive: true });
  return path.join(this.tempDir, `${stage}-${randomUUID()}.jsonl`);
}
|
|
500
|
+
|
|
501
|
+
/**
 * Collapse a runEmbeddedPiAgent result into one text blob.
 *
 * Payloads flagged as errors are ignored, each remaining text is trimmed,
 * payloads that end up empty are dropped, and the rest are joined with
 * newlines. A result without payloads yields the empty string.
 */
private extractPayloadText(result: { payloads?: { isError?: boolean; text?: string }[] }): string {
  const pieces: string[] = [];
  for (const payload of result.payloads ?? []) {
    if (payload.isError) continue;
    const trimmed = payload.text?.trim();
    if (trimmed) pieces.push(trimmed);
  }
  return pieces.join('\n');
}
|
|
379
511
|
|
|
380
512
|
async invokeDreamer(
|
|
@@ -382,145 +514,162 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
382
514
|
principleId: string,
|
|
383
515
|
maxCandidates: number
|
|
384
516
|
): Promise<DreamerOutput> {
|
|
385
|
-
const
|
|
386
|
-
const
|
|
387
|
-
|
|
517
|
+
const runId = `dreamer-${randomUUID()}`;
|
|
518
|
+
const sessionFile = this.createSessionFile('dreamer');
|
|
388
519
|
const prompt = this.buildDreamerPrompt(snapshot, principleId, maxCandidates);
|
|
520
|
+
const model = this.resolveModel();
|
|
389
521
|
|
|
390
|
-
|
|
391
|
-
const { runId } = await this.api.runtime.subagent.run({
|
|
392
|
-
sessionKey,
|
|
393
|
-
message: prompt,
|
|
394
|
-
extraSystemPrompt: systemPrompt,
|
|
395
|
-
deliver: false,
|
|
396
|
-
});
|
|
522
|
+
this.api.logger?.info(`[Trinity:Dreamer] Using model: ${model.provider}/${model.model}`);
|
|
397
523
|
|
|
398
|
-
|
|
399
|
-
|
|
524
|
+
try {
|
|
525
|
+
const result = await this.api.runtime.agent.runEmbeddedPiAgent({
|
|
526
|
+
sessionId: runId,
|
|
527
|
+
sessionFile,
|
|
528
|
+
prompt,
|
|
529
|
+
extraSystemPrompt: NOCTURNAL_DREAMER_PROMPT,
|
|
530
|
+
config: this.loadFullConfig(),
|
|
531
|
+
provider: model.provider,
|
|
532
|
+
model: model.model,
|
|
400
533
|
timeoutMs: this.stageTimeoutMs,
|
|
534
|
+
runId,
|
|
535
|
+
disableTools: true,
|
|
401
536
|
});
|
|
402
537
|
|
|
403
|
-
|
|
538
|
+
const outputText = this.extractPayloadText(result);
|
|
539
|
+
if (!outputText) {
|
|
404
540
|
return {
|
|
405
541
|
valid: false,
|
|
406
542
|
candidates: [],
|
|
407
|
-
reason:
|
|
543
|
+
reason: 'Dreamer returned empty response',
|
|
408
544
|
generatedAt: new Date().toISOString(),
|
|
409
545
|
};
|
|
410
546
|
}
|
|
411
547
|
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
limit: 5,
|
|
415
|
-
});
|
|
548
|
+
// DEBUG: Log Dreamer's actual output
|
|
549
|
+
this.api.logger?.info(`[Trinity:Dreamer] Output preview: ${outputText.slice(0, 500)}`);
|
|
416
550
|
|
|
417
|
-
const outputText = this.extractAssistantText(messages.messages as { role: string; text?: string; content?: string }[]);
|
|
418
551
|
return this.parseDreamerOutput(outputText);
|
|
552
|
+
} catch (err) {
|
|
553
|
+
return {
|
|
554
|
+
valid: false,
|
|
555
|
+
candidates: [],
|
|
556
|
+
reason: `Dreamer failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
557
|
+
generatedAt: new Date().toISOString(),
|
|
558
|
+
};
|
|
419
559
|
} finally {
|
|
420
|
-
|
|
421
|
-
sessionKey,
|
|
422
|
-
deleteTranscript: true,
|
|
423
|
-
}).catch(() => { /* intentionally empty - fire-and-forget session cleanup */ });
|
|
560
|
+
try { fs.unlinkSync(sessionFile); } catch { /* ignore */ }
|
|
424
561
|
}
|
|
425
562
|
}
|
|
426
563
|
|
|
427
564
|
/**
 * Run the Philosopher stage through runEmbeddedPiAgent.
 *
 * Every failure — including errors while preparing the session file, the
 * prompt or the model — is reported as a `valid: false` output with a
 * `reason`, so callers never have to handle a rejected promise.
 *
 * @param dreamerOutput Candidates produced by the Dreamer stage.
 * @param principleId   Target principle ID.
 * @param snapshot      Session snapshot used as violation evidence in the prompt.
 * @returns Parsed Philosopher output, or an invalid output describing the failure.
 */
async invokePhilosopher(
  dreamerOutput: DreamerOutput,
  principleId: string,
  snapshot: NocturnalSessionSnapshot
): Promise<PhilosopherOutput> {
  const runId = `philosopher-${randomUUID()}`;
  // Assigned inside the try so that setup errors take the structured failure path.
  let sessionFile: string | undefined;

  try {
    sessionFile = this.createSessionFile('philosopher');
    const prompt = this.buildPhilosopherPrompt(dreamerOutput, principleId, snapshot);
    const model = this.resolveModel();

    this.api.logger?.info(`[Trinity:Philosopher] Using model: ${model.provider}/${model.model}`);

    const result = await this.api.runtime.agent.runEmbeddedPiAgent({
      sessionId: runId,
      sessionFile,
      prompt,
      extraSystemPrompt: NOCTURNAL_PHILOSOPHER_PROMPT,
      config: this.loadFullConfig(),
      // provider/model must be explicit; see resolveModel().
      provider: model.provider,
      model: model.model,
      timeoutMs: this.stageTimeoutMs,
      runId,
      disableTools: true,
    });

    const outputText = this.extractPayloadText(result);
    if (!outputText) {
      return {
        valid: false,
        judgments: [],
        overallAssessment: '',
        reason: 'Philosopher returned empty response',
        generatedAt: new Date().toISOString(),
      };
    }

    // DEBUG: Log Philosopher's actual output
    this.api.logger?.info(`[Trinity:Philosopher] Output preview: ${outputText.slice(0, 500)}`);

    return this.parsePhilosopherOutput(outputText);
  } catch (err) {
    return {
      valid: false,
      judgments: [],
      overallAssessment: '',
      reason: `Philosopher failed: ${err instanceof Error ? err.message : String(err)}`,
      generatedAt: new Date().toISOString(),
    };
  } finally {
    // Best-effort removal of the per-run session file.
    if (sessionFile) {
      try { fs.unlinkSync(sessionFile); } catch { /* ignore */ }
    }
  }
}
|
|
473
615
|
|
|
474
|
-
|
|
616
|
+
|
|
475
617
|
/**
 * Run the Scribe stage through runEmbeddedPiAgent.
 *
 * @param dreamerOutput     Candidates produced by the Dreamer stage.
 * @param philosopherOutput Judgments produced by the Philosopher stage.
 * @param snapshot          Session snapshot used as violation evidence.
 * @param principleId       Target principle ID.
 * @param telemetry         Telemetry object forwarded to the output parser.
 * @param _config           Unused here; kept for interface compatibility.
 * @returns The parsed draft artifact, or null when the agent returned no
 *          text or any step failed (the failure is logged as a warning).
 */
async invokeScribe(
  dreamerOutput: DreamerOutput,
  philosopherOutput: PhilosopherOutput,
  snapshot: NocturnalSessionSnapshot,
  principleId: string,
  telemetry: TrinityTelemetry,
  _config: TrinityConfig
): Promise<TrinityDraftArtifact | null> {
  const runId = `scribe-${randomUUID()}`;
  // Assigned inside the try so that setup errors also resolve to null.
  let sessionFile: string | undefined;

  try {
    sessionFile = this.createSessionFile('scribe');
    const prompt = this.buildScribePrompt(dreamerOutput, philosopherOutput, snapshot, principleId);
    const model = this.resolveModel();

    const result = await this.api.runtime.agent.runEmbeddedPiAgent({
      sessionId: runId,
      sessionFile,
      prompt,
      extraSystemPrompt: NOCTURNAL_SCRIBE_PROMPT,
      config: this.loadFullConfig(),
      // provider/model must be explicit; see resolveModel().
      provider: model.provider,
      model: model.model,
      timeoutMs: this.stageTimeoutMs,
      runId,
      disableTools: true,
    });

    const outputText = this.extractPayloadText(result);
    if (!outputText) {
      return null;
    }

    // DEBUG: Log Scribe's actual output
    this.api.logger?.info(`[Trinity:Scribe] Output preview: ${outputText.slice(0, 800)}`);

    return this.parseScribeOutput(outputText, snapshot, principleId, telemetry);
  } catch (err) {
    // The null return carries no reason, so record the cause before dropping it.
    this.api.logger?.warn?.(`[Trinity:Scribe] Scribe failed: ${err instanceof Error ? err.message : String(err)}`);
    return null;
  } finally {
    // Best-effort removal of the per-run session file.
    if (sessionFile) {
      try { fs.unlinkSync(sessionFile); } catch { /* ignore */ }
    }
  }
}
|
|
520
660
|
|
|
521
|
-
|
|
661
|
+
|
|
522
662
|
/**
 * Release adapter resources by deleting this process's temp directory and
 * everything in it. Best-effort: cleanup errors are ignored.
 */
async close(): Promise<void> {
  try {
    // recursive + force removes the directory with all of its contents and
    // does not fail when it is already gone, so no existence check or
    // per-file unlink pass is needed.
    fs.rmSync(this.tempDir, { recursive: true, force: true });
  } catch { /* ignore cleanup errors */ }
}
|
|
525
674
|
|
|
526
675
|
// ---------------------------------------------------------------------------
|
|
@@ -528,51 +677,167 @@ export class OpenClawTrinityRuntimeAdapter implements TrinityRuntimeAdapter {
|
|
|
528
677
|
// ---------------------------------------------------------------------------
|
|
529
678
|
|
|
530
679
|
|
|
531
|
-
private extractAssistantText(
|
|
532
|
-
messages: { role: string; text?: string; content?: string }[]
|
|
533
|
-
): string {
|
|
534
|
-
for (let i = messages.length - 1; i >= 0; i--) {
|
|
535
|
-
const msg = messages[i] as { role: string; text?: string; content?: string };
|
|
536
|
-
if (msg.role === 'assistant') {
|
|
537
|
-
return msg.text ?? msg.content ?? '';
|
|
538
|
-
}
|
|
539
|
-
}
|
|
540
|
-
return '';
|
|
541
|
-
}
|
|
542
|
-
|
|
543
|
-
|
|
544
680
|
/**
 * Assemble the user prompt for the Dreamer stage.
 *
 * The prompt opens with the target principle and session id, then adds one
 * section per kind of evidence present in the snapshot (failed tool calls,
 * pain events scoring at least 50, gate blocks, the last three assistant
 * turns truncated to 300 characters each, detected user corrections), and
 * ends with the task instructions. Evidence sections with nothing to show
 * are omitted.
 */
private buildDreamerPrompt(
  snapshot: NocturnalSessionSnapshot,
  principleId: string,
  maxCandidates: number
): string {
  // Failed tool calls, with the target path when one was recorded.
  const failureLines = snapshot.toolCalls.flatMap(call => {
    if (call.outcome !== 'failure') return [];
    const target = call.filePath ? ` on ${call.filePath}` : '';
    return [`- ${call.toolName}${target} → FAILED: ${call.errorMessage || 'unknown error'}`];
  });

  // Pain events at or above the reporting threshold.
  const painLines = snapshot.painEvents.flatMap(event =>
    event.score >= 50
      ? [`- Pain (score: ${event.score}): ${event.reason || 'no reason'} [source: ${event.source}]`]
      : []
  );

  // Every gate block is reported.
  const gateLines = snapshot.gateBlocks.map(
    block => `- Gate blocked ${block.toolName}: ${block.reason}`
  );

  // Up to the three most recent assistant turns, numbered within that window.
  const latestTurns = snapshot.assistantTurns.slice(-3);
  const turnContext = latestTurns
    .map((turn, idx) => `[Turn ${idx + 1}] ${turn.sanitizedText.slice(0, 300)}`)
    .join('\n');

  // User turns in which a correction was detected.
  const correctionCues = snapshot.userTurns
    .flatMap(turn =>
      turn.correctionDetected ? [`- User correction: ${turn.correctionCue || 'detected'}`] : []
    )
    .join('\n');

  const lines: string[] = [
    `## Target Principle`,
    `**Principle ID**: ${principleId}`,
    ``,
    `## Session Context`,
    `**Session ID**: ${snapshot.sessionId}`,
    ``,
  ];

  // Each evidence section is heading + body followed by one blank line.
  const appendSection = (heading: string, body: string): void => {
    lines.push(heading, body, '');
  };

  if (failureLines.length > 0) {
    appendSection(`## Tool Failures (${failureLines.length})`, failureLines.join('\n'));
  }
  if (painLines.length > 0) {
    appendSection(`## Pain Signals (${painLines.length})`, painLines.join('\n'));
  }
  if (gateLines.length > 0) {
    appendSection(`## Gate Blocks (${gateLines.length})`, gateLines.join('\n'));
  }
  if (turnContext) {
    appendSection(`## Assistant Decision Context`, turnContext);
  }
  if (correctionCues) {
    appendSection(`## User Corrections`, correctionCues);
  }

  lines.push(
    `## Task`,
    `Analyze the above session and generate ${maxCandidates} candidate corrections.`,
    `Each candidate must:`,
    `1. Identify a specific bad decision from the session`,
    `2. Propose a concrete better decision grounded in principle ${principleId}`,
    `3. The betterDecision MUST START with a bounded verb: read, check, verify, edit, write, create, delete, search, grep, find, list, review, examine, inspect, test, run, execute, analyze, diagnose, debug`,
    `4. Explain the rationale referencing the principle`,
    ``,
    `Respond with ONLY a valid JSON object matching the DreamerOutput contract.`
  );

  return lines.join('\n');
}
|
|
563
768
|
|
|
564
769
|
|
|
565
770
|
/**
 * Assemble the user prompt for the Philosopher stage.
 *
 * The prompt states the target principle, summarises the violations found in
 * the snapshot (failed tool calls, pain events scoring at least 50, gate
 * blocks, detected user corrections — each group only when non-empty), lists
 * the Dreamer's candidates as pretty-printed JSON, and ends with the
 * evaluation instructions.
 */
private buildPhilosopherPrompt(
  dreamerOutput: DreamerOutput,
  principleId: string,
  snapshot: NocturnalSessionSnapshot
): string {
  const candidatesJson = JSON.stringify(dreamerOutput.candidates, null, 2);

  // Violation evidence the Philosopher checks each candidate against.
  const failureLines = snapshot.toolCalls.flatMap(call => {
    if (call.outcome !== 'failure') return [];
    const target = call.filePath ? ` on ${call.filePath}` : '';
    return [`- ${call.toolName}${target} → FAILED: ${call.errorMessage || 'unknown error'}`];
  });

  const painLines = snapshot.painEvents.flatMap(event =>
    event.score >= 50
      ? [`- Pain (score: ${event.score}, severity: ${event.severity || 'N/A'}): ${event.reason || 'no reason'} [source: ${event.source}]`]
      : []
  );

  const gateLines = snapshot.gateBlocks.map(
    block => `- Gate blocked ${block.toolName}: ${block.reason}`
  );

  const correctionLines = snapshot.userTurns.flatMap(turn =>
    turn.correctionDetected ? [`- User correction: ${turn.correctionCue || 'detected'}`] : []
  );

  const lines: string[] = [
    `## Target Principle`,
    `**Principle ID**: ${principleId}`,
    ``,
    `## Session Violation Summary`,
    `**Session ID**: ${snapshot.sessionId}`,
  ];

  // A group is a sub-heading (preceded by a blank line) plus its bullet list;
  // empty groups are left out entirely.
  const appendGroup = (title: string, items: string[]): void => {
    if (items.length === 0) return;
    lines.push(`\n### ${title} (${items.length})`, items.join('\n'));
  };

  appendGroup('Tool Failures', failureLines);
  appendGroup('Pain Signals', painLines);
  appendGroup('Gate Blocks', gateLines);
  appendGroup('User Corrections', correctionLines);

  lines.push(
    ``,
    `## Dreamer's Candidates`,
    candidatesJson,
    ``,
    `## Task`,
    `Evaluate each candidate against the violation summary above.`,
    `For each candidate:`,
    `1. Is the badDecision accurate — does it match the actual violations in the session?`,
    `2. Is the betterDecision specific and actionable?`,
    `3. Does the betterDecision START with a bounded verb (read, check, verify, edit, write, etc.)?`,
    `4. Does the rationale correctly reference principle ${principleId}?`,
    `5. Is the confidence score justified?`,
    ``,
    `**Penalize executability**: If betterDecision does NOT start with a bounded verb, reduce score by 0.2.`,
    ``,
    `Respond with ONLY a valid JSON object matching the PhilosopherOutput contract.`
  );

  return lines.join('\n');
}
|
|
577
842
|
|
|
578
843
|
|
|
@@ -584,16 +849,74 @@ Please evaluate each candidate and rank them by principle alignment, specificity
|
|
|
584
849
|
): string {
|
|
585
850
|
const candidatesJson = JSON.stringify(dreamerOutput.candidates, null, 2);
|
|
586
851
|
const judgmentsJson = JSON.stringify(philosopherOutput.judgments, null, 2);
|
|
587
|
-
return `Target Principle: ${principleId}
|
|
588
|
-
Session ID: ${snapshot.sessionId}
|
|
589
852
|
|
|
590
|
-
|
|
591
|
-
|
|
853
|
+
// Build violation evidence for Scribe to ground the final artifact
|
|
854
|
+
const violations: string[] = [];
|
|
855
|
+
|
|
856
|
+
const failures = snapshot.toolCalls.filter(tc => tc.outcome === 'failure');
|
|
857
|
+
for (const tc of failures) {
|
|
858
|
+
violations.push(`- Tool failure: ${tc.toolName}${tc.filePath ? ` on ${tc.filePath}` : ''} → ${tc.errorMessage || 'unknown error'}`);
|
|
859
|
+
}
|
|
592
860
|
|
|
593
|
-
|
|
594
|
-
|
|
861
|
+
const pains = snapshot.painEvents.filter(pe => pe.score >= 50);
|
|
862
|
+
for (const pe of pains) {
|
|
863
|
+
violations.push(`- Pain signal (score: ${pe.score}): ${pe.reason || 'no reason'} [source: ${pe.source}]`);
|
|
864
|
+
}
|
|
595
865
|
|
|
596
|
-
|
|
866
|
+
const blocks = snapshot.gateBlocks;
|
|
867
|
+
for (const gb of blocks) {
|
|
868
|
+
violations.push(`- Gate blocked: ${gb.toolName} → ${gb.reason}`);
|
|
869
|
+
}
|
|
870
|
+
|
|
871
|
+
const sections = [
|
|
872
|
+
`## Target Principle`,
|
|
873
|
+
`**Principle ID**: ${principleId}`,
|
|
874
|
+
``,
|
|
875
|
+
`## Original Violation Evidence`,
|
|
876
|
+
`**Session ID**: ${snapshot.sessionId}`,
|
|
877
|
+
];
|
|
878
|
+
|
|
879
|
+
if (violations.length > 0) {
|
|
880
|
+
sections.push(violations.join('\n'));
|
|
881
|
+
} else {
|
|
882
|
+
sections.push(`(No specific violations found in snapshot)`);
|
|
883
|
+
}
|
|
884
|
+
|
|
885
|
+
sections.push(
|
|
886
|
+
``,
|
|
887
|
+
`## Dreamer's Candidates`,
|
|
888
|
+
candidatesJson,
|
|
889
|
+
``,
|
|
890
|
+
`## Philosopher's Judgments`,
|
|
891
|
+
judgmentsJson,
|
|
892
|
+
``,
|
|
893
|
+
`## Task`,
|
|
894
|
+
`Select the best candidate (Philosopher's rank 1) and synthesize it into a final TrinityDraftArtifact.`,
|
|
895
|
+
`Use the Original Violation Evidence above to ensure your final badDecision and betterDecision`,
|
|
896
|
+
`are grounded in the actual session events, not just Dreamer's interpretation.`,
|
|
897
|
+
``,
|
|
898
|
+
`## CRITICAL: betterDecision Format Requirements`,
|
|
899
|
+
`Your betterDecision MUST pass executability validation. It MUST:`,
|
|
900
|
+
`1. START with a concrete action verb from this list: read, check, verify, edit, write, create, delete, search, grep, find, list, review, examine, inspect, test, run, execute, analyze, diagnose, debug`,
|
|
901
|
+
`2. Reference a SPECIFIC, concrete target (file path, command name, config key, etc.)`,
|
|
902
|
+
`3. Describe a BOUNDED, executable action — not a vague principle or process`,
|
|
903
|
+
``,
|
|
904
|
+
`**Examples that PASS executability check**:`,
|
|
905
|
+
`- "Read the file before editing to verify current content"`,
|
|
906
|
+
`- "Check user permissions before executing privileged commands"`,
|
|
907
|
+
`- "Verify the routing infrastructure is operational before analyzing system state"`,
|
|
908
|
+
`- "Edit the config file to set timeout=30000ms"`,
|
|
909
|
+
``,
|
|
910
|
+
`**Examples that FAIL executability check**:`,
|
|
911
|
+
`- "Per T-01, pause all analysis tasks..." (starts with "Per", not a bounded verb)`,
|
|
912
|
+
`- "The agent should have first checked..." (starts with "The", not the action verb)`,
|
|
913
|
+
`- "Be more careful with routing tools" (vague verb "be")`,
|
|
914
|
+
`- "Ensure proper authorization" (vague verb "ensure")`,
|
|
915
|
+
``,
|
|
916
|
+
`Respond with ONLY a valid JSON object.`
|
|
917
|
+
);
|
|
918
|
+
|
|
919
|
+
return sections.join('\n');
|
|
597
920
|
}
|
|
598
921
|
|
|
599
922
|
|
|
@@ -1125,8 +1448,8 @@ export function invokeStubDreamer(
|
|
|
1125
1448
|
*/
|
|
1126
1449
|
export function invokeStubPhilosopher(
|
|
1127
1450
|
dreamerOutput: DreamerOutput,
|
|
1128
|
-
|
|
1129
|
-
|
|
1451
|
+
_principleId: string,
|
|
1452
|
+
_snapshot: NocturnalSessionSnapshot
|
|
1130
1453
|
): PhilosopherOutput {
|
|
1131
1454
|
if (!dreamerOutput.valid || dreamerOutput.candidates.length === 0) {
|
|
1132
1455
|
return {
|
|
@@ -1374,7 +1697,7 @@ export async function runTrinityAsync(options: RunTrinityOptions): Promise<Trini
|
|
|
1374
1697
|
telemetry.candidateCount = dreamerOutput.candidates.length;
|
|
1375
1698
|
|
|
1376
1699
|
// Step 2: Philosopher — rank candidates via real subagent
|
|
1377
|
-
const philosopherOutput = await adapter.invokePhilosopher(dreamerOutput, principleId);
|
|
1700
|
+
const philosopherOutput = await adapter.invokePhilosopher(dreamerOutput, principleId, snapshot);
|
|
1378
1701
|
|
|
1379
1702
|
if (!philosopherOutput.valid || philosopherOutput.judgments.length === 0) {
|
|
1380
1703
|
failures.push({
|
|
@@ -1470,7 +1793,7 @@ function runTrinityWithStubs(
|
|
|
1470
1793
|
telemetry.candidateCount = dreamerOutput.candidates.length;
|
|
1471
1794
|
|
|
1472
1795
|
// Step 2: Philosopher — rank candidates (stub)
|
|
1473
|
-
const philosopherOutput = invokeStubPhilosopher(dreamerOutput, principleId);
|
|
1796
|
+
const philosopherOutput = invokeStubPhilosopher(dreamerOutput, principleId, snapshot);
|
|
1474
1797
|
|
|
1475
1798
|
if (!philosopherOutput.valid || philosopherOutput.judgments.length === 0) {
|
|
1476
1799
|
failures.push({
|