principles-disciple 1.17.0 → 1.19.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -15,6 +15,7 @@ import {
15
15
  updateReviewStatus,
16
16
  getDatasetRecord,
17
17
  } from '../../src/core/nocturnal-dataset.js';
18
+ import { appendArtifactLineageRecord } from '../../src/core/nocturnal-artifact-lineage.js';
18
19
  import type { NocturnalDatasetRecord } from '../../src/core/nocturnal-dataset.js';
19
20
 
20
21
  // ---------------------------------------------------------------------------
@@ -123,9 +124,63 @@ describe('NocturnalExport exportORPOSamples', () => {
123
124
  expect(sample.rejected).toBeTruthy();
124
125
  expect(sample.rationale).toBeTruthy();
125
126
  expect(sample.datasetMetadata.exportId).toBe(result.manifest!.exportId);
127
+ expect(sample.datasetMetadata.evidenceSummary.lineageStatus).toBe('unknown');
126
128
  }
127
129
  });
128
130
 
131
+ it('degrades to evidence-bounded neutral text when lineage is missing', () => {
132
+ setupExportReady(tmpDir, 'art-no-lineage', 'gpt-4');
133
+
134
+ const result = exportORPOSamples(tmpDir, 'gpt-4');
135
+
136
+ expect(result.success).toBe(true);
137
+ const [sample] = fs.readFileSync(result.manifest!.exportPath, 'utf-8').trim().split('\n').map((line) => JSON.parse(line));
138
+ expect(sample.prompt).toBe('Take the next action without verified source evidence.');
139
+ expect(sample.rejected).toBe('Take the next action without verified source evidence.');
140
+ expect(sample.rationale).toContain('Source evidence is unknown');
141
+ expect(sample.datasetMetadata.evidenceSummary).toEqual({
142
+ lineageStatus: 'unknown',
143
+ painSignals: { status: 'unknown', count: null, ids: [] },
144
+ gateBlocks: { status: 'unknown', count: null, ids: [] },
145
+ });
146
+ });
147
+
148
+ it('exports observed lineage evidence when available', () => {
149
+ const record = setupExportReady(tmpDir, 'art-with-lineage', 'gpt-4');
150
+ appendArtifactLineageRecord(tmpDir, {
151
+ artifactKind: 'behavioral-sample',
152
+ artifactId: record.artifactId,
153
+ principleId: record.principleId,
154
+ ruleId: null,
155
+ sessionId: record.sessionId,
156
+ sourceSnapshotRef: record.sourceSnapshotRef,
157
+ sourcePainIds: ['pain-1', 'pain-2'],
158
+ sourceGateBlockIds: ['gate-1'],
159
+ storagePath: record.artifactPath,
160
+ implementationId: null,
161
+ createdAt: record.createdAt,
162
+ });
163
+
164
+ const result = exportORPOSamples(tmpDir, 'gpt-4');
165
+
166
+ expect(result.success).toBe(true);
167
+ const [sample] = fs.readFileSync(result.manifest!.exportPath, 'utf-8').trim().split('\n').map((line) => JSON.parse(line));
168
+ expect(sample.prompt).toContain('2 observed pain signals');
169
+ expect(sample.rejected).toContain('1 observed gate blocks');
170
+ expect(sample.rationale).toContain('Observed source evidence: 2 pain signals and 1 gate blocks');
171
+ expect(sample.datasetMetadata.evidenceSummary.lineageStatus).toBe('observed');
172
+ expect(sample.datasetMetadata.evidenceSummary.painSignals).toEqual({
173
+ status: 'observed',
174
+ count: 2,
175
+ ids: ['pain-1', 'pain-2'],
176
+ });
177
+ expect(sample.datasetMetadata.evidenceSummary.gateBlocks).toEqual({
178
+ status: 'observed',
179
+ count: 1,
180
+ ids: ['gate-1'],
181
+ });
182
+ });
183
+
129
184
  it('writes manifest alongside JSONL', () => {
130
185
  setupExportReady(tmpDir, 'art-manifest-1', 'gpt-4');
131
186
 
@@ -5,6 +5,8 @@ import {
5
5
  validateDraftArtifact,
6
6
  draftToArtifact,
7
7
  DEFAULT_TRINITY_CONFIG,
8
+ OpenClawTrinityRuntimeAdapter,
9
+ TrinityRuntimeContractError,
8
10
  type TrinityConfig,
9
11
  type DreamerOutput,
10
12
  type PhilosopherOutput,
@@ -25,12 +27,16 @@ function makeSnapshot(overrides: Partial<{
25
27
  failureCount: number;
26
28
  totalPainEvents: number;
27
29
  totalGateBlocks: number;
28
- }> = {}): {
29
- sessionId: string;
30
- stats: { failureCount: number; totalPainEvents: number; totalGateBlocks: number; totalAssistantTurns: number; totalToolCalls: number };
31
- } {
30
+ }> = {}) {
32
31
  return {
33
32
  sessionId: 'session-test-123',
33
+ startedAt: '2026-04-12T00:00:00.000Z',
34
+ updatedAt: '2026-04-12T00:05:00.000Z',
35
+ assistantTurns: [],
36
+ userTurns: [],
37
+ toolCalls: [],
38
+ painEvents: [],
39
+ gateBlocks: [],
34
40
  stats: {
35
41
  failureCount: overrides.failureCount ?? 0,
36
42
  totalPainEvents: overrides.totalPainEvents ?? 0,
@@ -236,6 +242,73 @@ describe('validateDreamerOutput', () => {
236
242
  });
237
243
  });
238
244
 
245
+ describe('OpenClawTrinityRuntimeAdapter contract hardening', () => {
246
+ function makeRuntimeApi(overrides: Partial<any> = {}) {
247
+ return {
248
+ runtime: {
249
+ agent: {
250
+ runEmbeddedPiAgent: vi.fn().mockResolvedValue({
251
+ payloads: [
252
+ { text: '{"valid":true,"candidates":[],"generatedAt":"2026-04-12T00:00:00.000Z"}' },
253
+ ],
254
+ }),
255
+ },
256
+ config: {
257
+ loadConfig: vi.fn().mockReturnValue({
258
+ agents: {
259
+ defaults: {
260
+ model: 'openai/gpt-5.4',
261
+ },
262
+ },
263
+ }),
264
+ },
265
+ ...overrides.runtime,
266
+ },
267
+ logger: {
268
+ info: vi.fn(),
269
+ warn: vi.fn(),
270
+ error: vi.fn(),
271
+ },
272
+ };
273
+ }
274
+
275
+ it('rejects missing runtime.agent.runEmbeddedPiAgent contract explicitly', () => {
276
+ expect(() => new OpenClawTrinityRuntimeAdapter({ runtime: {} } as any)).toThrow(TrinityRuntimeContractError);
277
+ expect(() => new OpenClawTrinityRuntimeAdapter({ runtime: {} } as any)).toThrow(/runtime_unavailable/);
278
+ });
279
+
280
+ it('passes explicit provider/model overrides into runtime.agent.runEmbeddedPiAgent', async () => {
281
+ const api = makeRuntimeApi();
282
+ const adapter = new OpenClawTrinityRuntimeAdapter(api as any);
283
+
284
+ await adapter.invokeDreamer(makeSnapshot({ failureCount: 1 }) as any, 'T-08', 2);
285
+
286
+ expect(api.runtime.agent.runEmbeddedPiAgent).toHaveBeenCalledWith(
287
+ expect.objectContaining({
288
+ provider: 'openai',
289
+ model: 'gpt-5.4',
290
+ }),
291
+ );
292
+ });
293
+
294
+ it('returns stable failure classes when runtime invocation fails', async () => {
295
+ const api = makeRuntimeApi({
296
+ runtime: {
297
+ agent: {
298
+ runEmbeddedPiAgent: vi.fn().mockRejectedValue(new Error('gateway unavailable')),
299
+ },
300
+ },
301
+ });
302
+ const adapter = new OpenClawTrinityRuntimeAdapter(api as any);
303
+
304
+ const result = await adapter.invokeDreamer(makeSnapshot({ failureCount: 1 }) as any, 'T-08', 2);
305
+
306
+ expect(result.valid).toBe(false);
307
+ expect(result.reason).toContain('runtime_run_failed');
308
+ expect(adapter.getLastFailureReason()).toContain('runtime_run_failed');
309
+ });
310
+ });
311
+
239
312
  // ---------------------------------------------------------------------------
240
313
  // Tests: validatePhilosopherOutput
241
314
  // ---------------------------------------------------------------------------
@@ -479,5 +479,32 @@ unknown_meta: some data`;
479
479
  fs.rmSync(freshDir, { recursive: true, force: true });
480
480
  }
481
481
  });
482
+
483
+ it('readPainFlagData reads only the canonical .state/.pain_flag path', () => {
484
+ const legacyRootPath = path.join(TEST_DIR, 'PAIN_FLAG');
485
+ fs.writeFileSync(
486
+ legacyRootPath,
487
+ `source: legacy_root
488
+ score: 90
489
+ reason: should be ignored
490
+ time: 2026-04-10T09:00:00.000Z`,
491
+ 'utf-8',
492
+ );
493
+ fs.writeFileSync(
494
+ path.join(STATE_DIR, '.pain_flag'),
495
+ `source: canonical_state
496
+ score: 80
497
+ reason: should be read
498
+ time: 2026-04-10T09:00:00.000Z`,
499
+ 'utf-8',
500
+ );
501
+
502
+ const result = readPainFlagData(TEST_DIR);
503
+
504
+ expect(result.source).toBe('canonical_state');
505
+ expect(result.score).toBe('80');
506
+ const legacyResult = readPainFlagData(path.join(TEST_DIR, '..'));
507
+ expect(legacyResult.source).not.toBe('legacy_root');
508
+ });
482
509
  });
483
510
  });
@@ -255,6 +255,11 @@ describe('promotion-gate', () => {
255
255
 
256
256
  expect(result.constraintChecks.length).toBeGreaterThan(0);
257
257
  expect(result.deltaCheck).toBeDefined();
258
+ expect(result.evidenceSummary).toEqual({
259
+ evidenceMode: 'eval-proxy',
260
+ shadowSampleCount: 0,
261
+ deltaSource: 'eval',
262
+ });
258
263
  });
259
264
  });
260
265
 
@@ -231,4 +231,23 @@ describe('ReplayEngine', () => {
231
231
  expect(fs.existsSync(reportDir)).toBe(true);
232
232
  expect(fs.readdirSync(reportDir).some((file) => file.endsWith('.json'))).toBe(true);
233
233
  });
234
+
235
+ it('marks empty replay evidence as needs-review instead of pass', () => {
236
+ seedLedgerAndImplementation();
237
+
238
+ const engine = new ReplayEngine(workspaceDir, stateDir);
239
+ const report = engine.runReplayForImplementation('IMPL-1', ['pain-negative']);
240
+
241
+ expect(report.overallDecision).toBe('needs-review');
242
+ expect(report.evidenceSummary).toEqual({
243
+ evidenceStatus: 'empty',
244
+ totalSamples: 0,
245
+ classifiedCounts: {
246
+ painNegative: 0,
247
+ successPositive: 0,
248
+ principleAnchor: 0,
249
+ },
250
+ });
251
+ expect(report.blockers[0]).toContain('NO REPLAY EVIDENCE');
252
+ });
234
253
  });