principles-disciple 1.17.0 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -52,6 +52,10 @@ import {
52
52
  readDatasetArtifact,
53
53
  type NocturnalDatasetRecord,
54
54
  } from './nocturnal-dataset.js';
55
+ import {
56
+ listArtifactLineageRecords,
57
+ type ArtifactLineageRecord,
58
+ } from './nocturnal-artifact-lineage.js';
55
59
  import { NocturnalPathResolver } from './nocturnal-paths.js';
56
60
 
57
61
  // ---------------------------------------------------------------------------
@@ -81,6 +85,23 @@ export interface ORPOSample {
81
85
  exportedAt: string;
82
86
  exportId: string;
83
87
  datasetFingerprint: string;
88
+ evidenceSummary: ORPOEvidenceSummary;
89
+ };
90
+ }
91
+
92
/** Tri-state result for a single evidence category attached to an exported sample. */
export type EvidenceState = 'observed' | 'not_observed' | 'unknown';

/**
 * Evidence attached to each exported ORPO sample's dataset metadata.
 *
 * Each category carries its state, a count (typed as nullable), and the
 * identifiers backing that count.
 */
export interface ORPOEvidenceSummary {
  /** Lineage as a whole is either 'observed' or 'unknown'; there is no 'not_observed' state. */
  lineageStatus: 'observed' | 'unknown';

  /** Pain-signal evidence for the sample. */
  painSignals: {
    status: EvidenceState;
    count: number | null;
    ids: string[];
  };

  /** Gate-block evidence for the sample. */
  gateBlocks: {
    status: EvidenceState;
    count: number | null;
    ids: string[];
  };
}
86
107
 
@@ -140,10 +161,12 @@ function computeDatasetFingerprint(sampleFingerprints: string[]): string {
140
161
  function serializeORPOSample(
141
162
  record: NocturnalDatasetRecord,
142
163
  artifact: ReturnType<typeof readDatasetArtifact>,
164
+ evidenceSummary: ORPOEvidenceSummary,
143
165
  exportId: string,
144
166
  datasetFingerprint: string
145
167
  ): ORPOSample {
146
168
  const now = new Date().toISOString();
169
+ const rejected = buildEvidenceBoundedRejected(artifact, evidenceSummary);
147
170
 
148
171
  return {
149
172
  sampleFingerprint: record.sampleFingerprint,
@@ -151,12 +174,11 @@ function serializeORPOSample(
151
174
  sessionId: record.sessionId,
152
175
  principleId: record.principleId,
153
176
  targetModelFamily: record.targetModelFamily as string, // validated non-null by caller
154
- // For ORPO: prompt = badDecision, chosen = betterDecision, rejected = badDecision
155
- // This teaches the model to prefer betterDecision over badDecision
156
- prompt: artifact.badDecision,
177
+ // Export only evidence-bounded narratives. Free-form artifact text can overstate what was observed.
178
+ prompt: rejected,
157
179
  chosen: artifact.betterDecision,
158
- rejected: artifact.badDecision,
159
- rationale: artifact.rationale,
180
+ rejected,
181
+ rationale: buildEvidenceBoundedRationale(evidenceSummary),
160
182
  datasetMetadata: {
161
183
  sampleFingerprint: record.sampleFingerprint,
162
184
  artifactPath: record.artifactPath,
@@ -164,10 +186,83 @@ function serializeORPOSample(
164
186
  exportedAt: now,
165
187
  exportId,
166
188
  datasetFingerprint,
189
+ evidenceSummary,
190
+ },
191
+ };
192
+ }
193
+
194
/**
 * Builds the evidence summary for one sample from its lineage record.
 *
 * - No lineage record: everything is reported as 'unknown'.
 * - Lineage record present: `lineageStatus` is 'observed', and each category
 *   is derived from its id list.
 *
 * Old lineage files on disk may lack `sourcePainIds` / `sourceGateBlockIds`.
 * A missing (null/undefined) list is reported as 'unknown' with a null count,
 * not as 'not_observed': absence of the field says nothing about whether
 * signals occurred.
 *
 * @param lineageRecord Lineage record matched to the sample, or null when none was found.
 * @returns A summary whose `ids` arrays are copies, so callers can mutate them safely.
 */
function buildEvidenceSummary(
  lineageRecord: ArtifactLineageRecord | null
): ORPOEvidenceSummary {
  // Shared derivation for both categories; they have the same shape.
  const summarize = (
    ids: readonly string[] | null | undefined
  ): ORPOEvidenceSummary['painSignals'] => {
    if (ids == null) {
      return { status: 'unknown', count: null, ids: [] };
    }
    return {
      status: ids.length > 0 ? 'observed' : 'not_observed',
      count: ids.length,
      ids: [...ids],
    };
  };

  if (!lineageRecord) {
    return {
      lineageStatus: 'unknown',
      painSignals: summarize(undefined),
      gateBlocks: summarize(undefined),
    };
  }

  return {
    lineageStatus: 'observed',
    painSignals: summarize(lineageRecord.sourcePainIds),
    gateBlocks: summarize(lineageRecord.sourceGateBlockIds),
  };
}
170
225
 
226
/**
 * Produces the "rejected" narrative for an exported sample, using only what
 * the evidence summary records and never the artifact's free-form text.
 *
 * - Lineage unknown: a neutral sentence that claims no observed failures.
 * - Lineage observed but no pain signals or gate blocks: a generic
 *   "proceeded without verifying" sentence.
 * - Otherwise: one clause per observed category, joined with " and ".
 *
 * The artifact is consulted only to pick the sentence prefix, depending on
 * whether `badDecision` is non-blank. Its content is never copied into the output.
 *
 * @param artifact Dataset artifact for the sample.
 * @param evidenceSummary Evidence derived from the sample's lineage.
 * @returns The evidence-bounded rejected text.
 */
function buildEvidenceBoundedRejected(
  artifact: ReturnType<typeof readDatasetArtifact>,
  evidenceSummary: ORPOEvidenceSummary
): string {
  if (evidenceSummary.lineageStatus === 'unknown') {
    return 'Take the next action without verified source evidence.';
  }

  // Agree the noun with the count so a single item reads "1 observed pain signal".
  const describe = (count: number, singular: string): string =>
    `${count} observed ${singular}${count === 1 ? '' : 's'}`;

  const { painSignals, gateBlocks } = evidenceSummary;
  const clauses: string[] = [];

  if (painSignals.status === 'observed' && painSignals.count !== null && painSignals.count > 0) {
    clauses.push(`continue despite ${describe(painSignals.count, 'pain signal')}`);
  }
  if (gateBlocks.status === 'observed' && gateBlocks.count !== null && gateBlocks.count > 0) {
    clauses.push(`ignore ${describe(gateBlocks.count, 'gate block')}`);
  }

  if (clauses.length === 0) {
    return 'Proceed without first verifying the relevant state from the source session.';
  }

  const hasBadDecision = artifact.badDecision.trim().length > 0;
  const prefix = hasBadDecision
    ? 'Proceed with the rejected action and '
    : 'Take the rejected action and ';
  return `${prefix}${clauses.join(' and ')}.`;
}
251
+
252
/**
 * Produces the rationale text for an exported sample from its evidence summary.
 *
 * - Lineage unknown: a neutral rationale that narrates no failures.
 * - Lineage observed with zero pain signals and zero gate blocks (a null
 *   count is treated as zero): states that lineage records nothing.
 * - Otherwise: reports the observed counts.
 *
 * @param evidenceSummary Evidence derived from the sample's lineage.
 * @returns The evidence-bounded rationale text.
 */
function buildEvidenceBoundedRationale(evidenceSummary: ORPOEvidenceSummary): string {
  if (evidenceSummary.lineageStatus === 'unknown') {
    return 'Source evidence is unknown. Export uses a neutral rationale instead of narrating unverified failures or violations.';
  }

  const painCount = evidenceSummary.painSignals.count ?? 0;
  const gateCount = evidenceSummary.gateBlocks.count ?? 0;
  if (painCount === 0 && gateCount === 0) {
    return 'Source lineage is present but records no pain signals or gate blocks. Export keeps the corrective preference while avoiding invented failure narratives.';
  }

  // Agree each noun with its count so a single item reads "1 pain signal".
  const counted = (count: number, singular: string): string =>
    `${count} ${singular}${count === 1 ? '' : 's'}`;

  const pain = counted(painCount, 'pain signal');
  const gates = counted(gateCount, 'gate block');
  return `Observed source evidence: ${pain} and ${gates}. Prefer the bounded corrective action over repeating the rejected choice.`;
}
265
+
171
266
  // ---------------------------------------------------------------------------
172
267
  // Core Export Function
173
268
  // ---------------------------------------------------------------------------
@@ -187,6 +282,7 @@ export function exportORPOSamples(
187
282
  ): ExportResult {
188
283
  const exportId = crypto.randomUUID();
189
284
  const now = new Date().toISOString();
285
+ const lineageRecords = listArtifactLineageRecords(workspaceDir, 'behavioral-sample');
190
286
 
191
287
  // Step 1: Collect eligible records
192
288
  // Use listDatasetRecords directly to have full control over the family filter
@@ -253,8 +349,12 @@ export function exportORPOSamples(
253
349
  continue;
254
350
  }
255
351
 
352
+ const lineageRecord =
353
+ lineageRecords.find((candidate) => candidate.artifactId === record.artifactId) ?? null;
354
+ const evidenceSummary = buildEvidenceSummary(lineageRecord);
355
+
256
356
  // Serialize
257
- orpoSamples.push(serializeORPOSample(record, artifact, exportId, ''));
357
+ orpoSamples.push(serializeORPOSample(record, artifact, evidenceSummary, exportId, ''));
258
358
  }
259
359
 
260
360
  // Step 4: Fail if all samples failed validation