@principles/pd-cli 1.95.0 → 1.97.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/dist/commands/diagnose.d.ts +1 -0
  2. package/dist/commands/diagnose.d.ts.map +1 -1
  3. package/dist/commands/diagnose.js +44 -0
  4. package/dist/commands/diagnose.js.map +1 -1
  5. package/dist/commands/pain-record.d.ts.map +1 -1
  6. package/dist/commands/pain-record.js +4 -1
  7. package/dist/commands/pain-record.js.map +1 -1
  8. package/dist/commands/quality-scorecard.d.ts +9 -0
  9. package/dist/commands/quality-scorecard.d.ts.map +1 -0
  10. package/dist/commands/quality-scorecard.js +241 -0
  11. package/dist/commands/quality-scorecard.js.map +1 -0
  12. package/dist/index.js +22 -0
  13. package/dist/index.js.map +1 -1
  14. package/dist/services/quality-scorecard/data-extractor.d.ts +28 -0
  15. package/dist/services/quality-scorecard/data-extractor.d.ts.map +1 -0
  16. package/dist/services/quality-scorecard/data-extractor.js +118 -0
  17. package/dist/services/quality-scorecard/data-extractor.js.map +1 -0
  18. package/dist/services/quality-scorecard/local-evaluator.d.ts +18 -0
  19. package/dist/services/quality-scorecard/local-evaluator.d.ts.map +1 -0
  20. package/dist/services/quality-scorecard/local-evaluator.js +112 -0
  21. package/dist/services/quality-scorecard/local-evaluator.js.map +1 -0
  22. package/dist/services/quality-scorecard/strong-model-gate.d.ts +14 -0
  23. package/dist/services/quality-scorecard/strong-model-gate.d.ts.map +1 -0
  24. package/dist/services/quality-scorecard/strong-model-gate.js +128 -0
  25. package/dist/services/quality-scorecard/strong-model-gate.js.map +1 -0
  26. package/package.json +1 -1
  27. package/src/commands/diagnose.ts +45 -0
  28. package/src/commands/pain-record.ts +5 -2
  29. package/src/commands/quality-scorecard.ts +272 -0
  30. package/src/index.ts +25 -0
  31. package/src/services/quality-scorecard/data-extractor.ts +150 -0
  32. package/src/services/quality-scorecard/local-evaluator.ts +142 -0
  33. package/src/services/quality-scorecard/strong-model-gate.ts +160 -0
  34. package/tests/commands/diagnose.test.ts +69 -1
  35. package/tests/commands/pain-record-async.test.ts +4 -0
  36. package/tests/commands/product-path-regression.test.ts +81 -0
@@ -40,10 +40,26 @@ import { loadPdConfig, computeFlagsFromLoadResult } from '../services/pd-config-
40
40
  import { isFeatureEnabled, SPLIT_PIPELINE_TOTAL_TIMEOUT_MS } from '@principles/core/runtime-v2';
41
41
  import * as path from 'path';
42
42
 
43
/**
 * Parses the raw `--stalled-threshold` option value into a positive integer.
 *
 * @param val - Raw option value; `undefined` means the option was not supplied.
 * @returns The parsed integer, or `undefined` when `val` is `undefined`.
 * @throws Error when the trimmed string form of `val` is not a positive decimal
 *   integer, or is too large to be represented exactly as a number.
 */
function validateStalledThreshold(val: unknown): number | undefined {
  if (val === undefined) {
    return undefined;
  }
  const str = String(val).trim();
  // Digits only with no leading zero: rejects "0", "-5", "1.5", "1e3", "" and text.
  if (!/^[1-9]\d*$/.test(str)) {
    throw new Error('stalled-threshold must be a positive integer.');
  }
  const num = parseInt(str, 10);
  // The pattern does not bound the number of digits, so a very long input would
  // otherwise be returned rounded, or as Infinity.
  if (!Number.isSafeInteger(num)) {
    throw new Error('stalled-threshold must be a positive integer.');
  }
  return num;
}
57
+
43
58
  /** Options accepted by handleDiagnoseStatus. */
  interface DiagnoseStatusOptions {
44
59
  /** Identifier of the task whose status is looked up. */
  taskId: string;
45
60
  /** Workspace path handed to resolveWorkspaceDir; optional. */
  workspace?: string;
46
61
  /** When set, a threshold validation failure is reported as a JSON object on stdout. */
  json?: boolean;
62
  /** Raw --stalled-threshold value; checked by validateStalledThreshold before use. */
+ stalledThreshold?: unknown;
47
63
  }
48
64
 
49
65
  interface DiagnoseRunOptions {
@@ -69,6 +85,25 @@ interface DiagnoseRunOptions {
69
85
  * Inspects the current status of a diagnostician task.
70
86
  */
71
87
  export async function handleDiagnoseStatus(opts: DiagnoseStatusOptions): Promise<void> {
88
+ let stalledThresholdSeconds: number | undefined;
89
+ try {
90
+ stalledThresholdSeconds = validateStalledThreshold(opts.stalledThreshold);
91
+ } catch (err) {
92
+ const msg = err instanceof Error ? err.message : String(err);
93
+ if (opts.json) {
94
+ console.log(JSON.stringify({
95
+ ok: false,
96
+ reason: 'invalid_stalled_threshold',
97
+ nextAction: 'Provide a valid positive integer for --stalled-threshold (e.g., --stalled-threshold 300).',
98
+ }));
99
+ } else {
100
+ console.error(`error: ${msg}`);
101
+ console.error('nextAction: Provide a valid positive integer for --stalled-threshold (e.g., --stalled-threshold 300).');
102
+ }
103
+ process.exit(1);
104
+ return;
105
+ }
106
+
72
107
  const workspaceDir = resolveWorkspaceDir(opts.workspace);
73
108
  const stateManager = new RuntimeStateManager({ workspaceDir });
74
109
 
@@ -77,6 +112,7 @@ export async function handleDiagnoseStatus(opts: DiagnoseStatusOptions): Promise
77
112
  const result = await diagnoseStatus({
78
113
  taskId: opts.taskId,
79
114
  stateManager,
115
+ stalledThresholdSeconds,
80
116
  });
81
117
 
82
118
  if (!result) {
@@ -102,6 +138,15 @@ export async function handleDiagnoseStatus(opts: DiagnoseStatusOptions): Promise
102
138
  if (result.lastError) {
103
139
  console.log(` Last Error: ${result.lastError}`);
104
140
  }
141
+ if (result.reason) {
142
+ console.log(` Reason: ${result.reason}`);
143
+ }
144
+ if (result.age !== undefined && result.age !== null) {
145
+ console.log(` Age: ${result.age}s`);
146
+ }
147
+ if (result.nextAction) {
148
+ console.log(` Next Action: ${result.nextAction}`);
149
+ }
105
150
  console.log('');
106
151
  } finally {
107
152
  await stateManager.close();
@@ -168,11 +168,14 @@ export async function handlePainRecord(opts: RecordOptions): Promise<void> {
168
168
  }
169
169
 
170
170
  if (opts.json) {
171
- const out = { ...result };
171
+ const out: Record<string, unknown> = { ...result };
172
172
  // Ensure nextAction is present for actionable states
173
173
  if (out.status === 'submitted') {
174
174
  if (!out.nextAction) {
175
- out.nextAction = `pd diagnose run --task-id ${out.taskId} --workspace "${workspaceDir}"`;
175
+ out.nextAction = `pd diagnose run --task-id ${out.taskId} --workspace "${workspaceDir}" --runtime pi-ai --json`;
176
+ }
177
+ if (!out.reason) {
178
+ out.reason = out.message;
176
179
  }
177
180
  }
178
181
  console.log(JSON.stringify(out, null, 2));
@@ -0,0 +1,272 @@
1
+ /**
2
+ * pd quality scorecard — CLI command (PRI-361)
3
+ *
4
+ * JSON contract: --json mode outputs EXACTLY one JSON object to stdout.
5
+ * All progress/diagnostic output goes to stderr.
6
+ * Errors produce structured JSON: { ok: false, error, nextAction }.
7
+ */
8
+
9
+ import { mkdirSync, writeFileSync } from 'fs';
10
+ import { dirname } from 'path';
11
+ import type {
12
+ EpisodeEvaluation,
13
+ QualityScorecardReport,
14
+ StrongModelAdjudication,
15
+ } from '@principles/core/quality-scorecard';
16
+ import {
17
+ validateCliOptions,
18
+ needsAdjudication,
19
+ generateMarkdownReport,
20
+ generateHtmlReport,
21
+ generateJsonReport,
22
+ } from '@principles/core/quality-scorecard';
23
+ import { extractEpisodes, extractLogStats } from '../services/quality-scorecard/data-extractor.js';
24
+ import { evaluateWithLocalModel, checkLmStudioAvailable } from '../services/quality-scorecard/local-evaluator.js';
25
+ import { adjudicate, skippedAdjudication, determineFinalLabel } from '../services/quality-scorecard/strong-model-gate.js';
26
+
27
+ // ── Logging: stderr only, silent in JSON mode ──────────────────────
28
+
29
let jsonMode = false;

/**
 * Writes one progress line to stderr.
 * Produces no output while JSON mode is active.
 */
function log(msg: string): void {
  if (jsonMode) {
    return;
  }
  process.stderr.write(`${msg}\n`);
}
36
+
37
+ // ── Structured JSON output helpers ─────────────────────────────────
38
+
39
/** Serializes `data` as 2-space-indented JSON and writes it, newline-terminated, to stdout. */
function writeJsonOutput(data: unknown): void {
  const serialized = JSON.stringify(data, null, 2);
  process.stdout.write(`${serialized}\n`);
}
42
+
43
/** Emits the structured failure object `{ ok: false, error, nextAction }` on stdout. */
function writeJsonError(error: string, nextAction: string): void {
  const failure = { ok: false, error, nextAction };
  writeJsonOutput(failure);
}
46
+
47
+ // ── Summary computation ────────────────────────────────────────────
48
+
49
/**
 * Aggregates per-episode evaluations into the counters shown in the report summary.
 *
 * The local pass/fail counters look only at the local model's own result
 * (mvpMet and totalScore); the final/needs-review/local-only counters look at
 * finalLabel, which may already reflect strong-model adjudication.
 */
function computeSummary(evaluations: EpisodeEvaluation[]) {
  // Counts the evaluations for which `matches` returns a truthy value.
  const tally = (matches: (item: EpisodeEvaluation) => unknown): number =>
    evaluations.reduce((count, item) => (matches(item) ? count + 1 : count), 0);

  const totalEpisodes = evaluations.length;

  const localPassCount = tally(
    item => item.localEvaluation.mvpMet && item.localEvaluation.totalScore >= 12,
  );
  const localFailCount = tally(item => item.localEvaluation.totalScore <= 6);
  const strongModelReviewedCount = tally(
    item =>
      item.strongModelAdjudication &&
      item.strongModelAdjudication.adjudicationStatus !== 'skipped',
  );
  const finalPassCount = tally(item => item.finalLabel === 'pass');
  const finalFailCount = tally(item => item.finalLabel === 'fail');
  const needsReviewCount = tally(item => item.finalLabel === 'needs-review');
  const localOnlyCount = tally(
    item => item.finalLabel === 'local-pass' || item.finalLabel === 'local-fail',
  );

  let averageLocalScore = 0;
  if (totalEpisodes > 0) {
    let scoreSum = 0;
    for (const item of evaluations) {
      scoreSum += item.localEvaluation.totalScore;
    }
    averageLocalScore = scoreSum / totalEpisodes;
  }

  const mvpThresholdMetCount = tally(item => item.localEvaluation.mvpMet);

  return {
    totalEpisodes,
    localPassCount,
    localFailCount,
    strongModelReviewedCount,
    finalPassCount,
    finalFailCount,
    needsReviewCount,
    localOnlyCount,
    averageLocalScore,
    mvpThresholdMetCount,
  };
}
73
+
74
+ // ── Main handler ───────────────────────────────────────────────────
75
+
76
/**
 * Reports a scorecard failure in the active output mode and marks the process as failed.
 *
 * JSON mode writes one `{ ok: false, error, nextAction }` object to stdout;
 * otherwise `humanMessage` is written to stderr. In both cases
 * `process.exitCode` is set to 1.
 */
function reportScorecardFailure(
  isJson: boolean,
  error: string,
  nextAction: string,
  humanMessage: string,
): void {
  if (isJson) {
    writeJsonError(error, nextAction);
  } else {
    process.stderr.write(humanMessage);
  }
  process.exitCode = 1;
}

/**
 * Handler for `pd quality scorecard`.
 *
 * Validates the options, checks that trajectory.db and the local model endpoint
 * are usable, evaluates every extracted episode with the local model (and,
 * when configured, the strong model), then writes the report to
 * `options.output`. With `--json` the report is additionally printed to stdout
 * as `{ ok: true, report }`.
 *
 * Every failure — including one thrown during extraction, evaluation or report
 * writing — is reported through {@link reportScorecardFailure}, so JSON mode
 * always emits exactly one JSON object on stdout.
 *
 * @param opts - Raw option bag from the CLI parser.
 */
export async function handleQualityScorecard(opts: Record<string, unknown>): Promise<void> {
  const isJson = Boolean(opts.json);
  jsonMode = isJson;

  // Resolve workspace paths
  const { resolveWorkspaceDir } = await import('../resolve-workspace.js');
  const { join } = await import('path');
  const { existsSync } = await import('fs');
  const workspace = resolveWorkspaceDir(opts.workspace as string | undefined);
  const dbPath = join(workspace, '.state', 'trajectory.db');
  const logsDir = join(workspace, '.state', 'logs');

  // 1. Validate CLI options
  const { options, errors } = validateCliOptions({
    dbPath,
    logsDir,
    localModelBaseUrl: opts.localUrl ?? 'http://localhost:12341/v1',
    localModelId: opts.localModel ?? 'qwen3.6-27b-mtp',
    strongModelId: opts.strongModel ?? null,
    limit: opts.limit ?? '0',
    format: isJson ? 'json' : (opts.format ?? 'markdown'),
    output: opts.output,
    minPainScore: opts.minScore ?? '50',
    skipStrongModel: opts.skipStrongModel ?? false,
  });

  if (errors.length > 0) {
    reportScorecardFailure(
      isJson,
      errors.map(e => `${e.field}: ${e.message}`).join('; '),
      'Fix the invalid options and retry',
      `❌ Invalid options:\n${errors.map(e => ` - ${e.field}: ${e.message}`).join('\n')}\n`,
    );
    return;
  }

  // 2. Check files exist
  if (!existsSync(options.dbPath)) {
    const msg = `trajectory.db not found at: ${options.dbPath}`;
    reportScorecardFailure(
      isJson,
      msg,
      'Ensure the workspace has PD data (run PD first to generate trajectory.db)',
      `❌ ${msg}\n`,
    );
    return;
  }

  // 3. Ensure output directory exists
  const outputDir = dirname(options.output);
  if (outputDir && !existsSync(outputDir)) {
    try {
      mkdirSync(outputDir, { recursive: true });
      log(`Created output directory: ${outputDir}`);
    } catch (err: unknown) {
      const msg = err instanceof Error ? err.message : String(err);
      reportScorecardFailure(
        isJson,
        `Cannot create output directory: ${msg}`,
        'Ensure the output path is writable',
        `❌ Cannot create output directory: ${msg}\n`,
      );
      return;
    }
  }

  // 4. Check LM Studio
  log('🔍 PD Quality Scorecard — Starting...');
  log(` DB: ${options.dbPath}`);
  log(` Local Model: ${options.localModelId} @ ${options.localModelBaseUrl}`);
  log(` Strong Model: ${options.strongModelId ?? 'skipped'}`);

  const lmStatus = await checkLmStudioAvailable(options.localModelBaseUrl);
  if (!lmStatus.available) {
    reportScorecardFailure(
      isJson,
      `LM Studio not available: ${lmStatus.error}`,
      'Start LM Studio or check --local-url',
      `❌ LM Studio not available at ${options.localModelBaseUrl}: ${lmStatus.error}\n`,
    );
    return;
  }

  if (!lmStatus.models.includes(options.localModelId)) {
    const msg = `Model "${options.localModelId}" not found. Available: ${lmStatus.models.join(', ')}`;
    reportScorecardFailure(isJson, msg, 'Use --local-model with an available model', `❌ ${msg}\n`);
    return;
  }

  let report: QualityScorecardReport;
  try {
    // 5. Extract data
    log('\n📊 Extracting dogfood data...');
    const { episodes, stats: extractStats } = await extractEpisodes(options.dbPath, {
      minScore: options.minPainScore,
      limit: options.limit,
    });
    log(` Found ${episodes.length} unique episodes (total pain events: ${extractStats.total})`);

    const logStats = extractLogStats(options.logsDir);
    log(` Event logs: ${logStats.totalEvents} events (${logStats.painSignalCount} pain signals)`);
    // Surface log-reading problems instead of dropping them silently.
    if (logStats.degradedReasons.length > 0) {
      log(` Event log warnings: ${logStats.degradedReasons.length} (first: ${logStats.degradedReasons[0]})`);
    }

    // 6. Evaluate each episode
    log('\n🤖 Running local model evaluation...');
    const evaluations: EpisodeEvaluation[] = [];

    for (let i = 0; i < episodes.length; i++) {
      const ep = episodes[i];
      if (!ep) continue;
      log(` [${i + 1}/${episodes.length}] ${ep.episodeId} (score=${ep.score})...`);

      const localEval = await evaluateWithLocalModel(ep, {
        baseUrl: options.localModelBaseUrl,
        model: options.localModelId,
      }, (msg: string) => log(` ${msg}`));
      log(` Local: ${localEval.totalScore}/14 MVP=${localEval.mvpMet} flags=[${localEval.flags.join(',')}]`);

      // 7. Strong model adjudication
      let adjudication: StrongModelAdjudication;
      if (options.skipStrongModel || !options.strongModelId) {
        adjudication = skippedAdjudication(
          options.skipStrongModel
            ? 'Strong model skipped by --skip-strong-model flag'
            : 'No strong model configured'
        );
      } else {
        const decision = needsAdjudication(ep, localEval);
        if (decision.shouldAdjudicate) {
          log(` Adjudicating (${decision.priority}: ${decision.reason})...`);
          adjudication = await adjudicate(ep, localEval, { modelId: options.strongModelId, log: (msg: string) => log(` ${msg}`) });
          log(` Adjudication: ${adjudication.adjudicationStatus}`);
        } else {
          adjudication = skippedAdjudication(decision.reason);
          log(` Adjudication skipped: ${decision.reason}`);
        }
      }

      const finalLabel = determineFinalLabel(localEval, adjudication);
      evaluations.push({ episode: ep, localEvaluation: localEval, strongModelAdjudication: adjudication, finalLabel });
    }

    // 8. Build and write report
    log('\n📝 Generating report...');
    const summary = computeSummary(evaluations);
    report = {
      generatedAt: new Date().toISOString(),
      dataSource: {
        painEventCount: extractStats.total,
        evolutionTaskCount: 0,
        principleEventCount: 0,
        gateBlockCount: 0,
        dateRange: extractStats.dateRange,
      },
      localEvaluatorConfig: {
        model: options.localModelId,
        baseUrl: options.localModelBaseUrl.replace(/\/v\d+$/, '/...'),
        apiKeyStatus: 'not-required',
      },
      strongModelConfig: {
        model: options.strongModelId,
        status: options.skipStrongModel || !options.strongModelId ? 'skipped' : 'configured',
      },
      evaluations,
      summary,
      knownLimitations: [
        'Local model scores are advisory only — not final quality conclusions.',
        'Without strong-model adjudication, samples are marked local-pass/local-fail/needs-review.',
        'Deduplication is based on reason text similarity — may miss distinct episodes.',
        'Local model output is non-deterministic despite temperature=0.1.',
      ],
    };

    let content: string;
    switch (options.format) {
      case 'html': content = generateHtmlReport(report); break;
      case 'json': content = generateJsonReport(report); break;
      case 'markdown':
      default: content = generateMarkdownReport(report); break;
    }

    writeFileSync(options.output, content, 'utf-8');

    log(`\n✅ Report written to: ${options.output}`);
    log(` Format: ${options.format}`);
    log(` Episodes: ${summary.totalEpisodes}`);
    log(` Local Pass: ${summary.localPassCount} | Local Fail: ${summary.localFailCount}`);
    log(` Strong Model Reviewed: ${summary.strongModelReviewedCount}`);
    log(` Final Pass: ${summary.finalPassCount} | Final Fail: ${summary.finalFailCount} | Needs Review: ${summary.needsReviewCount}`);
  } catch (err: unknown) {
    // Anything thrown while reading the DB, adjudicating, or writing the report
    // must still be reported in the selected output mode.
    const msg = err instanceof Error ? err.message : String(err);
    reportScorecardFailure(
      isJson,
      `Scorecard generation failed: ${msg}`,
      'Check that trajectory.db is a valid PD database and that the output path is writable, then retry',
      `❌ Scorecard generation failed: ${msg}\n`,
    );
    return;
  }

  // JSON mode: output exactly one JSON object to stdout
  if (isJson) {
    writeJsonOutput({ ok: true, report });
  }
}
package/src/index.ts CHANGED
@@ -306,6 +306,7 @@ diagnoseCmd
306
306
  .description('Inspect diagnostician task status')
307
307
  .requiredOption('-t, --task-id <taskId>', 'Task ID to inspect')
308
308
  .option('-w, --workspace <path>', 'Workspace directory')
309
+ .option('--stalled-threshold <seconds>', 'Age threshold in seconds for classifying task as stalled')
309
310
  .option('--json', 'Output raw JSON')
310
311
  .action(async (opts) => {
311
312
  await handleDiagnoseStatus(opts);
@@ -926,4 +927,28 @@ consoleCmd.action(async (opts) => {
926
927
  });
927
928
  });
928
929
 
930
+ // ─── Quality Scorecard (PRI-361) ──────────────────────────────────
931
+
932
// Parent command that groups the quality-related subcommands.
const qualityCmd = program.command('quality');
qualityCmd.description('Quality scoring and evaluation');

// `pd quality scorecard`: the handler module is loaded only when the command runs.
const scorecardCmd = qualityCmd.command('scorecard');
scorecardCmd.description('Generate quality scorecard report for PD pain→diagnosis→principle chain');
scorecardCmd.option('-w, --workspace <path>', 'Workspace directory');
scorecardCmd.option('--local-model <id>', 'LM Studio model ID', 'qwen3.6-27b-mtp');
scorecardCmd.option('--local-url <url>', 'LM Studio base URL', 'http://localhost:12341/v1');
scorecardCmd.option('--strong-model <id>', 'Strong model for adjudication (provider/model)');
scorecardCmd.option('--skip-strong-model', 'Skip strong model adjudication', false);
scorecardCmd.option('--min-score <n>', 'Minimum pain score to evaluate', '50');
scorecardCmd.option('--limit <n>', 'Max episodes to evaluate (0=all)', '0');
scorecardCmd.option('--format <fmt>', 'Output format: json, markdown, html', 'markdown');
scorecardCmd.option('--output <path>', 'Output file path');
scorecardCmd.option('--json', 'Output as JSON', false);
scorecardCmd.action(async (opts) => {
  const scorecardModule = await import('./commands/quality-scorecard.js');
  await scorecardModule.handleQualityScorecard(opts);
});
953
+
929
954
  program.parse();
@@ -0,0 +1,150 @@
1
+ /**
2
+ * PRI-361 — Data Extractor (I/O layer in pd-cli)
3
+ *
4
+ * Reads trajectory.db and event logs. Uses runtime validation
5
+ * from @principles/core — no `as RawPainEvent[]` casts.
6
+ */
7
+
8
+ import type { PainEpisode } from '@principles/core/quality-scorecard';
9
+ import {
10
+ validatePainRow,
11
+ validateEvolutionRow,
12
+ validatePrincipleEventRow,
13
+ validateGateRow,
14
+ sanitize,
15
+ truncate,
16
+ } from '@principles/core/quality-scorecard';
17
+ import { readdirSync, readFileSync } from 'fs';
18
+ import { join } from 'path';
19
+
20
/** Result of reading pain episodes out of trajectory.db. */
export interface ExtractionResult {
  /** Deduplicated episodes, newest first. */
  episodes: PainEpisode[];
  /** Count and created_at range of the validated pain events that matched the score filter. */
  stats: { total: number; dateRange: { from: string; to: string } };
}

/**
 * Reads pain events from the SQLite database at `dbPath` and turns them into
 * deduplicated episodes, each linked to a nearby evolution task, nearby
 * principle events, and the gate-block count of its session.
 *
 * @param dbPath - Path of the database file; opened read-only and always closed.
 * @param options.minScore - When greater than 0, only pain events with `score >= minScore` are read.
 * @param options.limit - When greater than 0, the maximum number of episodes returned.
 *   The limit is applied after deduplication, so duplicates among the newest
 *   rows do not reduce the number of episodes, and `stats` describes every
 *   matching pain event rather than a truncated window. The cost is that all
 *   matching rows are read.
 * @returns The episodes plus summary statistics.
 */
export async function extractEpisodes(
  dbPath: string,
  options: { minScore?: number; limit?: number } = {}
): Promise<ExtractionResult> {
  const Database = (await import('better-sqlite3')).default;
  const db = new Database(dbPath, { readonly: true });

  const minScore = options.minScore ?? 0;
  const limit = options.limit ?? 0;

  try {
    // Fetch pain events with parameterized query
    let painQuery = 'SELECT id, session_id, source, score, reason, severity, created_at FROM pain_events WHERE 1=1';
    const params: (string | number)[] = [];
    if (minScore > 0) {
      painQuery += ' AND score >= ?';
      params.push(minScore);
    }
    painQuery += ' ORDER BY created_at DESC';

    const rawPainRows = db.prepare(painQuery).all(...params);
    // Validate each row — no unsafe cast
    const painEvents = rawPainRows.map(validatePainRow).filter((r): r is NonNullable<typeof r> => r !== null);

    // Fetch evolution tasks
    const rawEvoRows = db.prepare('SELECT task_id, score, status, resolution, created_at FROM evolution_tasks').all();
    const evoTasks = rawEvoRows.map(validateEvolutionRow).filter((r): r is NonNullable<typeof r> => r !== null);

    // Fetch principle events
    const rawPeRows = db.prepare('SELECT principle_id, event_type, created_at FROM principle_events').all();
    const prEvents = rawPeRows.map(validatePrincipleEventRow).filter((r): r is NonNullable<typeof r> => r !== null);

    // Gate blocks count per session — validate each row
    const rawGateRows = db.prepare('SELECT session_id, COUNT(*) as cnt FROM gate_blocks GROUP BY session_id').all();
    const gateRows = rawGateRows.map(validateGateRow).filter((r): r is NonNullable<typeof r> => r !== null);
    const gateBlockMap = new Map(gateRows.map(g => [g.session_id, g.cnt]));

    // Parse the timestamps once instead of once per pain event.
    const timedTasks = evoTasks.map(task => ({ task, time: new Date(task.created_at).getTime() }));
    const timedPrinciples = prEvents.flatMap(e =>
      e.principle_id ? [{ principleId: e.principle_id, time: new Date(e.created_at).getTime() }] : []
    );

    // Deduplicate by sanitized reason
    const seen = new Set<string>();
    const episodes: PainEpisode[] = [];

    for (const pe of painEvents) {
      if (limit > 0 && episodes.length >= limit) break;

      const dedupKey = sanitize(pe.reason).substring(0, 80);
      if (seen.has(dedupKey)) continue;
      seen.add(dedupKey);

      const peTime = new Date(pe.created_at).getTime();

      // First task created within one hour of the pain event whose score is within 10 points.
      const linkedTask = timedTasks.find(
        t => Math.abs(t.time - peTime) < 3600000 && Math.abs(t.task.score - pe.score) <= 10
      )?.task;

      // Distinct principle ids recorded within two hours of the pain event, in first-seen order.
      const linkedPrinciples = [
        ...new Set(
          timedPrinciples
            .filter(p => Math.abs(p.time - peTime) < 7200000)
            .map(p => p.principleId)
        ),
      ];

      episodes.push({
        episodeId: `EP-${pe.id}`,
        summary: truncate(sanitize(pe.reason)),
        source: pe.source,
        score: pe.score,
        severity: pe.severity,
        createdAt: pe.created_at,
        evolutionTaskResolution: linkedTask?.resolution ?? null,
        linkedPrinciples,
        gateBlockCount: gateBlockMap.get(pe.session_id) ?? 0,
      });
    }

    const dates = painEvents.map(e => e.created_at).sort();
    return {
      episodes,
      stats: {
        total: painEvents.length,
        dateRange: {
          from: dates[0] ?? new Date().toISOString(),
          to: dates[dates.length - 1] ?? new Date().toISOString(),
        },
      },
    };
  } finally {
    db.close();
  }
}
119
+
120
/** Counters gathered from the `.jsonl` event logs. */
export interface LogStats {
  /** Number of lines that parsed as JSON. */
  totalEvents: number;
  /** Number of parsed objects whose `type` is `'pain_signal'`. */
  painSignalCount: number;
  /** One entry per problem met while reading (unreadable directory, unreadable file, unparsable line). */
  degradedReasons: string[];
}

/**
 * Scans every `.jsonl` file directly inside `logsDir` and counts events.
 *
 * Never throws: problems are recorded in `degradedReasons` and scanning
 * continues with the next line or file.
 *
 * @param logsDir - Directory containing the event logs.
 * @returns The collected counters and any degradation notes.
 */
export function extractLogStats(logsDir: string): LogStats {
  const stats: LogStats = { totalEvents: 0, painSignalCount: 0, degradedReasons: [] };
  const describe = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  let files: string[];
  try {
    files = readdirSync(logsDir).filter(f => f.endsWith('.jsonl'));
  } catch (dirErr: unknown) {
    stats.degradedReasons.push(`logs-dir-unreadable:${describe(dirErr)}`);
    return stats;
  }

  for (const file of files) {
    let text: string;
    try {
      text = readFileSync(join(logsDir, file), 'utf-8');
    } catch (fileErr: unknown) {
      // One unreadable entry must not hide the events in the remaining files.
      stats.degradedReasons.push(`log-file-unreadable:${file}:${describe(fileErr)}`);
      continue;
    }

    // Split on LF or CRLF and skip whitespace-only lines so Windows line
    // endings do not produce spurious parse failures.
    for (const rawLine of text.split(/\r?\n/)) {
      const line = rawLine.trim();
      if (line === '') continue;

      let ev: unknown;
      try {
        ev = JSON.parse(line);
      } catch (parseErr: unknown) {
        stats.degradedReasons.push(`jsonl-parse-fail:${file}:${describe(parseErr)}`);
        continue;
      }

      stats.totalEvents++;
      // Valid JSON need not be an object (e.g. `null`); only objects can carry a type.
      if (typeof ev === 'object' && ev !== null && (ev as { type?: unknown }).type === 'pain_signal') {
        stats.painSignalCount++;
      }
    }
  }

  return stats;
}
@@ -0,0 +1,142 @@
1
+ /**
2
+ * PRI-361 — Local Evaluator (I/O layer in pd-cli)
3
+ *
4
+ * Calls LM Studio for advisory scoring. Uses core validation
5
+ * to parse LLM responses — no unsafe casts.
6
+ */
7
+
8
+ import type {
9
+ PainEpisode,
10
+ LocalEvaluation,
11
+ RubricDimension,
12
+ RubricScore,
13
+ } from '@principles/core/quality-scorecard';
14
+ import {
15
+ RUBRIC_LABELS,
16
+ RUBRIC_PROMPTS,
17
+ RUBRIC_DIMENSIONS as DIMS,
18
+ meetsMvpThreshold,
19
+ sumScores,
20
+ validateLlmScoreResponse,
21
+ extractJsonFromLlmResponse,
22
+ } from '@principles/core/quality-scorecard';
23
+
24
/**
 * Builds the user prompt sent to the local model for one pain episode.
 *
 * The prompt lists the episode's fields, one line per rubric dimension
 * (id, label and prompt text), three additional checks, and the required
 * JSON reply format with `scores`, `rationales` and `flags`.
 * A missing evolution-task resolution and an empty principle list are both
 * rendered as "none".
 *
 * @param episode - Episode whose fields are interpolated into the prompt.
 * @returns The prompt text.
 */
+ function buildEvaluationPrompt(episode: PainEpisode): string {
25
// One "<dimension> (<label>): <prompt>" line per rubric dimension.
+ const dimensions = DIMS.map(d => `${d} (${RUBRIC_LABELS[d]}): ${RUBRIC_PROMPTS[d]}`).join('\n');
26
+
27
+ return `You are a quality evaluator for an AI agent's pain-signal -> diagnosis -> principle pipeline.
28
+
29
+ ## Task
30
+ Evaluate this pain episode on a 7-dimension rubric. Each dimension scores 0 (fail), 1 (partial), or 2 (pass).
31
+
32
+ ## Pain Episode
33
+ - ID: ${episode.episodeId}
34
+ - Source: ${episode.source}
35
+ - Pain Score: ${episode.score}
36
+ - Severity: ${episode.severity}
37
+ - Summary: ${episode.summary}
38
+ - Created: ${episode.createdAt}
39
+ - Evolution Task Resolution: ${episode.evolutionTaskResolution ?? 'none'}
40
+ - Linked Principles: ${episode.linkedPrinciples.length > 0 ? episode.linkedPrinciples.join(', ') : 'none'}
41
+ - Gate Blocks: ${episode.gateBlockCount}
42
+
43
+ ## Rubric Dimensions
44
+ ${dimensions}
45
+
46
+ ## Additional Checks
47
+ - Is the language consistent (not mixing Chinese and English incoherently)?
48
+ - Is the diagnosis/principle overly abstract (no concrete actionable guidance)?
49
+ - Does it fabricate non-existent evidence, axioms, or references?
50
+
51
+ ## Output Format (STRICT JSON)
52
+ Respond with ONLY a JSON object:
53
+ {
54
+ "scores": { "G1": 0-2, "G2": 0-2, "G3": 0-2, "G4": 0-2, "G5": 0-2, "G6": 0-2, "G7": 0-2 },
55
+ "rationales": { "G1": "...", "G2": "...", "G3": "...", "G4": "...", "G5": "...", "G6": "...", "G7": "..." },
56
+ "flags": ["list of issues found"]
57
+ }
58
+
59
+ Do NOT output anything other than this JSON object.`;
60
+ }
61
+
62
/** Connection settings for the local chat-completions endpoint. */
export interface LocalEvaluatorConfig {
  baseUrl: string;
  model: string;
}

/**
 * Scores one episode by POSTing the evaluation prompt to
 * `<baseUrl>/chat/completions` and validating the JSON reply.
 *
 * A failed request, a non-JSON reply or a validation error does not reject:
 * the problem is passed to `log` and an all-zero evaluation flagged
 * `evaluation_error` is returned.
 *
 * @param episode - Episode to evaluate.
 * @param config - Endpoint base URL and model id.
 * @param log - Receives the error description when evaluation fails.
 * @returns The local evaluation for the episode.
 */
export async function evaluateWithLocalModel(
  episode: PainEpisode,
  config: LocalEvaluatorConfig,
  log: (msg: string) => void
): Promise<LocalEvaluation> {
  const userPrompt = buildEvaluationPrompt(episode);
  const trimmedBase = config.baseUrl.replace(/\/+$/, '');
  const endpoint = `${trimmedBase}/chat/completions`;

  try {
    const chatMessages = [
      { role: 'system', content: 'You are a precise JSON-output quality evaluator. Output only valid JSON.' },
      { role: 'user', content: userPrompt },
    ];
    const requestBody = JSON.stringify({
      model: config.model,
      messages: chatMessages,
      temperature: 0.1,
      max_tokens: 2000,
    });

    const response = await fetch(endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: requestBody,
      signal: AbortSignal.timeout(120_000),
    });
    if (!response.ok) {
      throw new Error(`LM Studio request failed: ${response.status}`);
    }

    const payload = (await response.json()) as { choices: { message: { content: string } }[] };
    const replyText = payload.choices?.[0]?.message?.content ?? '';

    const candidate = extractJsonFromLlmResponse(replyText);
    if (candidate === null) {
      throw new Error(`LM Studio returned non-JSON response`);
    }

    const validated = validateLlmScoreResponse(candidate);
    return {
      model: config.model,
      dimensionScores: validated.scores,
      dimensionRationales: validated.rationales,
      totalScore: sumScores(validated.scores),
      maxScore: 14,
      mvpMet: meetsMvpThreshold(validated.scores),
      flags: validated.flags,
    };
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    log(`Evaluation error for ${episode.episodeId}: ${reason}`);

    // Every dimension gets a zero score and the same failure note.
    const failureNote = `Evaluation failed: ${reason}`;
    const dimensionScores = Object.fromEntries(
      DIMS.map(dim => [dim, 0])
    ) as Record<RubricDimension, RubricScore>;
    const dimensionRationales = Object.fromEntries(
      DIMS.map(dim => [dim, failureNote])
    ) as Record<RubricDimension, string>;

    return {
      model: config.model,
      dimensionScores,
      dimensionRationales,
      totalScore: 0,
      maxScore: 14,
      mvpMet: false,
      flags: ['evaluation_error'],
    };
  }
}
130
+
131
/**
 * Probes `<baseUrl>/models` (5 second timeout) and lists the model ids it reports.
 *
 * Never rejects: network errors, non-2xx statuses and malformed bodies are
 * returned as `{ available: false, models: [], error }`.
 *
 * @param baseUrl - Base URL of the endpoint; trailing slashes are ignored.
 * @returns `available: true` with the reported ids, or `available: false` with an error description.
 */
export async function checkLmStudioAvailable(baseUrl: string): Promise<{ available: boolean; models: string[]; error?: string }> {
  try {
    const url = `${baseUrl.replace(/\/+$/, '')}/models`;
    const resp = await fetch(url, { signal: AbortSignal.timeout(5000) });
    if (!resp.ok) return { available: false, models: [], error: `HTTP ${resp.status}` };

    // The body comes from an external process, so validate it instead of trusting a cast.
    const payload: unknown = await resp.json();
    if (typeof payload !== 'object' || payload === null) {
      return { available: false, models: [], error: 'Unexpected response body from /models' };
    }

    const entries = (payload as { data?: unknown }).data;
    if (!entries) {
      // A missing or empty `data` field means the server reported no models.
      return { available: true, models: [] };
    }
    if (!Array.isArray(entries)) {
      return { available: false, models: [], error: 'Unexpected response body from /models: "data" is not an array' };
    }

    // Keep only entries that carry a string id so `models` really is string[].
    const models: string[] = [];
    for (const entry of entries as unknown[]) {
      if (typeof entry !== 'object' || entry === null) continue;
      const id = (entry as { id?: unknown }).id;
      if (typeof id === 'string') models.push(id);
    }
    return { available: true, models };
  } catch (err: unknown) {
    return { available: false, models: [], error: err instanceof Error ? err.message : String(err) };
  }
}