@pennyfarthing/core 7.4.1 → 7.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. package/package.json +1 -1
  2. package/packages/core/dist/cli/commands/doctor-legacy.test.d.ts +13 -0
  3. package/packages/core/dist/cli/commands/doctor-legacy.test.d.ts.map +1 -0
  4. package/packages/core/dist/cli/commands/doctor-legacy.test.js +207 -0
  5. package/packages/core/dist/cli/commands/doctor-legacy.test.js.map +1 -0
  6. package/packages/core/dist/cli/commands/doctor.d.ts +16 -0
  7. package/packages/core/dist/cli/commands/doctor.d.ts.map +1 -1
  8. package/packages/core/dist/cli/commands/doctor.js +130 -2
  9. package/packages/core/dist/cli/commands/doctor.js.map +1 -1
  10. package/packages/core/dist/cli/commands/init.d.ts.map +1 -1
  11. package/packages/core/dist/cli/commands/init.js +17 -27
  12. package/packages/core/dist/cli/commands/init.js.map +1 -1
  13. package/packages/core/dist/cli/commands/update.d.ts.map +1 -1
  14. package/packages/core/dist/cli/commands/update.js +21 -52
  15. package/packages/core/dist/cli/commands/update.js.map +1 -1
  16. package/packages/core/dist/cli/utils/symlinks.d.ts +15 -0
  17. package/packages/core/dist/cli/utils/symlinks.d.ts.map +1 -1
  18. package/packages/core/dist/cli/utils/symlinks.js +148 -2
  19. package/packages/core/dist/cli/utils/symlinks.js.map +1 -1
  20. package/packages/core/dist/cli/utils/themes.d.ts.map +1 -1
  21. package/packages/core/dist/cli/utils/themes.js +9 -0
  22. package/packages/core/dist/cli/utils/themes.js.map +1 -1
  23. package/pennyfarthing-dist/agents/dev.md +29 -24
  24. package/pennyfarthing-dist/agents/handoff.md +42 -119
  25. package/pennyfarthing-dist/agents/reviewer.md +32 -37
  26. package/pennyfarthing-dist/agents/sm-handoff.md +43 -66
  27. package/pennyfarthing-dist/agents/sm.md +52 -35
  28. package/pennyfarthing-dist/agents/tea.md +25 -8
  29. package/pennyfarthing-dist/agents/testing-runner.md +4 -4
  30. package/pennyfarthing-dist/commands/architect.md +0 -55
  31. package/pennyfarthing-dist/commands/dev.md +1 -54
  32. package/pennyfarthing-dist/commands/devops.md +0 -52
  33. package/pennyfarthing-dist/commands/health-check.md +33 -0
  34. package/pennyfarthing-dist/commands/orchestrator.md +0 -49
  35. package/pennyfarthing-dist/commands/pm.md +0 -53
  36. package/pennyfarthing-dist/commands/reviewer.md +1 -58
  37. package/pennyfarthing-dist/commands/sm.md +1 -64
  38. package/pennyfarthing-dist/commands/sprint.md +133 -0
  39. package/pennyfarthing-dist/commands/standalone.md +194 -0
  40. package/pennyfarthing-dist/commands/tea.md +1 -57
  41. package/pennyfarthing-dist/commands/tech-writer.md +0 -46
  42. package/pennyfarthing-dist/commands/theme-maker.md +10 -5
  43. package/pennyfarthing-dist/commands/ux-designer.md +0 -55
  44. package/pennyfarthing-dist/guides/XML-TAGS.md +156 -0
  45. package/pennyfarthing-dist/guides/agent-behavior.md +64 -38
  46. package/pennyfarthing-dist/guides/measurement-framework.md +210 -0
  47. package/pennyfarthing-dist/personas/themes/a-team.yaml +130 -0
  48. package/pennyfarthing-dist/personas/themes/alice-in-wonderland.yaml +1 -1
  49. package/pennyfarthing-dist/personas/themes/ancient-strategists.yaml +1 -1
  50. package/pennyfarthing-dist/personas/themes/arcane.yaml +1 -1
  51. package/pennyfarthing-dist/personas/themes/better-call-saul.yaml +1 -1
  52. package/pennyfarthing-dist/personas/themes/big-lebowski.yaml +1 -1
  53. package/pennyfarthing-dist/personas/themes/black-sails.yaml +1 -1
  54. package/pennyfarthing-dist/personas/themes/blade-runner.yaml +1 -1
  55. package/pennyfarthing-dist/personas/themes/bobiverse.yaml +1 -1
  56. package/pennyfarthing-dist/personas/themes/breaking-bad.yaml +1 -1
  57. package/pennyfarthing-dist/personas/themes/count-of-monte-cristo.yaml +1 -1
  58. package/pennyfarthing-dist/personas/themes/cowboy-bebop.yaml +1 -1
  59. package/pennyfarthing-dist/personas/themes/deadwood.yaml +1 -1
  60. package/pennyfarthing-dist/personas/themes/dickens.yaml +1 -1
  61. package/pennyfarthing-dist/personas/themes/discworld.yaml +1 -1
  62. package/pennyfarthing-dist/personas/themes/doctor-who.yaml +1 -1
  63. package/pennyfarthing-dist/personas/themes/don-quixote.yaml +1 -1
  64. package/pennyfarthing-dist/personas/themes/dune.yaml +1 -1
  65. package/pennyfarthing-dist/personas/themes/enlightenment-thinkers.yaml +1 -1
  66. package/pennyfarthing-dist/personas/themes/expeditionary-force.yaml +1 -1
  67. package/pennyfarthing-dist/personas/themes/futurama.yaml +1 -1
  68. package/pennyfarthing-dist/personas/themes/game-of-thrones.yaml +1 -1
  69. package/pennyfarthing-dist/personas/themes/gilligans-island.yaml +131 -1
  70. package/pennyfarthing-dist/personas/themes/gothic-literature.yaml +1 -1
  71. package/pennyfarthing-dist/personas/themes/great-gatsby.yaml +1 -1
  72. package/pennyfarthing-dist/personas/themes/hannibal.yaml +1 -1
  73. package/pennyfarthing-dist/personas/themes/harry-potter.yaml +1 -1
  74. package/pennyfarthing-dist/personas/themes/his-dark-materials.yaml +1 -1
  75. package/pennyfarthing-dist/personas/themes/inspector-morse.yaml +1 -1
  76. package/pennyfarthing-dist/personas/themes/jane-austen.yaml +1 -1
  77. package/pennyfarthing-dist/personas/themes/legion-of-doom.yaml +130 -0
  78. package/pennyfarthing-dist/personas/themes/mad-max.yaml +1 -1
  79. package/pennyfarthing-dist/personas/themes/moby-dick.yaml +1 -1
  80. package/pennyfarthing-dist/personas/themes/neuromancer.yaml +1 -1
  81. package/pennyfarthing-dist/personas/themes/parks-and-rec.yaml +130 -0
  82. package/pennyfarthing-dist/personas/themes/princess-bride.yaml +130 -0
  83. package/pennyfarthing-dist/personas/themes/renaissance-masters.yaml +1 -1
  84. package/pennyfarthing-dist/personas/themes/russian-masters.yaml +1 -1
  85. package/pennyfarthing-dist/personas/themes/sandman.yaml +1 -1
  86. package/pennyfarthing-dist/personas/themes/scientific-revolutionaries.yaml +1 -1
  87. package/pennyfarthing-dist/personas/themes/shakespeare.yaml +1 -1
  88. package/pennyfarthing-dist/personas/themes/star-trek-tng.yaml +139 -3
  89. package/pennyfarthing-dist/personas/themes/star-trek-tos.yaml +124 -0
  90. package/pennyfarthing-dist/personas/themes/star-wars.yaml +1 -1
  91. package/pennyfarthing-dist/personas/themes/succession.yaml +1 -1
  92. package/pennyfarthing-dist/personas/themes/superfriends.yaml +131 -1
  93. package/pennyfarthing-dist/personas/themes/ted-lasso.yaml +131 -1
  94. package/pennyfarthing-dist/personas/themes/the-americans.yaml +1 -1
  95. package/pennyfarthing-dist/personas/themes/the-expanse.yaml +131 -1
  96. package/pennyfarthing-dist/personas/themes/the-good-place.yaml +1 -1
  97. package/pennyfarthing-dist/personas/themes/the-matrix.yaml +1 -1
  98. package/pennyfarthing-dist/personas/themes/the-sopranos.yaml +1 -1
  99. package/pennyfarthing-dist/personas/themes/west-wing.yaml +6 -6
  100. package/pennyfarthing-dist/personas/themes/world-explorers.yaml +1 -1
  101. package/pennyfarthing-dist/personas/themes/wwii-leaders.yaml +1 -1
  102. package/pennyfarthing-dist/scripts/core/check-context.sh +23 -6
  103. package/pennyfarthing-dist/scripts/core/phase-check-start.sh +95 -0
  104. package/pennyfarthing-dist/scripts/git/release.sh +3 -2
  105. package/pennyfarthing-dist/scripts/health/drift-detection.sh +162 -0
  106. package/pennyfarthing-dist/scripts/hooks/bell-mode-hook.sh +87 -0
  107. package/pennyfarthing-dist/scripts/jira/create-jira-epic.sh +1 -1
  108. package/pennyfarthing-dist/scripts/misc/deploy.sh +1 -1
  109. package/pennyfarthing-dist/scripts/misc/statusline.sh +25 -32
  110. package/pennyfarthing-dist/scripts/sprint/import-epic-to-future.mjs +377 -0
  111. package/pennyfarthing-dist/scripts/sprint/import-epic-to-future.sh +9 -0
  112. package/pennyfarthing-dist/scripts/theme/compute-theme-tiers.js +492 -0
  113. package/pennyfarthing-dist/scripts/theme/compute-theme-tiers.sh +8 -200
  114. package/pennyfarthing-dist/scripts/workflow/list-workflows.sh +38 -5
  115. package/pennyfarthing-dist/scripts/workflow/phase-owner.sh +40 -0
  116. package/pennyfarthing-dist/skills/theme-creation/SKILL.md +12 -7
  117. package/pennyfarthing-dist/workflows/epics-and-stories/steps/step-04-final-validation.md +11 -3
  118. package/pennyfarthing-dist/workflows/epics-and-stories/steps/step-05-import-to-future.md +122 -0
  119. package/pennyfarthing-dist/workflows/epics-and-stories/workflow.yaml +3 -2
  120. package/packages/core/dist/workflow/generic-handoff.d.ts +0 -281
  121. package/packages/core/dist/workflow/generic-handoff.d.ts.map +0 -1
  122. package/packages/core/dist/workflow/generic-handoff.js +0 -411
  123. package/packages/core/dist/workflow/generic-handoff.js.map +0 -1
  124. package/packages/core/dist/workflow/generic-handoff.test.d.ts +0 -21
  125. package/packages/core/dist/workflow/generic-handoff.test.d.ts.map +0 -1
  126. package/packages/core/dist/workflow/generic-handoff.test.js +0 -499
  127. package/packages/core/dist/workflow/generic-handoff.test.js.map +0 -1
@@ -0,0 +1,492 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * compute-theme-tiers.js - Compute tier rankings from job-fair results
4
+ *
5
+ * Reads all summary.yaml files from internal/results/job-fair/
6
+ * For each theme, extracts character×role scores from the matrix
7
+ * Normalizes across formats, then computes delta vs baseline
8
+ * Assigns tier based on overall performance vs control baseline
9
+ *
10
+ * KEY DESIGN DECISIONS:
11
+ * 1. Normalizes dev roles: averages dev-codegen + dev-debug into synthetic "dev"
12
+ * to enable fair comparison across old 4-role and new 6-role formats.
13
+ * Final comparison uses: dev, reviewer, sm, tea (4 roles)
14
+ * 2. Uses the MOST COMPLETE run for each theme (most matrix entries),
15
+ * not the most recent. This prevents incomplete runs from overriding good data.
16
+ *
17
+ * Tier criteria (calibrated for actual delta distribution):
18
+ * S: delta >= +7 (elite - top performers)
19
+ * A: delta >= +5 (excellent - strong positive)
20
+ * B: delta >= +3 (strong - solid performers)
21
+ * C: delta >= +1 (good - above average)
22
+ * D: delta < +1 (average/below)
23
+ * U: no data (unbenchmarked)
24
+ *
25
+ * Usage:
26
+ * compute-theme-tiers.js [--dry-run] [--verbose] [--min-entries N]
27
+ */
28
+
29
+ import { readdirSync, readFileSync, writeFileSync, existsSync } from 'fs';
30
+ import { join, dirname, basename } from 'path';
31
+ import { fileURLToPath } from 'url';
32
+ import { execSync } from 'child_process';
33
+
34
// ES modules have no built-in __filename/__dirname, so derive them from the
// module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// PROJECT_ROOT is two directories above this script. Theme files live beneath
// it; job-fair results live in an "internal" tree one level further up.
const PROJECT_ROOT = join(__dirname, '../..');
const JOB_FAIR_DIR = join(PROJECT_ROOT, '../internal/results/job-fair');
const THEMES_DIR = join(PROJECT_ROOT, 'personas/themes');

// A run with fewer matrix entries than this is treated as incomplete unless
// overridden with --min-entries.
const DEFAULT_MIN_ENTRIES = 20;

// Role normalization across benchmark formats:
//   old format (4 roles): dev, reviewer, sm, tea
//   new format (6 roles): architect, dev-codegen, dev-debug, reviewer, sm, tea
// The two dev subroles are folded into one synthetic "dev" so that both
// formats are compared on the same four roles.
const NORMALIZED_ROLES = new Set(['dev', 'reviewer', 'sm', 'tea']);
const DEV_SUBROLES = ['dev-codegen', 'dev-debug'];

// Minimum mean delta (inclusive) required for each tier; anything below the
// C threshold is tier D.
const TIER_THRESHOLDS = {
  S: 7, // elite - top performers
  A: 5, // excellent - strong positive
  B: 3, // strong - solid performers
  C: 1, // good - above average
};
60
+
61
/**
 * Parse command-line flags.
 *
 * @param {string[]} argv - Full process argv; flags are read from index 2 on.
 * @returns {{dryRun: boolean, verbose: boolean, minEntries: number}}
 *
 * Recognised flags: --dry-run, --verbose, --min-entries N, --help / -h.
 * --help prints usage and exits with status 0. Unrecognised arguments are
 * ignored. A missing or non-integer value for --min-entries is reported on
 * stderr and exits with status 1; previously it produced NaN, which made
 * every "entries < minEntries" comparison false and silently disabled the
 * completeness filter.
 */
function parseArgs(argv) {
  const args = {
    dryRun: false,
    verbose: false,
    minEntries: DEFAULT_MIN_ENTRIES,
  };

  for (let i = 2; i < argv.length; i++) {
    const arg = argv[i];
    switch (arg) {
      case '--dry-run':
        args.dryRun = true;
        break;
      case '--verbose':
        args.verbose = true;
        break;
      case '--min-entries': {
        // Consume the following argument as the value.
        const raw = argv[++i];
        const value = parseInt(raw, 10);
        if (Number.isNaN(value)) {
          const shown = raw === undefined ? 'nothing' : `"${raw}"`;
          console.error(`Error: --min-entries requires an integer value (got ${shown})`);
          process.exit(1);
        }
        args.minEntries = value;
        break;
      }
      case '--help':
      case '-h':
        showUsage();
        process.exit(0);
    }
  }
  return args;
}
90
+
91
/**
 * Print the command-line help text to stdout.
 */
function showUsage() {
  const usageLines = [
    'Usage: compute-theme-tiers.js [OPTIONS]',
    '',
    'Options:',
    '--dry-run Output changes without writing to theme files',
    '--verbose Show detailed output including skipped runs',
    `--min-entries N Minimum matrix entries for a run to be complete (default: ${DEFAULT_MIN_ENTRIES})`,
    '--help, -h Show this help message',
    '',
    'Normalization:',
    'Averages dev-codegen + dev-debug into synthetic "dev" score.',
    'Final comparison uses 4 roles: dev, reviewer, sm, tea.',
    '',
    'Tier Criteria (based on mean delta from control on common roles):',
    'S: delta >= +7 (elite - top performers)',
    'A: delta >= +5 (excellent - strong positive)',
    'B: delta >= +3 (strong - solid performers)',
    'C: delta >= +1 (good - above average)',
    'D: delta < +1 (average/below)',
    'U: no data (unbenchmarked)',
  ];
  console.log(usageLines.join('\n'));
}
112
+
113
/**
 * Read a single field from a YAML file by shelling out to `yq -r`.
 *
 * @param {string} filePath - Path to the YAML file.
 * @param {string} field - yq expression, e.g. '.theme'.
 * @returns {string|null} Trimmed yq output, or null when yq prints "null"
 *   or the command fails for any reason.
 *
 * NOTE(review): both arguments are interpolated into a shell command line,
 * so a path containing `"` or `$`, or an expression containing `'`, will
 * break the command. Callers currently pass a fixed expression and paths
 * built from directory listings.
 */
function yqGet(filePath, field) {
  let output;
  try {
    output = execSync(`yq -r '${field}' "${filePath}"`, {
      encoding: 'utf-8',
      stdio: ['pipe', 'pipe', 'pipe'],
    });
  } catch {
    // Any failure to run yq is reported as "no value".
    return null;
  }

  const value = output.trim();
  if (value === 'null') {
    return null;
  }
  return value;
}
127
+
128
/**
 * Load the `.baselines` section of a summary.yaml file as a JS value.
 *
 * yq converts the section to JSON, which is then parsed.
 *
 * @param {string} filePath - Path to summary.yaml.
 * @returns {object|null} Parsed baselines, or null when the yq call or the
 *   JSON parse fails. Presumably shaped { role: { mean, std, n } } — that is
 *   what the consumers in this file read.
 */
function parseBaselines(filePath) {
  const command = `yq -o=json '.baselines' "${filePath}"`;
  const options = { encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'] };
  try {
    const json = execSync(command, options);
    return JSON.parse(json);
  } catch {
    return null;
  }
}
143
+
144
/**
 * Count matrix entries with a plain text scan rather than a YAML parse,
 * so duplicate YAML keys are each counted.
 *
 * awk counts every line containing "mean:" from the first line that starts
 * with "matrix:" through the end of the file. Lines before "matrix:" are not
 * counted; any "mean:" line after it is counted regardless of which section
 * it is in.
 *
 * @param {string} filePath - Path to summary.yaml.
 * @returns {number} Number of matching lines; 0 when there are none or the
 *   awk command fails.
 */
function countMatrixEntries(filePath) {
  const command =
    `awk '/^matrix:/,0 { if (/mean:/) count++ } END { print count }' "${filePath}"`;
  try {
    const output = execSync(command, {
      encoding: 'utf-8',
      stdio: ['pipe', 'pipe', 'pipe'],
    });
    // awk prints an empty line when nothing matched; treat that as zero.
    const count = parseInt(output.trim(), 10);
    return count || 0;
  } catch {
    return 0;
  }
}
160
+
161
/**
 * Split one CSV record into fields, honouring double-quoted fields
 * (embedded commas and "" escapes).
 */
function splitCsvLine(line) {
  const fields = [];
  let current = '';
  let inQuotes = false;
  for (let i = 0; i < line.length; i++) {
    const ch = line[i];
    if (inQuotes) {
      if (ch !== '"') {
        current += ch;
      } else if (line[i + 1] === '"') {
        // A doubled quote inside a quoted field is a literal quote.
        current += '"';
        i++;
      } else {
        inQuotes = false;
      }
    } else if (ch === '"') {
      inQuotes = true;
    } else if (ch === ',') {
      fields.push(current);
      current = '';
    } else {
      current += ch;
    }
  }
  fields.push(current);
  return fields;
}

/**
 * Extract every character×role score from the `.matrix` section of a
 * summary.yaml file.
 *
 * yq flattens the matrix into CSV rows of `character,role,mean,n`, which are
 * parsed here. Fields may be quoted or unquoted (`death,dev,70.00,1` or
 * `"death","dev",70.00,1`). Quoted fields are parsed properly, so a name
 * containing a comma no longer shifts the columns as it did with a plain
 * split on ','.
 *
 * @param {string} filePath - Path to summary.yaml.
 * @returns {{character: string, role: string, mean: number, n: number}[]}
 *   One entry per row whose mean and n are both numeric; an empty array when
 *   the yq command fails.
 */
function parseMatrixScores(filePath) {
  try {
    const raw = execSync(
      `yq '.matrix | to_entries | .[] | .key as $char | .value | to_entries | .[] | [$char, .key, .value.mean, .value.n] | @csv' "${filePath}"`,
      { encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'] }
    );

    const scores = [];
    for (const line of raw.trim().split('\n')) {
      if (!line) continue;

      const parts = splitCsvLine(line);
      if (parts.length < 4) continue;

      const [character, role] = parts;
      const mean = parseFloat(parts[2]);
      const n = parseInt(parts[3], 10);
      // Rows with a missing or non-numeric mean or n are dropped.
      if (!isNaN(mean) && !isNaN(n)) {
        scores.push({ character, role, mean, n });
      }
    }
    return scores;
  } catch {
    return [];
  }
}
194
+
195
/**
 * Return a copy of the baselines that is guaranteed to have a "dev" entry
 * whenever one can be derived.
 *
 * If the input has no "dev" baseline but has both "dev-codegen" and
 * "dev-debug", a synthetic "dev" is added: the mean is the average of the
 * two means, std is the root of the averaged variances (a pooled-std
 * approximation), and n is the sum of both sample counts. The input object
 * is not modified.
 *
 * @param {object|null} baselines - Map of role to { mean, std, n }.
 * @returns {object|null} Shallow copy with the optional synthetic "dev",
 *   or null when the input is falsy.
 */
function normalizeBaselines(baselines) {
  if (!baselines) return null;

  const result = Object.assign({}, baselines);
  const codegen = result['dev-codegen'];
  const debug = result['dev-debug'];

  // Only synthesize when "dev" is absent and both subroles are present.
  const canSynthesize = !result.dev && codegen && debug;
  if (!canSynthesize) {
    return result;
  }

  const averagedVariance = (codegen.std ** 2 + debug.std ** 2) / 2;
  result.dev = {
    mean: (codegen.mean + debug.mean) / 2,
    std: Math.sqrt(averagedVariance),
    n: codegen.n + debug.n,
  };
  return result;
}
216
+
217
/**
 * Compute per-role and overall deltas between a run's scores and its
 * control baselines.
 *
 * Scores are averaged per role across all characters. The dev subroles
 * (dev-codegen, dev-debug) are folded into a synthetic "dev" role so that
 * old (4-role) and new (6-role) benchmark formats are compared on the same
 * roles. Roles outside NORMALIZED_ROLES (e.g. architect) are ignored.
 *
 * If the run already has an explicit "dev" role, it is used as-is and the
 * subroles are ignored. This mirrors normalizeBaselines, which only
 * synthesizes "dev" when it is absent. Previously the result depended on
 * the order in which roles appeared: subroles were either merged into the
 * explicit "dev" scores or overwritten by them.
 *
 * NOTE(review): the synthetic "dev" score pools all subrole entries (so it
 * is weighted by entry count), whereas the synthetic baseline is the plain
 * average of the two subrole means. They agree when both subroles have the
 * same number of entries.
 *
 * @param {object|null} baselines - Map of role to { mean, std, n }.
 * @param {{role: string, mean: number}[]} matrixScores - Flattened scores.
 * @returns {{meanDelta: number, meanScore: number, nRoles: number,
 *   roleDeltas: object}|null} Null when inputs are missing/empty or no role
 *   has both scores and a numeric baseline mean.
 */
function computeDeltas(baselines, matrixScores) {
  if (!baselines || !matrixScores || matrixScores.length === 0) return null;

  const normalizedBaselines = normalizeBaselines(baselines);

  // First pass: sum and count the scores of every role as reported.
  const rawScores = {};
  for (const { role, mean } of matrixScores) {
    if (typeof mean !== 'number') continue;
    if (!rawScores[role]) {
      rawScores[role] = { sum: 0, count: 0 };
    }
    rawScores[role].sum += mean;
    rawScores[role].count++;
  }

  // Second pass: keep only the normalized roles, folding the dev subroles
  // into "dev" unless the run reports "dev" directly.
  const hasExplicitDev = Boolean(rawScores.dev);
  const roleScores = {};
  for (const [role, scores] of Object.entries(rawScores)) {
    if (DEV_SUBROLES.includes(role)) {
      if (hasExplicitDev) continue;
      if (!roleScores.dev) {
        roleScores.dev = { sum: 0, count: 0 };
      }
      roleScores.dev.sum += scores.sum;
      roleScores.dev.count += scores.count;
    } else if (NORMALIZED_ROLES.has(role)) {
      roleScores[role] = scores;
    }
  }

  // Third pass: delta of each role mean against its baseline mean.
  const roleDeltas = {};
  let totalDelta = 0;
  let totalScore = 0;
  let nRoles = 0;

  for (const [role, scores] of Object.entries(roleScores)) {
    const baseline = normalizedBaselines[role];
    // Roles without a numeric baseline cannot contribute a delta.
    if (!baseline || typeof baseline.mean !== 'number') continue;

    const roleMean = scores.sum / scores.count;
    const delta = roleMean - baseline.mean;

    roleDeltas[role] = {
      mean: roleMean,
      baseline: baseline.mean,
      delta,
      n: scores.count,
    };

    totalDelta += delta;
    totalScore += roleMean;
    nRoles++;
  }

  if (nRoles === 0) return null;

  return {
    meanDelta: totalDelta / nRoles,
    meanScore: totalScore / nRoles,
    nRoles,
    roleDeltas,
  };
}
292
+
293
/**
 * Map a mean delta to a tier letter.
 *
 * Tiers are checked from best to worst; the first threshold the delta meets
 * or exceeds wins. Anything below the C threshold (including NaN, which
 * fails every comparison) is tier D.
 *
 * @param {number} meanDelta - Mean delta vs. the control baseline.
 * @returns {'S'|'A'|'B'|'C'|'D'}
 */
function assignTier(meanDelta) {
  for (const tier of ['S', 'A', 'B', 'C']) {
    if (meanDelta >= TIER_THRESHOLDS[tier]) {
      return tier;
    }
  }
  return 'D';
}
303
+
304
/**
 * Locate the summary.yaml of every job-fair run.
 *
 * Each immediate subdirectory of JOB_FAIR_DIR that contains a summary.yaml
 * is one run. Prints an error and exits with status 1 when JOB_FAIR_DIR
 * does not exist.
 *
 * @returns {{path: string, runName: string}[]} Runs sorted by directory name.
 */
function findSummaryFiles() {
  if (!existsSync(JOB_FAIR_DIR)) {
    console.error(`Error: Job fair directory not found: ${JOB_FAIR_DIR}`);
    process.exit(1);
  }

  return readdirSync(JOB_FAIR_DIR, { withFileTypes: true })
    .filter((entry) => entry.isDirectory())
    .map((entry) => ({
      path: join(JOB_FAIR_DIR, entry.name, 'summary.yaml'),
      runName: entry.name,
    }))
    .filter((run) => existsSync(run.path))
    .sort((a, b) => a.runName.localeCompare(b.runName));
}
326
+
327
/**
 * Set the tier recorded in a theme's YAML file.
 *
 * Only the first indented `tier:` line is considered. The match is confined
 * to a single line (spaces/tabs only around the key), so an empty `tier:`
 * value can no longer cause the first token of the following line to be
 * overwritten, which the previous `\s`-based pattern allowed.
 *
 * @param {string} themeName - Theme file name without the .yaml extension.
 * @param {string} newTier - Tier letter to write.
 * @param {boolean} dryRun - When true, report the change without writing.
 * @returns {{updated: boolean, reason?: string, currentTier?: string,
 *   newTier?: string}} `updated` is true when the tier differs (and, unless
 *   dryRun, the file was rewritten); otherwise `reason` is one of
 *   'file not found', 'no tier field', or 'unchanged'.
 */
function updateThemeTier(themeName, newTier, dryRun) {
  const themeFile = join(THEMES_DIR, `${themeName}.yaml`);
  if (!existsSync(themeFile)) {
    return { updated: false, reason: 'file not found' };
  }

  const tierPattern = /^([ \t]+tier:[ \t]*)(\S+)/m;
  const content = readFileSync(themeFile, 'utf-8');
  const tierMatch = content.match(tierPattern);

  if (!tierMatch) {
    return { updated: false, reason: 'no tier field', currentTier: 'U' };
  }

  const currentTier = tierMatch[2];
  if (currentTier === newTier) {
    return { updated: false, reason: 'unchanged', currentTier };
  }

  if (!dryRun) {
    // A replacer function keeps any "$" in newTier from being read as a
    // replacement pattern.
    const newContent = content.replace(tierPattern, (_match, prefix) => `${prefix}${newTier}`);
    writeFileSync(themeFile, newContent);
  }

  return { updated: true, currentTier, newTier };
}
355
+
356
/**
 * Main execution.
 *
 * 1. Parse flags and print the configuration.
 * 2. Scan every job-fair run, skipping runs with fewer than minEntries
 *    matrix entries or without computable deltas.
 * 3. For each theme keep the run with the most matrix entries (on a tie the
 *    run that sorts first by name is kept).
 * 4. Print a table sorted by mean delta, assign tiers, and update the theme
 *    files (no writes in --dry-run mode).
 * 5. Print the tier distribution, including themes with no benchmark data.
 *
 * Theme names are derived from file names with basename(f, '.yaml'), which
 * strips only the trailing extension; the earlier String.replace call
 * removed the first ".yaml" occurrence anywhere in the name.
 */
function main() {
  const args = parseArgs(process.argv);

  if (args.dryRun) {
    console.log('DRY RUN - no changes will be made\n');
  }

  console.log('Configuration:');
  console.log(` Minimum entries for complete run: ${args.minEntries}`);
  console.log(` Normalized roles: ${[...NORMALIZED_ROLES].join(', ')}`);
  console.log(` Dev subroles (averaged): ${DEV_SUBROLES.join(' + ')} → dev`);
  console.log(` Job fair directory: ${JOB_FAIR_DIR}`);
  console.log('');

  // Find all summary files
  const summaryFiles = findSummaryFiles();
  console.log(`Scanning ${summaryFiles.length} job-fair runs...\n`);

  // Process each run and collect the best run per theme
  const themeRuns = {}; // theme -> { runName, entries, ...deltas }
  const skippedRuns = [];

  for (const { path, runName } of summaryFiles) {
    const theme = yqGet(path, '.theme');
    if (!theme) continue;

    const entries = countMatrixEntries(path);

    // Skip incomplete runs
    if (entries < args.minEntries) {
      skippedRuns.push({ theme, runName, entries, reason: 'incomplete' });
      continue;
    }

    const baselines = parseBaselines(path);
    const matrixScores = parseMatrixScores(path);

    const deltas = computeDeltas(baselines, matrixScores);
    if (!deltas) {
      skippedRuns.push({ theme, runName, entries, reason: 'no valid deltas' });
      continue;
    }

    // Keep the most complete run for each theme; strict ">" means the first
    // run seen wins a tie.
    if (!themeRuns[theme] || entries > themeRuns[theme].entries) {
      themeRuns[theme] = {
        runName,
        entries,
        ...deltas,
      };
    }
  }

  // Show skipped runs in verbose mode
  if (args.verbose && skippedRuns.length > 0) {
    console.log('Skipped Runs (incomplete or invalid):');
    for (const { theme, runName, entries, reason } of skippedRuns) {
      console.log(` ${theme}: ${runName} (${entries} entries) - ${reason}`);
    }
    console.log('');
  }

  // Sort themes by delta (best first)
  const sortedThemes = Object.entries(themeRuns)
    .map(([theme, data]) => ({ theme, ...data }))
    .sort((a, b) => b.meanDelta - a.meanDelta);

  // Print results
  console.log('Theme Performance Summary');
  console.log('='.repeat(70));
  console.log('');
  console.log(
    'Theme'.padEnd(28) +
      'Entries'.padStart(8) +
      'Mean'.padStart(8) +
      'Delta'.padStart(10) +
      'Tier'.padStart(6) +
      (args.verbose ? ' Source Run' : '')
  );
  console.log('-'.repeat(70));

  let updated = 0;
  let unchanged = 0;
  const tierCounts = { S: 0, A: 0, B: 0, C: 0, D: 0 };

  for (const { theme, runName, entries, meanScore, meanDelta } of sortedThemes) {
    const tier = assignTier(meanDelta);
    tierCounts[tier]++;

    const deltaStr = (meanDelta >= 0 ? '+' : '') + meanDelta.toFixed(2);
    console.log(
      theme.padEnd(28) +
        entries.toString().padStart(8) +
        meanScore.toFixed(2).padStart(8) +
        deltaStr.padStart(10) +
        tier.padStart(6) +
        (args.verbose ? ` ${runName}` : '')
    );

    // Update theme file. Missing files and files without a tier field are
    // counted as unchanged.
    const result = updateThemeTier(theme, tier, args.dryRun);
    if (result.updated) {
      updated++;
      if (args.verbose) {
        console.log(` → Updated: ${result.currentTier} → ${result.newTier}`);
      }
    } else {
      unchanged++;
    }
  }

  console.log('');
  console.log('Tier Distribution:');
  for (const tier of ['S', 'A', 'B', 'C', 'D']) {
    console.log(` ${tier}: ${tierCounts[tier]} themes`);
  }

  // Count unbenchmarked themes: theme files with no qualifying run
  const allThemes = readdirSync(THEMES_DIR)
    .filter((f) => f.endsWith('.yaml'))
    .map((f) => basename(f, '.yaml'));
  const benchmarkedThemes = new Set(Object.keys(themeRuns));
  const unbenchmarked = allThemes.filter((t) => !benchmarkedThemes.has(t));
  console.log(` U: ${unbenchmarked.length} themes (unbenchmarked)`);

  if (args.verbose && unbenchmarked.length > 0) {
    console.log(` ${unbenchmarked.slice(0, 10).join(', ')}${unbenchmarked.length > 10 ? '...' : ''}`);
  }

  console.log('');
  console.log(`Summary: ${updated} updated, ${unchanged} unchanged`);
}
491
+
492
+ main();
@@ -1,203 +1,11 @@
1
- #!/usr/bin/env bash
2
- # compute-theme-tiers.sh - Compute tier rankings from job-fair results and update theme files
1
+ #!/bin/bash
2
+ # compute-theme-tiers.sh - Shell wrapper for compute-theme-tiers.js
3
+ # Computes tier rankings from job-fair results and updates theme files
3
4
  #
4
- # Usage: compute-theme-tiers.sh [--dry-run] [--verbose]
5
+ # Uses the MOST COMPLETE run for each theme (most matrix entries),
6
+ # not the most recent. This prevents incomplete runs from overriding good data.
5
7
  #
6
- # Reads all summary.yaml files from internal/results/job-fair/
7
- # For each theme, extracts all character×role scores from the matrix
8
- # Computes delta vs baseline for each role, then averages across all roles
9
- # Assigns tier based on overall performance vs control baseline
10
- #
11
- # Tier criteria (based on mean delta from control):
12
- # S: delta >= +10 (elite - consistently beats control)
13
- # A: delta >= 0 (excellent - matches or beats control)
14
- # B: delta >= -10 (strong - slightly below control)
15
- # C: delta >= -20 (good - notably below control)
16
- # D: delta < -20 (below average)
17
- # U: no data (unbenchmarked)
18
-
19
- set -euo pipefail
20
-
21
- SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
22
- PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
23
-
24
- JOB_FAIR_DIR="$PROJECT_ROOT/internal/results/job-fair"
25
- THEMES_DIR="$PROJECT_ROOT/pennyfarthing-dist/personas/themes"
26
-
27
- DRY_RUN=false
28
- VERBOSE=false
29
-
30
- for arg in "$@"; do
31
- case $arg in
32
- --dry-run) DRY_RUN=true ;;
33
- --verbose) VERBOSE=true ;;
34
- esac
35
- done
36
-
37
- if [[ "$DRY_RUN" == "true" ]]; then
38
- echo "DRY RUN - no changes will be made"
39
- echo ""
40
- fi
41
-
42
- # Check dependencies
43
- if ! command -v yq &> /dev/null; then
44
- echo "Error: yq is required but not installed"
45
- exit 1
46
- fi
47
-
48
- # Temporary files for aggregation
49
- THEME_DATA=$(mktemp)
50
- THEME_AGG=$(mktemp)
51
- trap "rm -f $THEME_DATA $THEME_AGG" EXIT
52
-
53
- # Process all job-fair summary.yaml files
54
- # Extract theme name, compute mean score and delta vs baselines
55
- echo "Scanning job-fair results..."
56
-
57
- find "$JOB_FAIR_DIR" -name "summary.yaml" -type f | while read -r f; do
58
- theme=$(yq '.theme' "$f" 2>/dev/null | grep -v "^null$" || true)
59
- [[ -z "$theme" ]] && continue
60
-
61
- # Extract baselines as "role:mean" pairs
62
- baselines=$(yq '.baselines | to_entries | .[] | .key + ":" + (.value.mean | tostring)' "$f" 2>/dev/null || true)
63
- [[ -z "$baselines" ]] && continue
64
-
65
- # Extract all matrix scores as "role:mean" pairs (flatten character dimension)
66
- # Matrix format: character: { role: {mean: X, n: Y} }
67
- scores=$(yq '.matrix | to_entries | .[] | .value | to_entries | .[] | .key + ":" + (.value.mean | tostring)' "$f" 2>/dev/null || true)
68
- [[ -z "$scores" ]] && continue
69
-
70
- # Use awk to compute per-role averages and deltas, then overall mean
71
- echo "$theme" "$(echo "$baselines" | tr '\n' '|')" "$(echo "$scores" | tr '\n' '|')"
72
- done | awk '
73
- {
74
- theme = $1
75
-
76
- # Parse baselines (field 2)
77
- n_base = split($2, base_pairs, "|")
78
- for (i = 1; i <= n_base; i++) {
79
- if (base_pairs[i] == "") continue
80
- split(base_pairs[i], kv, ":")
81
- baseline[kv[1]] = kv[2]
82
- }
83
-
84
- # Parse scores (field 3) and accumulate by role
85
- delete role_sum
86
- delete role_count
87
- n_scores = split($3, score_pairs, "|")
88
- for (i = 1; i <= n_scores; i++) {
89
- if (score_pairs[i] == "") continue
90
- split(score_pairs[i], kv, ":")
91
- role = kv[1]
92
- score = kv[2]
93
- role_sum[role] += score
94
- role_count[role]++
95
- }
96
-
97
- # Compute delta for each role, then average
98
- total_delta = 0
99
- total_score = 0
100
- n_roles = 0
101
- for (role in role_sum) {
102
- role_mean = role_sum[role] / role_count[role]
103
- if (role in baseline) {
104
- delta = role_mean - baseline[role]
105
- total_delta += delta
106
- total_score += role_mean
107
- n_roles++
108
- }
109
- }
110
-
111
- if (n_roles > 0) {
112
- mean_delta = total_delta / n_roles
113
- mean_score = total_score / n_roles
114
- print theme, n_scores, mean_score, mean_delta
115
- }
116
- }
117
- ' > "$THEME_DATA"
118
-
119
- summary_count=$(wc -l < "$THEME_DATA" | tr -d ' ')
120
- echo "Found $summary_count themes with job-fair results"
121
- echo ""
122
-
123
- # Aggregate by theme (in case multiple job-fair runs exist for same theme)
124
- # Take the most recent (last) result for each theme
125
- awk '
126
- {
127
- theme = $1
128
- n = $2
129
- score = $3
130
- delta = $4
131
- # Keep last occurrence (most recent)
132
- data[theme] = n "|" score "|" delta
133
- }
134
- END {
135
- for (theme in data) {
136
- split(data[theme], parts, "|")
137
- n = parts[1]
138
- score = parts[2]
139
- delta = parts[3]
140
-
141
- # Assign tier based on mean delta
142
- if (delta >= 10) tier = "S"
143
- else if (delta >= 0) tier = "A"
144
- else if (delta >= -10) tier = "B"
145
- else if (delta >= -20) tier = "C"
146
- else tier = "D"
147
-
148
- printf "%s|%d|%.2f|%.2f|%s\n", theme, n, score, delta, tier
149
- }
150
- }
151
- ' "$THEME_DATA" | sort -t'|' -k4 -rn > "$THEME_AGG"
152
-
153
- # Print results
154
- echo "Theme Performance Summary"
155
- echo "========================="
156
- echo ""
157
- printf "%-25s %6s %8s %8s %4s\n" "Theme" "Runs" "Mean" "Delta" "Tier"
158
- printf "%-25s %6s %8s %8s %4s\n" "-------------------------" "------" "--------" "--------" "----"
159
-
160
- updated=0
161
- unchanged=0
162
-
163
- while IFS='|' read -r theme n score delta tier; do
164
- printf "%-25s %6d %8.2f %+8.2f %4s\n" "$theme" "$n" "$score" "$delta" "$tier"
165
-
166
- # Update theme file
167
- theme_file="$THEMES_DIR/${theme}.yaml"
168
- if [[ -f "$theme_file" ]]; then
169
- current_tier=$(grep -E "^ tier:" "$theme_file" 2>/dev/null | sed 's/.*tier:[[:space:]]*//' || echo "U")
170
-
171
- if [[ "$current_tier" != "$tier" ]]; then
172
- if [[ "$DRY_RUN" == "true" ]]; then
173
- [[ "$VERBOSE" == "true" ]] && echo " Would update: $current_tier → $tier"
174
- else
175
- sed -i '' "s/^ tier:.*/ tier: $tier/" "$theme_file"
176
- [[ "$VERBOSE" == "true" ]] && echo " Updated: $current_tier → $tier"
177
- fi
178
- updated=$((updated + 1))
179
- else
180
- unchanged=$((unchanged + 1))
181
- fi
182
- else
183
- [[ "$VERBOSE" == "true" ]] && echo " Warning: No theme file for $theme"
184
- fi
185
- done < "$THEME_AGG"
186
-
187
- echo ""
188
-
189
- # Count themes by tier
190
- echo "Tier Distribution:"
191
- for t in S A B C D; do
192
- count=$(grep -c "|$t$" "$THEME_AGG" 2>/dev/null || echo 0)
193
- echo " $t: $count themes"
194
- done
195
-
196
- # Count unbenchmarked themes
197
- benchmarked=$(wc -l < "$THEME_AGG" | tr -d ' ')
198
- total_themes=$(ls "$THEMES_DIR"/*.yaml 2>/dev/null | wc -l | tr -d ' ')
199
- unbenchmarked=$((total_themes - benchmarked))
200
- echo " U: $unbenchmarked themes (unbenchmarked)"
8
+ # All logic is implemented in compute-theme-tiers.js (Node.js)
201
9
 
202
- echo ""
203
- echo "Summary: $updated updated, $unchanged unchanged"
10
# Resolve the directory containing this wrapper so the Node script is found
# regardless of the caller's working directory. Abort if it cannot be
# resolved, rather than exec'ing a path rooted at "/".
SCRIPT_DIR="$(cd -- "$(dirname -- "$0")" && pwd)" || exit 1

# Replace this shell with node, forwarding all arguments unchanged.
exec node "$SCRIPT_DIR/compute-theme-tiers.js" "$@"