@pennyfarthing/core 7.7.0 → 7.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/README.md +1 -1
  2. package/package.json +1 -1
  3. package/packages/core/dist/cli/commands/doctor.d.ts.map +1 -1
  4. package/packages/core/dist/cli/commands/doctor.js +114 -0
  5. package/packages/core/dist/cli/commands/doctor.js.map +1 -1
  6. package/pennyfarthing-dist/agents/sm-setup.md +37 -2
  7. package/pennyfarthing-dist/agents/sm.md +68 -22
  8. package/pennyfarthing-dist/agents/workflow-status-check.md +11 -1
  9. package/pennyfarthing-dist/commands/git-cleanup.md +43 -308
  10. package/pennyfarthing-dist/commands/solo.md +31 -0
  11. package/pennyfarthing-dist/guides/patterns/approval-gates-pattern.md +1 -1
  12. package/pennyfarthing-dist/personas/themes/gilligans-island.yaml +83 -83
  13. package/pennyfarthing-dist/personas/themes/the-expanse.yaml +11 -11
  14. package/pennyfarthing-dist/scripts/core/check-context.sh +3 -0
  15. package/pennyfarthing-dist/scripts/core/handoff-marker.sh +13 -2
  16. package/pennyfarthing-dist/scripts/core/prime.sh +3 -157
  17. package/pennyfarthing-dist/scripts/core/run.sh +9 -0
  18. package/pennyfarthing-dist/scripts/hooks/__pycache__/question_reflector_check.cpython-314.pyc +0 -0
  19. package/pennyfarthing-dist/scripts/hooks/question_reflector_check.py +117 -20
  20. package/pennyfarthing-dist/scripts/jira/README.md +10 -7
  21. package/pennyfarthing-dist/scripts/misc/add-short-names.sh +13 -0
  22. package/pennyfarthing-dist/scripts/misc/add_short_names.py +226 -0
  23. package/pennyfarthing-dist/scripts/misc/migrate-bmad-workflow.sh +6 -5
  24. package/pennyfarthing-dist/scripts/misc/migrate_bmad_workflow.py +319 -0
  25. package/pennyfarthing-dist/scripts/sprint/import-epic-to-future.sh +6 -5
  26. package/pennyfarthing-dist/scripts/sprint/import_epic_to_future.py +270 -0
  27. package/pennyfarthing-dist/scripts/test/ensure-swebench-data.sh +59 -0
  28. package/pennyfarthing-dist/scripts/theme/compute-theme-tiers.sh +8 -6
  29. package/pennyfarthing-dist/scripts/theme/compute_theme_tiers.py +402 -0
  30. package/pennyfarthing-dist/scripts/workflow/check.sh +3 -476
  31. package/pennyfarthing-dist/scripts/workflow/get-workflow-type.py +61 -0
  32. package/pennyfarthing-dist/scripts/workflow/get-workflow-type.sh +13 -0
  33. package/pennyfarthing-dist/skills/judge/SKILL.md +57 -0
  34. package/pennyfarthing-dist/skills/sprint/scripts/sync-epic-jira.sh +4 -22
  35. package/pennyfarthing-dist/workflows/git-cleanup/steps/step-01-analyze.md +83 -0
  36. package/pennyfarthing-dist/workflows/git-cleanup/steps/step-02-categorize.md +116 -0
  37. package/pennyfarthing-dist/workflows/git-cleanup/steps/step-03-execute.md +210 -0
  38. package/pennyfarthing-dist/workflows/git-cleanup/steps/step-04-verify.md +88 -0
  39. package/pennyfarthing-dist/workflows/git-cleanup/steps/step-05-complete.md +71 -0
  40. package/pennyfarthing-dist/workflows/git-cleanup.yaml +59 -0
  41. package/pennyfarthing-dist/scripts/hooks/question-reflector-check.mjs +0 -393
  42. package/pennyfarthing-dist/scripts/hooks/tests/question-reflector.test.mjs +0 -545
  43. package/pennyfarthing-dist/scripts/jira/jira-bidirectional-sync.mjs +0 -327
  44. package/pennyfarthing-dist/scripts/jira/jira-bidirectional-sync.test.mjs +0 -503
  45. package/pennyfarthing-dist/scripts/jira/jira-lib.mjs +0 -443
  46. package/pennyfarthing-dist/scripts/jira/jira-sync-story.mjs +0 -208
  47. package/pennyfarthing-dist/scripts/jira/jira-sync.mjs +0 -198
  48. package/pennyfarthing-dist/scripts/misc/add-short-names.mjs +0 -264
  49. package/pennyfarthing-dist/scripts/misc/migrate-bmad-workflow.mjs +0 -474
  50. package/pennyfarthing-dist/scripts/sprint/import-epic-to-future.mjs +0 -377
  51. package/pennyfarthing-dist/scripts/theme/compute-theme-tiers.js +0 -492
  52. /package/pennyfarthing-dist/guides/{AGENT-COORDINATION.md → agent-coordination.md} +0 -0
  53. /package/pennyfarthing-dist/guides/{HOOKS.md → hooks.md} +0 -0
  54. /package/pennyfarthing-dist/guides/{PROMPT-PATTERNS.md → prompt-patterns.md} +0 -0
  55. /package/pennyfarthing-dist/guides/{SESSION-ARTIFACTS.md → session-artifacts.md} +0 -0
  56. /package/pennyfarthing-dist/guides/{XML-TAGS.md → xml-tags.md} +0 -0
@@ -1,492 +0,0 @@
1
- #!/usr/bin/env node
2
- /**
3
- * compute-theme-tiers.js - Compute tier rankings from job-fair results
4
- *
5
- * Reads all summary.yaml files from internal/results/job-fair/
6
- * For each theme, extracts character×role scores from the matrix
7
- * Normalizes across formats, then computes delta vs baseline
8
- * Assigns tier based on overall performance vs control baseline
9
- *
10
- * KEY DESIGN DECISIONS:
11
- * 1. Normalizes dev roles: averages dev-codegen + dev-debug into synthetic "dev"
12
- * to enable fair comparison across old 4-role and new 6-role formats.
13
- * Final comparison uses: dev, reviewer, sm, tea (4 roles)
14
- * 2. Uses the MOST COMPLETE run for each theme (most matrix entries),
15
- * not the most recent. This prevents incomplete runs from overriding good data.
16
- *
17
- * Tier criteria (calibrated for actual delta distribution):
18
- * S: delta >= +7 (elite - top performers)
19
- * A: delta >= +5 (excellent - strong positive)
20
- * B: delta >= +3 (strong - solid performers)
21
- * C: delta >= +1 (good - above average)
22
- * D: delta < +1 (average/below)
23
- * U: no data (unbenchmarked)
24
- *
25
- * Usage:
26
- * compute-theme-tiers.js [--dry-run] [--verbose] [--min-entries N]
27
- */
28
-
29
import { readdirSync, readFileSync, writeFileSync, existsSync } from 'fs';
import { join, dirname, basename } from 'path';
import { fileURLToPath } from 'url';
import { execSync, execFileSync } from 'child_process';
33
-
34
// Resolve every path relative to this script's own location so the tool
// behaves the same regardless of the caller's working directory.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const PROJECT_ROOT = join(__dirname, '..', '..');
const JOB_FAIR_DIR = join(PROJECT_ROOT, '..', 'internal', 'results', 'job-fair');
const THEMES_DIR = join(PROJECT_ROOT, 'personas', 'themes');

// Default minimum entries for a run to be considered complete
const DEFAULT_MIN_ENTRIES = 20;

// Normalized roles for fair comparison across old (4-role) and new (6-role) formats:
//   - Old format: dev, reviewer, sm, tea
//   - New format: architect, dev-codegen, dev-debug, reviewer, sm, tea
//
// Strategy: Average dev-codegen + dev-debug into synthetic "dev" score, giving us
// 4 comparable roles: dev, reviewer, sm, tea
const NORMALIZED_ROLES = new Set(['dev', 'reviewer', 'sm', 'tea']);
// Frozen: shared lookup tables must not be mutated by any consumer.
const DEV_SUBROLES = Object.freeze(['dev-codegen', 'dev-debug']);

// Tier thresholds (calibrated for actual delta distribution).
// Anything below the C threshold is tier D (average/below).
const TIER_THRESHOLDS = Object.freeze({
  S: 7, // delta >= +7 (elite - top performers)
  A: 5, // delta >= +5 (excellent - strong positive)
  B: 3, // delta >= +3 (strong - solid performers)
  C: 1, // delta >= +1 (good - above average)
});
60
-
61
/**
 * Parse command-line options.
 *
 * Recognised flags: --dry-run, --verbose, --min-entries N, --help / -h.
 * Unrecognised arguments are ignored.
 *
 * @param {string[]} argv - process.argv-style array; the first two entries are skipped.
 * @returns {{dryRun: boolean, verbose: boolean, minEntries: number}} Parsed options.
 *   Exits the process with status 0 after printing help, or with status 1
 *   when --min-entries is not followed by a non-negative integer.
 */
function parseArgs(argv) {
  const args = {
    dryRun: false,
    verbose: false,
    minEntries: DEFAULT_MIN_ENTRIES,
  };

  for (let i = 2; i < argv.length; i++) {
    const arg = argv[i];
    switch (arg) {
      case '--dry-run':
        args.dryRun = true;
        break;
      case '--verbose':
        args.verbose = true;
        break;
      case '--min-entries': {
        const raw = argv[++i];
        const value = Number(raw);
        // A missing or non-numeric value would become NaN, and every
        // `entries < NaN` comparison is false, silently disabling the
        // completeness filter. Reject it up front instead.
        if (raw === undefined || raw.trim() === '' || !Number.isInteger(value) || value < 0) {
          console.error(`Error: --min-entries requires a non-negative integer, got: ${raw}`);
          process.exit(1);
        }
        args.minEntries = value;
        break;
      }
      case '--help':
      case '-h':
        showUsage();
        process.exit(0);
        break;
      default:
        break;
    }
  }
  return args;
}
90
-
91
/**
 * Print the command-line help text (options, normalization note and tier
 * criteria) to stdout.
 */
function showUsage() {
  const helpLines = [
    'Usage: compute-theme-tiers.js [OPTIONS]',
    '',
    'Options:',
    '--dry-run Output changes without writing to theme files',
    '--verbose Show detailed output including skipped runs',
    `--min-entries N Minimum matrix entries for a run to be complete (default: ${DEFAULT_MIN_ENTRIES})`,
    '--help, -h Show this help message',
    '',
    'Normalization:',
    'Averages dev-codegen + dev-debug into synthetic "dev" score.',
    'Final comparison uses 4 roles: dev, reviewer, sm, tea.',
    '',
    'Tier Criteria (based on mean delta from control on common roles):',
    'S: delta >= +7 (elite - top performers)',
    'A: delta >= +5 (excellent - strong positive)',
    'B: delta >= +3 (strong - solid performers)',
    'C: delta >= +1 (good - above average)',
    'D: delta < +1 (average/below)',
    'U: no data (unbenchmarked)',
  ];
  console.log(helpLines.join('\n'));
}
112
-
113
/**
 * Read a single field from a YAML file using the `yq` CLI.
 *
 * yq is invoked directly (no intermediate shell), so characters such as
 * quotes, `$` or backticks in the path or expression are passed through
 * verbatim instead of being interpreted.
 *
 * @param {string} filePath - Path of the YAML file to query.
 * @param {string} field - yq expression selecting the field, e.g. '.theme'.
 * @returns {string|null} Trimmed yq output, or null when yq prints 'null'
 *   or exits with an error (unreadable file, invalid YAML, ...).
 * @throws {Error} When the `yq` executable itself cannot be found; without
 *   it every lookup would fail and all runs would be skipped silently.
 */
function yqGet(filePath, field) {
  let output;
  try {
    output = execFileSync('yq', ['-r', field, filePath], {
      encoding: 'utf-8',
      stdio: ['pipe', 'pipe', 'pipe'],
    });
  } catch (err) {
    if (err.code === 'ENOENT') {
      throw new Error('yq executable not found on PATH', { cause: err });
    }
    // Any other failure is treated as "field not available" (best effort).
    return null;
  }

  const value = output.trim();
  return value === 'null' ? null : value;
}
127
-
128
/**
 * Parse the `baselines` section of a summary.yaml file.
 *
 * yq converts the section to JSON; it is invoked without a shell so the
 * file path is never subject to shell interpretation.
 *
 * @param {string} filePath - Path of the summary.yaml file.
 * @returns {Object|null} Whatever JSON yq emits for `.baselines`
 *   (expected shape: { role: { mean, std, n } }), or null when yq fails
 *   or its output is not valid JSON.
 */
function parseBaselines(filePath) {
  try {
    const raw = execFileSync('yq', ['-o=json', '.baselines', filePath], {
      encoding: 'utf-8',
      stdio: ['pipe', 'pipe', 'pipe'],
    });
    return JSON.parse(raw);
  } catch {
    // Best effort: a run without readable baselines is reported as null.
    return null;
  }
}
143
-
144
/**
 * Count matrix entries by scanning the raw text (tolerates duplicate YAML keys).
 *
 * Counts the lines containing "mean:" from the first line that starts with
 * "matrix:" through the end of the file, so "mean:" lines in earlier
 * sections (e.g. baselines) are not counted. The file is read in-process,
 * so no shell or awk is required.
 *
 * @param {string} filePath - Path of the summary.yaml file.
 * @returns {number} Number of matching lines; 0 when the file cannot be
 *   read or has no top-level "matrix:" line.
 */
function countMatrixEntries(filePath) {
  let text;
  try {
    text = readFileSync(filePath, 'utf-8');
  } catch {
    return 0;
  }

  const lines = text.split('\n');
  const matrixStart = lines.findIndex((line) => line.startsWith('matrix:'));
  if (matrixStart === -1) return 0;

  return lines.slice(matrixStart).filter((line) => line.includes('mean:')).length;
}
160
-
161
/**
 * Split one CSV record into its fields.
 * Handles double-quoted fields, including embedded commas and "" escapes.
 *
 * @param {string} line - A single CSV line without the trailing newline.
 * @returns {string[]} Field values with surrounding quotes removed.
 */
function splitCsvLine(line) {
  const fields = [];
  let current = '';
  let inQuotes = false;

  for (let i = 0; i < line.length; i++) {
    const ch = line[i];
    if (inQuotes) {
      if (ch !== '"') {
        current += ch;
      } else if (line[i + 1] === '"') {
        current += '"'; // escaped quote inside a quoted field
        i++;
      } else {
        inQuotes = false;
      }
    } else if (ch === '"') {
      inQuotes = true;
    } else if (ch === ',') {
      fields.push(current);
      current = '';
    } else {
      current += ch;
    }
  }
  fields.push(current);
  return fields;
}

/**
 * Extract all scores from the matrix section using yq (handles duplicate keys).
 *
 * yq emits one CSV row per character/role pair: character,role,mean,n.
 * Fields may be quoted or unquoted (death,dev,70.00,1 or
 * "death","dev",70.00,1). Rows whose mean or n is not numeric are dropped.
 * yq is invoked without a shell so the path is passed through verbatim.
 *
 * @param {string} filePath - Path of the summary.yaml file.
 * @returns {Array<{character: string, role: string, mean: number, n: number}>}
 *   Parsed rows; an empty array when yq fails.
 */
function parseMatrixScores(filePath) {
  let raw;
  try {
    raw = execFileSync(
      'yq',
      [
        '.matrix | to_entries | .[] | .key as $char | .value | to_entries | .[] | [$char, .key, .value.mean, .value.n] | @csv',
        filePath,
      ],
      { encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'] },
    );
  } catch {
    return [];
  }

  const scores = [];
  for (const line of raw.trim().split('\n')) {
    if (!line) continue;
    const fields = splitCsvLine(line);
    if (fields.length < 4) continue;

    const [character, role] = fields;
    const mean = Number.parseFloat(fields[2]);
    const n = Number.parseInt(fields[3], 10);
    if (!Number.isNaN(mean) && !Number.isNaN(n)) {
      scores.push({ character, role, mean, n });
    }
  }
  return scores;
}
194
-
195
/**
 * Normalize baselines: average dev-codegen + dev-debug into a synthetic "dev".
 *
 * The input object is not modified; a shallow copy is returned. A synthetic
 * "dev" entry is added only when no "dev" baseline exists and both sub-role
 * baselines carry a finite numeric mean. Without that check a missing mean
 * would yield NaN, which still has typeof 'number' and would silently
 * propagate into every delta computed from it.
 *
 * @param {Object|null|undefined} baselines - Map of role -> { mean, std, n }.
 * @returns {Object|null} Copy of the baselines, possibly with an added
 *   "dev" entry; null when no baselines were supplied.
 */
function normalizeBaselines(baselines) {
  if (!baselines) return null;

  const normalized = { ...baselines };
  const codegen = normalized['dev-codegen'];
  const debug = normalized['dev-debug'];

  const canSynthesize =
    !normalized.dev &&
    Boolean(codegen) &&
    Boolean(debug) &&
    Number.isFinite(codegen.mean) &&
    Number.isFinite(debug.mean);

  if (canSynthesize) {
    normalized.dev = {
      mean: (codegen.mean + debug.mean) / 2,
      std: Math.sqrt((codegen.std ** 2 + debug.std ** 2) / 2), // pooled std approximation
      n: codegen.n + debug.n,
    };
  }

  return normalized;
}
216
-
217
/**
 * Compute delta vs baselines for a job-fair run.
 *
 * dev-codegen and dev-debug scores are pooled into a synthetic "dev" role so
 * old (4-role) and new (6-role) benchmark formats can be compared on the
 * same roles. When a run reports "dev" directly, that score is used and the
 * sub-roles are ignored, mirroring how the baselines are normalized. Roles
 * outside NORMALIZED_ROLES (e.g. architect) are skipped.
 *
 * @param {Object|null} baselines - Map of role -> { mean, std, n }.
 * @param {Array<{role: string, mean: number}>} matrixScores - Per character/role scores.
 * @returns {{meanDelta: number, meanScore: number, nRoles: number, roleDeltas: Object}|null}
 *   Averages over the roles that have both scores and a finite baseline
 *   mean, plus the per-role breakdown; null when nothing is comparable.
 */
function computeDeltas(baselines, matrixScores) {
  if (!baselines || !matrixScores || matrixScores.length === 0) return null;

  // Normalize baselines (average dev-codegen + dev-debug into dev)
  const normalizedBaselines = normalizeBaselines(baselines);

  // First pass: total the per-entry means for every role as reported.
  // A Map keeps arbitrary role names from colliding with Object.prototype keys.
  const rawScores = new Map();
  for (const { role, mean } of matrixScores) {
    if (!Number.isFinite(mean)) continue;
    const totals = rawScores.get(role) ?? { sum: 0, count: 0 };
    totals.sum += mean;
    totals.count++;
    rawScores.set(role, totals);
  }

  // Second pass: pool the dev sub-roles into a separate accumulator so the
  // outcome does not depend on key order and no rawScores entry is mutated.
  const devPool = { sum: 0, count: 0 };
  for (const subrole of DEV_SUBROLES) {
    const totals = rawScores.get(subrole);
    if (totals) {
      devPool.sum += totals.sum;
      devPool.count += totals.count;
    }
  }

  // Compute deltas vs normalized baselines, in the fixed NORMALIZED_ROLES order.
  const roleDeltas = {};
  let totalDelta = 0;
  let totalScore = 0;
  let nRoles = 0;

  for (const role of NORMALIZED_ROLES) {
    let scores = rawScores.get(role);
    if (!scores && role === 'dev' && devPool.count > 0) {
      scores = devPool;
    }
    if (!scores) continue;

    const baseline = normalizedBaselines[role];
    // Number.isFinite also rejects NaN, which has typeof 'number'.
    if (!baseline || !Number.isFinite(baseline.mean)) continue;

    const roleMean = scores.sum / scores.count;
    const delta = roleMean - baseline.mean;

    roleDeltas[role] = {
      mean: roleMean,
      baseline: baseline.mean,
      delta,
      n: scores.count,
    };

    totalDelta += delta;
    totalScore += roleMean;
    nRoles++;
  }

  if (nRoles === 0) return null;

  return {
    meanDelta: totalDelta / nRoles,
    meanScore: totalScore / nRoles,
    nRoles,
    roleDeltas,
  };
}
292
-
293
/**
 * Map a mean delta onto a tier letter.
 *
 * Tiers are checked from highest to lowest and the first threshold the
 * delta meets or exceeds wins; anything below the C threshold is 'D'.
 *
 * @param {number} meanDelta - Mean delta versus the control baseline.
 * @returns {'S'|'A'|'B'|'C'|'D'} Assigned tier.
 */
function assignTier(meanDelta) {
  for (const tier of ['S', 'A', 'B', 'C']) {
    if (meanDelta >= TIER_THRESHOLDS[tier]) {
      return tier;
    }
  }
  return 'D';
}
303
-
304
/**
 * Locate every job-fair run that has a summary.yaml.
 *
 * Each sub-directory of JOB_FAIR_DIR is one run. Exits the process with
 * status 1 when JOB_FAIR_DIR does not exist.
 *
 * @returns {Array<{path: string, runName: string}>} Runs sorted by name.
 */
function findSummaryFiles() {
  if (!existsSync(JOB_FAIR_DIR)) {
    console.error(`Error: Job fair directory not found: ${JOB_FAIR_DIR}`);
    process.exit(1);
  }

  return readdirSync(JOB_FAIR_DIR, { withFileTypes: true })
    .filter((dirent) => dirent.isDirectory())
    .map((dirent) => ({
      path: join(JOB_FAIR_DIR, dirent.name, 'summary.yaml'),
      runName: dirent.name,
    }))
    .filter((run) => existsSync(run.path))
    .sort((left, right) => left.runName.localeCompare(right.runName));
}
326
-
327
/**
 * Update the tier recorded in a theme file.
 *
 * Only the first indented `tier:` line is considered. The pattern uses
 * horizontal whitespace only, so an empty `tier:` value can never match
 * across the line break and overwrite the key on the following line.
 *
 * @param {string} themeName - Theme name; the file is THEMES_DIR/<themeName>.yaml.
 * @param {string} newTier - Tier letter to write.
 * @param {boolean} dryRun - When true, report the change without writing the file.
 * @returns {{updated: boolean, reason?: string, currentTier?: string, newTier?: string}}
 *   `updated` is true when the tier differs (even in dry-run mode);
 *   otherwise `reason` is 'file not found', 'no tier field' or 'unchanged'.
 */
function updateThemeTier(themeName, newTier, dryRun) {
  const themeFile = join(THEMES_DIR, `${themeName}.yaml`);
  if (!existsSync(themeFile)) {
    return { updated: false, reason: 'file not found' };
  }

  const content = readFileSync(themeFile, 'utf-8');
  const tierPattern = /^([ \t]+tier:[ \t]*)(\S+)/m;
  const tierMatch = tierPattern.exec(content);

  if (!tierMatch) {
    return { updated: false, reason: 'no tier field', currentTier: 'U' };
  }

  const currentTier = tierMatch[2];
  if (currentTier === newTier) {
    return { updated: false, reason: 'unchanged', currentTier };
  }

  if (!dryRun) {
    // A replacer function keeps `$` sequences in newTier from being
    // interpreted as replacement patterns.
    const newContent = content.replace(tierPattern, (_match, prefix) => `${prefix}${newTier}`);
    writeFileSync(themeFile, newContent);
  }

  return { updated: true, currentTier, newTier };
}
355
-
356
/**
 * Scan job-fair runs and keep, per theme, the most complete run that yields
 * valid deltas.
 *
 * @param {Array<{path: string, runName: string}>} summaryFiles - Runs to inspect.
 * @param {number} minEntries - Minimum matrix entries for a run to count as complete.
 * @returns {{themeRuns: Object, skippedRuns: Array<{theme: string, runName: string, entries: number, reason: string}>}}
 *   `themeRuns` maps theme -> { runName, entries, meanDelta, meanScore, nRoles, roleDeltas }.
 */
function collectBestRuns(summaryFiles, minEntries) {
  // Null-prototype object: theme names come from data files and must not
  // collide with inherited keys such as "constructor".
  const themeRuns = Object.create(null);
  const skippedRuns = [];

  for (const { path, runName } of summaryFiles) {
    const theme = yqGet(path, '.theme');
    if (!theme) continue;

    const entries = countMatrixEntries(path);

    // Skip incomplete runs
    if (entries < minEntries) {
      skippedRuns.push({ theme, runName, entries, reason: 'incomplete' });
      continue;
    }

    const deltas = computeDeltas(parseBaselines(path), parseMatrixScores(path));
    if (!deltas) {
      skippedRuns.push({ theme, runName, entries, reason: 'no valid deltas' });
      continue;
    }

    // Keep the most complete run for each theme (first one wins on ties)
    if (!themeRuns[theme] || entries > themeRuns[theme].entries) {
      themeRuns[theme] = { runName, entries, ...deltas };
    }
  }

  return { themeRuns, skippedRuns };
}

/**
 * Main execution: parse options, rank every benchmarked theme by its mean
 * delta, print the summary table and tier distribution, and write the new
 * tier into each theme file (unless --dry-run is set).
 */
function main() {
  const args = parseArgs(process.argv);

  if (args.dryRun) {
    console.log('DRY RUN - no changes will be made\n');
  }

  console.log('Configuration:');
  console.log(` Minimum entries for complete run: ${args.minEntries}`);
  console.log(` Normalized roles: ${[...NORMALIZED_ROLES].join(', ')}`);
  console.log(` Dev subroles (averaged): ${DEV_SUBROLES.join(' + ')} → dev`);
  console.log(` Job fair directory: ${JOB_FAIR_DIR}`);
  console.log('');

  // Find all summary files
  const summaryFiles = findSummaryFiles();
  console.log(`Scanning ${summaryFiles.length} job-fair runs...\n`);

  // Process each run and collect best run per theme
  const { themeRuns, skippedRuns } = collectBestRuns(summaryFiles, args.minEntries);

  // Show skipped runs in verbose mode
  if (args.verbose && skippedRuns.length > 0) {
    console.log('Skipped Runs (incomplete or invalid):');
    for (const { theme, runName, entries, reason } of skippedRuns) {
      console.log(` ${theme}: ${runName} (${entries} entries) - ${reason}`);
    }
    console.log('');
  }

  // Sort themes by delta (best first)
  const sortedThemes = Object.entries(themeRuns)
    .map(([theme, data]) => ({ theme, ...data }))
    .sort((a, b) => b.meanDelta - a.meanDelta);

  // Print results
  console.log('Theme Performance Summary');
  console.log('='.repeat(70));
  console.log('');
  console.log(
    [
      'Theme'.padEnd(28),
      'Entries'.padStart(8),
      'Mean'.padStart(8),
      'Delta'.padStart(10),
      'Tier'.padStart(6),
      args.verbose ? ' Source Run' : '',
    ].join(''),
  );
  console.log('-'.repeat(70));

  let updated = 0;
  let unchanged = 0;
  const tierCounts = { S: 0, A: 0, B: 0, C: 0, D: 0 };

  for (const { theme, runName, entries, meanScore, meanDelta } of sortedThemes) {
    const tier = assignTier(meanDelta);
    tierCounts[tier]++;

    const deltaStr = `${meanDelta >= 0 ? '+' : ''}${meanDelta.toFixed(2)}`;
    console.log(
      [
        theme.padEnd(28),
        entries.toString().padStart(8),
        meanScore.toFixed(2).padStart(8),
        deltaStr.padStart(10),
        tier.padStart(6),
        args.verbose ? ` ${runName}` : '',
      ].join(''),
    );

    // Update theme file
    const result = updateThemeTier(theme, tier, args.dryRun);
    if (result.updated) {
      updated++;
      if (args.verbose) {
        console.log(` → Updated: ${result.currentTier} → ${result.newTier}`);
      }
    } else {
      // Also counts themes whose file is missing or has no tier field.
      unchanged++;
    }
  }

  console.log('');
  console.log('Tier Distribution:');
  for (const tier of ['S', 'A', 'B', 'C', 'D']) {
    console.log(` ${tier}: ${tierCounts[tier]} themes`);
  }

  // Count unbenchmarked themes. basename() strips only the trailing
  // extension, unlike replace('.yaml', ''), which removes the first
  // occurrence anywhere in the name.
  const benchmarkedThemes = new Set(Object.keys(themeRuns));
  const unbenchmarked = readdirSync(THEMES_DIR)
    .filter((file) => file.endsWith('.yaml'))
    .map((file) => basename(file, '.yaml'))
    .filter((theme) => !benchmarkedThemes.has(theme));
  console.log(` U: ${unbenchmarked.length} themes (unbenchmarked)`);

  if (args.verbose && unbenchmarked.length > 0) {
    console.log(` ${unbenchmarked.slice(0, 10).join(', ')}${unbenchmarked.length > 10 ? '...' : ''}`);
  }

  console.log('');
  console.log(`Summary: ${updated} updated, ${unchanged} unchanged`);
}

main();