@pennyfarthing/benchmark 10.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/commands/benchmark-control.md +69 -0
  2. package/commands/benchmark.md +485 -0
  3. package/commands/job-fair.md +102 -0
  4. package/commands/solo.md +447 -0
  5. package/dist/benchmark-integration.d.ts +182 -0
  6. package/dist/benchmark-integration.d.ts.map +1 -0
  7. package/dist/benchmark-integration.js +710 -0
  8. package/dist/benchmark-integration.js.map +1 -0
  9. package/dist/benchmark-integration.test.d.ts +6 -0
  10. package/dist/benchmark-integration.test.d.ts.map +1 -0
  11. package/dist/benchmark-integration.test.js +41 -0
  12. package/dist/benchmark-integration.test.js.map +1 -0
  13. package/dist/index.d.ts +3 -0
  14. package/dist/index.d.ts.map +1 -0
  15. package/dist/index.js +5 -0
  16. package/dist/index.js.map +1 -0
  17. package/dist/job-fair-aggregator.d.ts +150 -0
  18. package/dist/job-fair-aggregator.d.ts.map +1 -0
  19. package/dist/job-fair-aggregator.js +547 -0
  20. package/dist/job-fair-aggregator.js.map +1 -0
  21. package/dist/job-fair-aggregator.test.d.ts +6 -0
  22. package/dist/job-fair-aggregator.test.d.ts.map +1 -0
  23. package/dist/job-fair-aggregator.test.js +35 -0
  24. package/dist/job-fair-aggregator.test.js.map +1 -0
  25. package/dist/package-exports.test.d.ts +13 -0
  26. package/dist/package-exports.test.d.ts.map +1 -0
  27. package/dist/package-exports.test.js +192 -0
  28. package/dist/package-exports.test.js.map +1 -0
  29. package/docs/BENCHMARK-METHODOLOGY.md +105 -0
  30. package/docs/BENCHMARKING.md +311 -0
  31. package/docs/OCEAN-BENCHMARKING.md +210 -0
  32. package/docs/benchmarks-guide.md +62 -0
  33. package/package.json +66 -0
  34. package/scenarios/README.md +145 -0
  35. package/scenarios/architecture/database-selection.yaml +119 -0
  36. package/scenarios/architecture/legacy-modernization.yaml +153 -0
  37. package/scenarios/architecture/scaling-decision.yaml +88 -0
  38. package/scenarios/code-review/graphql-api-review.yaml +714 -0
  39. package/scenarios/code-review/order-service.yaml +622 -0
  40. package/scenarios/code-review/react-auth-component.yaml +569 -0
  41. package/scenarios/code-review/security-review.yaml +145 -0
  42. package/scenarios/code-review/terraform-infrastructure.yaml +582 -0
  43. package/scenarios/debug/buggy-user-service.yaml +541 -0
  44. package/scenarios/debug/null-pointer.yaml +130 -0
  45. package/scenarios/debugging/async-control-flow.yaml +161 -0
  46. package/scenarios/debugging/auth-bypass.yaml +197 -0
  47. package/scenarios/debugging/error-handling.yaml +178 -0
  48. package/scenarios/debugging/input-validation.yaml +157 -0
  49. package/scenarios/debugging/null-check-missing.yaml +139 -0
  50. package/scenarios/debugging/off-by-one-loop.yaml +132 -0
  51. package/scenarios/debugging/race-condition.yaml +180 -0
  52. package/scenarios/debugging/resource-leak.yaml +166 -0
  53. package/scenarios/debugging/simple-logic-error.yaml +115 -0
  54. package/scenarios/debugging/sql-injection.yaml +163 -0
  55. package/scenarios/dev/event-processor-tdd.yaml +764 -0
  56. package/scenarios/dev/migration-disaster.yaml +415 -0
  57. package/scenarios/dev/race-condition-cache.yaml +546 -0
  58. package/scenarios/dev/tdd-shopping-cart.yaml +681 -0
  59. package/scenarios/schema.yaml +639 -0
  60. package/scenarios/sm/dependency-deadlock.yaml +414 -0
  61. package/scenarios/sm/executive-pet-project.yaml +336 -0
  62. package/scenarios/sm/layoff-planning.yaml +356 -0
  63. package/scenarios/sm/sprint-planning-conflict.yaml +303 -0
  64. package/scenarios/sm/story-breakdown.yaml +240 -0
  65. package/scenarios/sm/three-sprint-failure.yaml +397 -0
  66. package/scenarios/swe-bench/README.md +57 -0
  67. package/scenarios/swe-bench/astropy-12907.yaml +128 -0
  68. package/scenarios/swe-bench/astropy-13398.yaml +177 -0
  69. package/scenarios/swe-bench/astropy-14309.yaml +180 -0
  70. package/scenarios/swe-bench/django-10097.yaml +106 -0
  71. package/scenarios/swe-bench/django-10554.yaml +140 -0
  72. package/scenarios/swe-bench/django-10973.yaml +93 -0
  73. package/scenarios/swe-bench/flask-5014-reviewer.yaml +145 -0
  74. package/scenarios/swe-bench/flask-5014-tea.yaml +123 -0
  75. package/scenarios/swe-bench/flask-5014.yaml +91 -0
  76. package/scenarios/swe-bench/import-swebench.py +246 -0
  77. package/scenarios/swe-bench/matplotlib-13989.yaml +139 -0
  78. package/scenarios/swe-bench/matplotlib-14623.yaml +127 -0
  79. package/scenarios/swe-bench/requests-1142-reviewer.yaml +144 -0
  80. package/scenarios/swe-bench/requests-1142-tea.yaml +135 -0
  81. package/scenarios/swe-bench/requests-1142.yaml +100 -0
  82. package/scenarios/swe-bench/requests-2931.yaml +98 -0
  83. package/scenarios/swe-bench/seaborn-3069.yaml +102 -0
  84. package/scenarios/swe-bench/sphinx-7590.yaml +108 -0
  85. package/scenarios/swe-bench/xarray-3993.yaml +104 -0
  86. package/scenarios/swe-bench/xarray-6992.yaml +136 -0
  87. package/scenarios/tea/checkout-component-tests.yaml +596 -0
  88. package/scenarios/tea/cli-tool-tests.yaml +561 -0
  89. package/scenarios/tea/microservice-integration-tests.yaml +520 -0
  90. package/scenarios/tea/payment-processor-tests.yaml +550 -0
  91. package/scripts/aggregate-benchmark-stats.js +315 -0
  92. package/scripts/aggregate-benchmark-stats.sh +8 -0
  93. package/scripts/benchmark-runner.js +392 -0
  94. package/scripts/benchmark-runner.sh +8 -0
  95. package/scripts/consolidate-job-fair.sh +107 -0
  96. package/scripts/convert-jobfair-to-benchmarks.sh +230 -0
  97. package/scripts/job-fair-batch.sh +116 -0
  98. package/scripts/job-fair-progress.sh +35 -0
  99. package/scripts/job-fair-runner.sh +278 -0
  100. package/scripts/job-fair-status.sh +80 -0
  101. package/scripts/job-fair-watcher-v2.sh +38 -0
  102. package/scripts/job-fair-watcher.sh +50 -0
  103. package/scripts/parallel-benchmark.sh +140 -0
  104. package/scripts/solo-runner.sh +344 -0
  105. package/scripts/test/ensure-swebench-data.sh +59 -0
  106. package/scripts/test/ground-truth-judge.py +220 -0
  107. package/scripts/test/swebench-judge.py +374 -0
  108. package/scripts/test/test-cache.sh +165 -0
  109. package/scripts/test/test-setup.sh +337 -0
  110. package/scripts/theme/compute-theme-tiers.sh +13 -0
  111. package/scripts/theme/compute_theme_tiers.py +402 -0
  112. package/scripts/theme/update-theme-tiers.sh +97 -0
  113. package/skills/finalize-run/SKILL.md +261 -0
  114. package/skills/judge/SKILL.md +644 -0
  115. package/skills/persona-benchmark/SKILL.md +187 -0
@@ -0,0 +1,303 @@
1
+ ---
2
+ # Scenario: Sprint Planning Conflict Resolution
3
+ # Category: sm
4
+ # Purpose: Test facilitation and prioritization skills
5
+
6
+ id: sm-001
7
+ name: sprint-planning-conflict
8
+ title: "Sprint Planning Conflict Resolution"
9
+ category: sm
10
+ difficulty: easy # Empirical: 90.50 mean (n=10) - highest SM score
11
+ version: "1.1"
12
+
13
+ # Empirical calibration: 2026-01-01
14
+ # Control baseline: mean=90.50, std=2.29, CI=[88.9, 92.1]
15
+ # Original label "medium" was incorrect - concrete constraints make the solution clearer
16
+
17
+ description: |
18
+ A sprint planning session where stakeholders have competing priorities:
19
+ tech lead wants a major refactor, PM wants 3 new features, QA says test
20
+ coverage is dangerously low, and one dev is on PTO. Capacity is 20 points
21
+ but requests total 35. SM must facilitate a resolution.
22
+
23
+ purpose: |
24
+ This scenario tests soft skills: facilitation, prioritization, negotiation.
25
+ A "directive" persona might impose a solution. A "collaborative" persona
26
+ might facilitate consensus. Measures ability to balance competing concerns.
27
+
28
+ prompt: |
29
+ You are the Scrum Master facilitating sprint planning. Your team has
30
+ 20 story points capacity for the upcoming sprint, but stakeholders are
31
+ requesting 35 points of work.
32
+
33
+ **TEAM CAPACITY:**
34
+ - 4 developers × 5 points each = 20 points
35
+ - 1 developer (Alex) is on PTO all sprint
36
+ - No carryover from last sprint
37
+
38
+ **STAKEHOLDER REQUESTS:**
39
+
40
+ **Tech Lead (Jordan):** "We MUST do the authentication refactor. It's 8 points
41
+ but it's blocking 3 other teams. Every sprint we delay costs us more. The
42
+ current code is a security liability."
43
+
44
+ **Product Manager (Sam):** "I promised the CEO three features for the board
45
+ demo next month:
46
+ - User dashboard redesign (5 points)
47
+ - Export to PDF (5 points)
48
+ - Email notifications (3 points)
49
+ If we don't deliver these, I'm going to have a very uncomfortable conversation."
50
+
51
+ **QA Lead (Morgan):** "Test coverage dropped to 45% last sprint. I need at least
52
+ 8 points for test automation debt, or I can't guarantee quality for anything
53
+ we ship. We had 3 production bugs last month."
54
+
55
+ **Senior Dev (Taylor):** "Two of those production bugs were in authentication.
56
+ Jordan's refactor would actually fix those. But the PDF export is technically
57
+ complex - whoever estimated 5 points is dreaming, it's more like 8."
58
+
59
+ **CONSTRAINTS:**
60
+ - Sprint starts Monday, cannot slip
61
+ - One dev (Alex) on PTO reduces capacity
62
+ - Dependencies: Dashboard needs authentication stable
63
+ - CEO demo is in 4 weeks (2 sprints)
64
+
65
+ **YOUR TASK:**
66
+ As Scrum Master, facilitate this planning session:
67
+ 1. Acknowledge all concerns
68
+ 2. Identify the real priorities and constraints
69
+ 3. Propose a sprint plan that maximizes value
70
+ 4. Handle pushback constructively
71
+ 5. Document decisions and commitments
72
+
73
+ Be specific about what goes in the sprint, what gets deferred, and why.
74
+
75
+ context:
76
+ team_size: 5
77
+ capacity_points: 20
78
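+ requested_points: 35  # NOTE(review): listed stories sum to 29 pts (32 with Taylor's 8-pt PDF estimate), not 35 - confirm intended total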
79
+ sprint_length: 2_weeks
80
+ external_deadline: 4_weeks
81
+
82
+ stakeholders:
83
+ - name: Jordan
84
+ role: Tech Lead
85
+ priority: Authentication refactor (8 pts)
86
+ concern: Security and blocking other teams
87
+
88
+ - name: Sam
89
+ role: Product Manager
90
+ priority: 3 features (13 pts total)
91
+ concern: CEO commitment
92
+
93
+ - name: Morgan
94
+ role: QA Lead
95
+ priority: Test automation (8 pts)
96
+ concern: Quality and coverage
97
+
98
+ - name: Taylor
99
+ role: Senior Dev
100
+ priority: None specific
101
+ concern: Accurate estimation, production stability
102
+
103
+ stories:
104
+ - id: AUTH-REFACTOR
105
+ title: Authentication refactor
106
+ points: 8
107
+ requested_by: Jordan
108
+ notes: Blocks 3 teams, fixes 2 prod bugs
109
+
110
+ - id: DASHBOARD
111
+ title: User dashboard redesign
112
+ points: 5
113
+ requested_by: Sam
114
+ notes: Needs stable auth
115
+
116
+ - id: PDF-EXPORT
117
+ title: Export to PDF
118
+ points: "5 (estimated) / 8 (Taylor's estimate)"
119
+ requested_by: Sam
120
+ notes: Technically complex
121
+
122
+ - id: EMAIL-NOTIFY
123
+ title: Email notifications
124
+ points: 3
125
+ requested_by: Sam
126
+ notes: Standalone feature
127
+
128
+ - id: TEST-AUTOMATION
129
+ title: Test automation debt
130
+ points: 8
131
+ requested_by: Morgan
132
+ notes: Coverage at 45%
133
+
134
+ # =============================================================================
135
+ # EVALUATION CRITERIA
136
+ # =============================================================================
137
+
138
+ baseline_criteria:
139
+ facilitation:
140
+ - id: ACKNOWLEDGES_ALL
141
+ description: "Acknowledges each stakeholder's concern"
142
+
143
+ - id: NO_DISMISSAL
144
+ description: "Doesn't dismiss any request outright"
145
+
146
+ - id: ASKS_QUESTIONS
147
+ description: "Asks clarifying questions before deciding"
148
+
149
+ prioritization:
150
+ - id: IDENTIFIES_DEPENDENCIES
151
+ description: "Notes dashboard depends on auth stability"
152
+
153
+ - id: CONSIDERS_RISK
154
+ description: "Weighs security/quality risks"
155
+
156
+ - id: ADDRESSES_ESTIMATION
157
+ description: "Addresses Taylor's concern about PDF estimate"
158
+
159
+ resolution:
160
+ - id: VIABLE_PLAN
161
+ description: "Produces a plan that fits capacity"
162
+
163
+ - id: CLEAR_DECISIONS
164
+ description: "Explicitly states what's in and what's out"
165
+
166
+ - id: EXPLAINS_TRADEOFFS
167
+ description: "Explains why deferred items are deferred"
168
+
169
+ communication:
170
+ - id: MANAGES_EXPECTATIONS
171
+ description: "Sets realistic expectations with Sam"
172
+
173
+ - id: OFFERS_ALTERNATIVES
174
+ description: "Proposes alternatives for deferred work"
175
+
176
+ - id: DOCUMENTS_DECISIONS
177
+ description: "Documents sprint commitment clearly"
178
+
179
+ bonus_criteria:
180
+ creative_solutions:
181
+ - id: SPLITS_STORIES
182
+ description: "Suggests splitting large stories"
183
+
184
+ - id: PARALLEL_PATHS
185
+ description: "Identifies work that can proceed in parallel"
186
+
187
+ - id: NEXT_SPRINT_PLAN
188
+ description: "Outlines plan for following sprint"
189
+
190
+ stakeholder_management:
191
+ - id: REFRAMES_CONSTRAINTS
192
+ description: "Reframes the situation positively"
193
+
194
+ - id: BUILDS_CONSENSUS
195
+ description: "Gets stakeholders to agree, not just comply"
196
+
197
+ - id: ESCALATION_PATH
198
+ description: "Notes when/how to escalate if needed"
199
+
200
+ # =============================================================================
201
+ # SCORING
202
+ # =============================================================================
203
+
204
+ scoring:
205
+ categories:
206
+ - name: facilitation
207
+ weight: 30
208
+ criteria:
209
+ - id: INCLUSIVE
210
+ description: "All voices heard and acknowledged"
211
+ points: 10
212
+ - id: CONSTRUCTIVE
213
+ description: "Keeps discussion productive"
214
+ points: 10
215
+ - id: NEUTRAL
216
+ description: "Doesn't take sides unfairly"
217
+ points: 10
218
+
219
+ - name: prioritization
220
+ weight: 30
221
+ criteria:
222
+ - id: VALUE_BASED
223
+ description: "Decisions based on clear value criteria"
224
+ points: 10
225
+ - id: RISK_AWARE
226
+ description: "Considers risks and dependencies"
227
+ points: 10
228
+ - id: REALISTIC
229
+ description: "Plan is achievable within capacity"
230
+ points: 10
231
+
232
+ - name: communication
233
+ weight: 25
234
+ criteria:
235
+ - id: CLEAR_OUTCOME
236
+ description: "Sprint plan is unambiguous"
237
+ points: 10
238
+ - id: EXPECTATIONS_SET
239
+ description: "Stakeholders know what to expect"
240
+ points: 8
241
+ - id: FOLLOW_UP
242
+ description: "Action items and follow-ups identified"
243
+ points: 7
244
+
245
+ - name: persona
246
+ weight: 15
247
+ criteria:
248
+ - id: CHARACTER_CONSISTENCY
249
+ description: "Stays in character throughout"
250
+ points: 8
251
+ - id: PERSONA_VALUE_ADD
252
+ description: "Persona enhances facilitation style"
253
+ points: 7
254
+
255
+ # =============================================================================
256
+ # PERSONA INFLUENCE
257
+ # =============================================================================
258
+
259
+ persona_influence:
260
+ dimensions:
261
+ - name: facilitation_style
262
+ description: "How the session is run"
263
+ spectrum:
264
+ directive: "Takes charge, proposes solution early"
265
+ collaborative: "Facilitates group decision-making"
266
+ consensus: "Won't proceed until everyone agrees"
267
+
268
+ - name: conflict_tolerance
269
+ description: "How disagreement is handled"
270
+ spectrum:
271
+ avoiding: "Smooths over conflict quickly"
272
+ addressing: "Names conflict constructively"
273
+ embracing: "Uses conflict to find better solutions"
274
+
275
+ - name: stakeholder_balance
276
+ description: "How competing needs are weighted"
277
+ spectrum:
278
+ technical: "Favors engineering concerns"
279
+ business: "Favors product/business concerns"
280
+ balanced: "Weighs all concerns equally"
281
+
282
+ expected_tendencies:
283
+ discworld_sm:
284
+ character: "Captain Carrot"
285
+ expected_traits:
286
+ - "Earnest - genuinely wants to help everyone"
287
+ - "Practical - finds workable solutions"
288
+ - "Respectful - treats all concerns as valid"
289
+ facilitation_prediction: "collaborative"
290
+
291
+ star_trek_sm:
292
+ character: "Deanna Troi"
293
+ expected_traits:
294
+ - "Empathetic - senses underlying concerns"
295
+ - "Diplomatic - navigates politics well"
296
+ - "May over-focus on feelings vs. decisions"
297
+ facilitation_prediction: "consensus"
298
+
299
+ control_sm:
300
+ character: "None (baseline)"
301
+ expected_traits:
302
+ - "Standard scrum master behavior"
303
+ facilitation_prediction: "baseline reference"
@@ -0,0 +1,240 @@
1
+ ---
2
+ # Scenario: Epic to Story Breakdown
3
+ # Category: sm
4
+ # Purpose: Test requirements decomposition and story writing skills
5
+
6
+ id: sm-004
7
+ name: story-breakdown
8
+ title: "Epic to Story Breakdown"
9
+ category: sm
10
+ difficulty: medium # Empirical: 85.50 ± 1.50 (n=10) - borderline medium/hard
11
+ version: "1.1"
12
+
13
+ # Empirical calibration: 2026-01-01
14
+ # Control baseline: mean=85.50, std=1.50, CI=[84.4, 86.6]
15
+ # Original label "easy" was incorrect - open-ended nature makes it harder
16
+
17
+ description: |
18
+ Given a vague epic from the product manager, break it down into well-formed
19
+ user stories with acceptance criteria. Tests ability to clarify requirements,
20
+ identify scope, and write testable stories.
21
+
22
+ purpose: |
23
+ This scenario tests requirements analysis. A "thorough" persona might
24
+ create more stories. A "pragmatic" persona might focus on core functionality.
25
+ Measures ability to take ambiguous input and produce actionable work items.
26
+
27
+ prompt: |
28
+ You are a Scrum Master working with a Product Manager to break down a new epic.
29
+
30
+ **THE EPIC:**
31
+ "We need user notifications. Users should be able to get notified about
32
+ important things happening in the app."
33
+
34
+ That's all the PM gave you. They're in meetings all day and can't clarify.
35
+
36
+ **YOUR TASK:**
37
+ 1. Identify the questions you WOULD ask the PM (document them)
38
+ 2. Make reasonable assumptions for each unanswered question
39
+ 3. Break the epic into 4-8 user stories
40
+ 4. For each story, write:
41
+ - User story format: "As a [user], I want [feature], so that [benefit]"
42
+ - 3-5 acceptance criteria (testable, specific)
43
+ - Story point estimate (1, 2, 3, 5, or 8)
44
+ - Dependencies on other stories (if any)
45
+ 5. Identify any technical enabler stories needed
46
+ 6. Suggest a prioritized order for implementation
47
+
48
+ **CONSTRAINTS:**
49
+ - Team velocity is ~20 points per sprint
50
+ - No existing notification infrastructure exists
51
+ - App has web and mobile clients
52
+ - Users already have email addresses in the system
53
+
54
+ Be specific and create stories that a developer could start working on.
55
+
56
+ context:
57
+ epic_description: "User notifications for important app events"
58
+ team_velocity: 20
59
+ existing_infrastructure: None for notifications
60
+ clients: [web, mobile]
61
+ user_data_available: [email]
62
+
63
+ likely_questions:
64
+ - What events should trigger notifications?
65
+ - What channels (email, push, in-app, SMS)?
66
+ - Should users control their preferences?
67
+ - What's the priority/urgency model?
68
+ - Are there compliance requirements?
69
+ - What's the MVP vs. full vision?
70
+
71
+ reasonable_assumptions:
72
+ - Start with email and in-app notifications
73
+ - Users can opt-out of non-critical notifications
74
+ - MVP focuses on 2-3 key event types
75
+ - Preferences UI can come in phase 2
76
+
77
+ # =============================================================================
78
+ # EVALUATION CRITERIA
79
+ # =============================================================================
80
+
81
+ baseline_criteria:
82
+ clarification:
83
+ - id: IDENTIFIES_GAPS
84
+ description: "Lists key questions that need PM clarification"
85
+
86
+ - id: DOCUMENTS_ASSUMPTIONS
87
+ description: "States assumptions made in PM's absence"
88
+
89
+ - id: REASONABLE_ASSUMPTIONS
90
+ description: "Assumptions are sensible defaults"
91
+
92
+ story_quality:
93
+ - id: USER_STORY_FORMAT
94
+ description: "Stories follow As a/I want/So that format"
95
+
96
+ - id: TESTABLE_ACS
97
+ description: "Acceptance criteria are specific and testable"
98
+
99
+ - id: APPROPRIATE_SIZE
100
+ description: "Stories are right-sized (not too big/small)"
101
+
102
+ - id: CLEAR_SCOPE
103
+ description: "Story scope is unambiguous"
104
+
105
+ completeness:
106
+ - id: TECHNICAL_ENABLERS
107
+ description: "Identifies infrastructure/enabler stories"
108
+
109
+ - id: DEPENDENCIES_NOTED
110
+ description: "Story dependencies are documented"
111
+
112
+ - id: PRIORITIZATION
113
+ description: "Suggests implementation order"
114
+
115
+ practicality:
116
+ - id: FITS_VELOCITY
117
+ description: "Stories fit team velocity constraints"
118
+
119
+ - id: MVP_FOCUSED
120
+ description: "Distinguishes MVP from future enhancements"
121
+
122
+ bonus_criteria:
123
+ depth:
124
+ - id: EDGE_CASES
125
+ description: "Stories cover edge cases (opt-out, failures)"
126
+
127
+ - id: NON_FUNCTIONAL
128
+ description: "Considers performance, scalability"
129
+
130
+ - id: MOBILE_SPECIFIC
131
+ description: "Addresses mobile push notification setup"
132
+
133
+ process:
134
+ - id: DEFINITION_OF_DONE
135
+ description: "Suggests DoD for notification stories"
136
+
137
+ - id: RISK_IDENTIFICATION
138
+ description: "Notes risks or unknowns"
139
+
140
+ # =============================================================================
141
+ # SCORING
142
+ # =============================================================================
143
+
144
+ scoring:
145
+ categories:
146
+ - name: analysis
147
+ weight: 30
148
+ criteria:
149
+ - id: QUESTIONS
150
+ description: "Identifies right clarifying questions"
151
+ points: 15
152
+ - id: ASSUMPTIONS
153
+ description: "Makes sensible documented assumptions"
154
+ points: 15
155
+
156
+ - name: story_writing
157
+ weight: 40
158
+ criteria:
159
+ - id: FORMAT
160
+ description: "Stories follow standard format"
161
+ points: 10
162
+ - id: ACCEPTANCE_CRITERIA
163
+ description: "ACs are testable and complete"
164
+ points: 15
165
+ - id: SIZING
166
+ description: "Story points are reasonable"
167
+ points: 10
168
+ - id: DEPENDENCIES
169
+ description: "Dependencies correctly identified"
170
+ points: 5
171
+
172
+ - name: planning
173
+ weight: 15
174
+ criteria:
175
+ - id: PRIORITIZATION
176
+ description: "Logical implementation order"
177
+ points: 8
178
+ - id: MVP_SCOPE
179
+ description: "Clear MVP vs future distinction"
180
+ points: 7
181
+
182
+ - name: persona
183
+ weight: 15
184
+ criteria:
185
+ - id: CHARACTER_CONSISTENCY
186
+ description: "Stays in character throughout"
187
+ points: 8
188
+ - id: PERSONA_VALUE_ADD
189
+ description: "Persona enhances story clarity"
190
+ points: 7
191
+
192
+ # =============================================================================
193
+ # PERSONA INFLUENCE
194
+ # =============================================================================
195
+
196
+ persona_influence:
197
+ dimensions:
198
+ - name: story_granularity
199
+ description: "How fine-grained the breakdown is"
200
+ spectrum:
201
+ coarse: "Fewer, larger stories"
202
+ balanced: "Right-sized stories"
203
+ fine: "Many small stories"
204
+
205
+ - name: assumption_style
206
+ description: "How assumptions are handled"
207
+ spectrum:
208
+ conservative: "Minimal assumptions, notes unknowns"
209
+ moderate: "Reasonable defaults with documentation"
210
+ aggressive: "Makes decisions, moves forward"
211
+
212
+ - name: technical_depth
213
+ description: "How much technical detail is included"
214
+ spectrum:
215
+ business_only: "Focuses on user value"
216
+ balanced: "Includes technical enablers"
217
+ technical: "Detailed technical considerations"
218
+
219
+ expected_tendencies:
220
+ discworld_sm:
221
+ character: "Captain Carrot"
222
+ expected_traits:
223
+ - "Practical - reasonable assumptions"
224
+ - "Clear - well-written stories"
225
+ - "May be optimistic about scope"
226
+ granularity_prediction: "balanced"
227
+
228
+ star_trek_sm:
229
+ character: "Deanna Troi"
230
+ expected_traits:
231
+ - "Thorough - many questions identified"
232
+ - "User-focused - emphasizes user value"
233
+ - "May over-analyze requirements"
234
+ granularity_prediction: "fine"
235
+
236
+ control_sm:
237
+ character: "None (baseline)"
238
+ expected_traits:
239
+ - "Standard story breakdown approach"
240
+ granularity_prediction: "baseline reference"