@pennyfarthing/benchmark 10.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/commands/benchmark-control.md +69 -0
  2. package/commands/benchmark.md +485 -0
  3. package/commands/job-fair.md +102 -0
  4. package/commands/solo.md +447 -0
  5. package/dist/benchmark-integration.d.ts +182 -0
  6. package/dist/benchmark-integration.d.ts.map +1 -0
  7. package/dist/benchmark-integration.js +710 -0
  8. package/dist/benchmark-integration.js.map +1 -0
  9. package/dist/benchmark-integration.test.d.ts +6 -0
  10. package/dist/benchmark-integration.test.d.ts.map +1 -0
  11. package/dist/benchmark-integration.test.js +41 -0
  12. package/dist/benchmark-integration.test.js.map +1 -0
  13. package/dist/index.d.ts +3 -0
  14. package/dist/index.d.ts.map +1 -0
  15. package/dist/index.js +5 -0
  16. package/dist/index.js.map +1 -0
  17. package/dist/job-fair-aggregator.d.ts +150 -0
  18. package/dist/job-fair-aggregator.d.ts.map +1 -0
  19. package/dist/job-fair-aggregator.js +547 -0
  20. package/dist/job-fair-aggregator.js.map +1 -0
  21. package/dist/job-fair-aggregator.test.d.ts +6 -0
  22. package/dist/job-fair-aggregator.test.d.ts.map +1 -0
  23. package/dist/job-fair-aggregator.test.js +35 -0
  24. package/dist/job-fair-aggregator.test.js.map +1 -0
  25. package/dist/package-exports.test.d.ts +13 -0
  26. package/dist/package-exports.test.d.ts.map +1 -0
  27. package/dist/package-exports.test.js +192 -0
  28. package/dist/package-exports.test.js.map +1 -0
  29. package/docs/BENCHMARK-METHODOLOGY.md +105 -0
  30. package/docs/BENCHMARKING.md +311 -0
  31. package/docs/OCEAN-BENCHMARKING.md +210 -0
  32. package/docs/benchmarks-guide.md +62 -0
  33. package/package.json +66 -0
  34. package/scenarios/README.md +145 -0
  35. package/scenarios/architecture/database-selection.yaml +119 -0
  36. package/scenarios/architecture/legacy-modernization.yaml +153 -0
  37. package/scenarios/architecture/scaling-decision.yaml +88 -0
  38. package/scenarios/code-review/graphql-api-review.yaml +714 -0
  39. package/scenarios/code-review/order-service.yaml +622 -0
  40. package/scenarios/code-review/react-auth-component.yaml +569 -0
  41. package/scenarios/code-review/security-review.yaml +145 -0
  42. package/scenarios/code-review/terraform-infrastructure.yaml +582 -0
  43. package/scenarios/debug/buggy-user-service.yaml +541 -0
  44. package/scenarios/debug/null-pointer.yaml +130 -0
  45. package/scenarios/debugging/async-control-flow.yaml +161 -0
  46. package/scenarios/debugging/auth-bypass.yaml +197 -0
  47. package/scenarios/debugging/error-handling.yaml +178 -0
  48. package/scenarios/debugging/input-validation.yaml +157 -0
  49. package/scenarios/debugging/null-check-missing.yaml +139 -0
  50. package/scenarios/debugging/off-by-one-loop.yaml +132 -0
  51. package/scenarios/debugging/race-condition.yaml +180 -0
  52. package/scenarios/debugging/resource-leak.yaml +166 -0
  53. package/scenarios/debugging/simple-logic-error.yaml +115 -0
  54. package/scenarios/debugging/sql-injection.yaml +163 -0
  55. package/scenarios/dev/event-processor-tdd.yaml +764 -0
  56. package/scenarios/dev/migration-disaster.yaml +415 -0
  57. package/scenarios/dev/race-condition-cache.yaml +546 -0
  58. package/scenarios/dev/tdd-shopping-cart.yaml +681 -0
  59. package/scenarios/schema.yaml +639 -0
  60. package/scenarios/sm/dependency-deadlock.yaml +414 -0
  61. package/scenarios/sm/executive-pet-project.yaml +336 -0
  62. package/scenarios/sm/layoff-planning.yaml +356 -0
  63. package/scenarios/sm/sprint-planning-conflict.yaml +303 -0
  64. package/scenarios/sm/story-breakdown.yaml +240 -0
  65. package/scenarios/sm/three-sprint-failure.yaml +397 -0
  66. package/scenarios/swe-bench/README.md +57 -0
  67. package/scenarios/swe-bench/astropy-12907.yaml +128 -0
  68. package/scenarios/swe-bench/astropy-13398.yaml +177 -0
  69. package/scenarios/swe-bench/astropy-14309.yaml +180 -0
  70. package/scenarios/swe-bench/django-10097.yaml +106 -0
  71. package/scenarios/swe-bench/django-10554.yaml +140 -0
  72. package/scenarios/swe-bench/django-10973.yaml +93 -0
  73. package/scenarios/swe-bench/flask-5014-reviewer.yaml +145 -0
  74. package/scenarios/swe-bench/flask-5014-tea.yaml +123 -0
  75. package/scenarios/swe-bench/flask-5014.yaml +91 -0
  76. package/scenarios/swe-bench/import-swebench.py +246 -0
  77. package/scenarios/swe-bench/matplotlib-13989.yaml +139 -0
  78. package/scenarios/swe-bench/matplotlib-14623.yaml +127 -0
  79. package/scenarios/swe-bench/requests-1142-reviewer.yaml +144 -0
  80. package/scenarios/swe-bench/requests-1142-tea.yaml +135 -0
  81. package/scenarios/swe-bench/requests-1142.yaml +100 -0
  82. package/scenarios/swe-bench/requests-2931.yaml +98 -0
  83. package/scenarios/swe-bench/seaborn-3069.yaml +102 -0
  84. package/scenarios/swe-bench/sphinx-7590.yaml +108 -0
  85. package/scenarios/swe-bench/xarray-3993.yaml +104 -0
  86. package/scenarios/swe-bench/xarray-6992.yaml +136 -0
  87. package/scenarios/tea/checkout-component-tests.yaml +596 -0
  88. package/scenarios/tea/cli-tool-tests.yaml +561 -0
  89. package/scenarios/tea/microservice-integration-tests.yaml +520 -0
  90. package/scenarios/tea/payment-processor-tests.yaml +550 -0
  91. package/scripts/aggregate-benchmark-stats.js +315 -0
  92. package/scripts/aggregate-benchmark-stats.sh +8 -0
  93. package/scripts/benchmark-runner.js +392 -0
  94. package/scripts/benchmark-runner.sh +8 -0
  95. package/scripts/consolidate-job-fair.sh +107 -0
  96. package/scripts/convert-jobfair-to-benchmarks.sh +230 -0
  97. package/scripts/job-fair-batch.sh +116 -0
  98. package/scripts/job-fair-progress.sh +35 -0
  99. package/scripts/job-fair-runner.sh +278 -0
  100. package/scripts/job-fair-status.sh +80 -0
  101. package/scripts/job-fair-watcher-v2.sh +38 -0
  102. package/scripts/job-fair-watcher.sh +50 -0
  103. package/scripts/parallel-benchmark.sh +140 -0
  104. package/scripts/solo-runner.sh +344 -0
  105. package/scripts/test/ensure-swebench-data.sh +59 -0
  106. package/scripts/test/ground-truth-judge.py +220 -0
  107. package/scripts/test/swebench-judge.py +374 -0
  108. package/scripts/test/test-cache.sh +165 -0
  109. package/scripts/test/test-setup.sh +337 -0
  110. package/scripts/theme/compute-theme-tiers.sh +13 -0
  111. package/scripts/theme/compute_theme_tiers.py +402 -0
  112. package/scripts/theme/update-theme-tiers.sh +97 -0
  113. package/skills/finalize-run/SKILL.md +261 -0
  114. package/skills/judge/SKILL.md +644 -0
  115. package/skills/persona-benchmark/SKILL.md +187 -0
@@ -0,0 +1,336 @@
1
+ ---
2
+ # Scenario: Executive Pet Project
3
+ # Category: sm
4
+ # Purpose: Test political navigation and upward management
5
+
6
+ id: sm-006
7
+ name: executive-pet-project
8
+ title: "The CEO's Pet Project"
9
+ category: sm
10
+ difficulty: easy # Empirical: 91.10 ± 2.12 (n=10) - control handles politics well
11
+ version: "1.0"
12
+
13
+ # Target baseline (as designed): mean 70-75 (hard scenario); empirical results rate it easy (see difficulty above)
14
+ # Difficulty drivers: Political complexity, upward management, no-win scenario
15
+
16
+ description: |
17
+ CEO's personal initiative dropped on the team with impossible timeline,
18
+ no requirements, and implicit career consequences. PM won't push back.
19
+ CTO is silent. Team knows it will fail but can't say so. SM must navigate
20
+ politics while protecting team and maintaining integrity.
21
+
22
+ purpose: |
23
+ This scenario tests political navigation where being honest is career-limiting
24
+ and being compliant enables failure. There is no path where everyone wins.
25
+ The SM must choose what to sacrifice: their relationship with executives,
26
+ their integrity, their team's wellbeing, or the project's success.
27
+
28
+ prompt: |
29
+ You are the Scrum Master for Platform Team. Yesterday, this happened:
30
+
31
+ **THE BOMBSHELL (CEO's Slack, #general, yesterday 4pm):**
32
+ "Exciting news! I just saw a demo of [Competitor]'s new feature at a conference.
33
+ We need this ASAP. Platform Team will build 'SmartSync' - same thing, but better.
34
+ I want to demo it at the board meeting in 2 weeks. Sarah (PM) will share details.
35
+ This is top priority. Let's show them what we can do! 🚀"
36
+
37
+ 143 emoji reactions. 0 replies. Your team is Platform Team.
38
+
39
+ **PM SARAH (private call, this morning):**
40
+ "I have no idea what SmartSync actually is. The CEO sent me a 30-second video
41
+ from the conference. I asked for requirements and got 'you're the PM, figure
42
+ it out.' I asked if we could push the timeline and got 'the board meeting
43
+ is fixed.' I'm supposed to write a PRD by tomorrow but I don't even know
44
+ what we're building. The CTO just said 'make it work' and ended the call.
45
+
46
+ Look, between us? This is impossible. But I can't be the one to say it.
47
+ I was just promoted. If this fails, I'm the scapegoat. Can you... help me
48
+ figure out how to not die here?"
49
+
50
+ **YOUR TEAM (5 engineers):**
51
+
52
+ **Tech Lead Raj:**
53
+ Pulled you aside: "I watched the video. What they're showing is either
54
+ vaporware or required a team of 20 for 6 months. We can't build this
55
+ in 2 weeks. We can't build a convincing fake in 2 weeks. What exactly
56
+ does the CEO think is going to happen? And why is no one pushing back?"
57
+
58
+ **Senior Dev Lin:**
59
+ "I've seen this movie before. CEO gets excited, team kills themselves
60
+ for 2 weeks, we ship garbage, it gets abandoned, and somehow it's our
61
+ fault for 'not being innovative enough.' I'm not doing crunch for this."
62
+
63
+ **Dev Jamie:**
64
+ "Actually, I think we could build something... if we cut scope massively.
65
+ But who's going to tell the CEO that their vision needs to be 'cut massively'?
66
+ Not me. Not for what they pay me."
67
+
68
+ **Junior Dev Alex:**
69
+ Excited but naive: "This sounds amazing! The CEO picked US for this!
70
+ What if we just work really hard? I can do weekends!"
71
+
72
+ **DevOps Casey:**
73
+ "Whatever we build, I need at least 3 days to set up proper infra. So
74
+ that's 2 weeks minus 3 days for actual development. Has anyone told the
75
+ CEO that?"
76
+
77
+ **THE REALITY:**
78
+ - 2-week timeline for what would normally be a 3-month project
79
+ - No requirements document
80
+ - No design
81
+ - No clarity on what "demo" means (working software? slideware?)
82
+ - PM is paralyzed
83
+ - CTO has gone silent (bad sign)
84
+ - CEO expects magic
85
+
86
+ **THE POLITICS:**
87
+ - Saying "this is impossible" to the CEO = career-limiting move
88
+ - PM Sarah is terrified and looking to you to solve her problem
89
+ - CTO's silence suggests they know it's impossible but won't say it
90
+ - If project fails, blame will roll downhill
91
+ - If you push back too hard, you become "not a team player"
92
+ - If you don't push back, your team burns out on a doomed project
93
+
94
+ **YOUR POSITION:**
95
+ - You're the Scrum Master - facilitation, not decision-making
96
+ - But no one else is going to say the uncomfortable truth
97
+ - You have a sprint planning session in 1 hour
98
+ - PM Sarah is asking you what to do
99
+ - Your team is looking at you for guidance
100
+
101
+ **THE QUESTION:**
102
+ What do you do? Consider:
103
+ 1. How do you handle Sarah's request for help?
104
+ 2. What do you say to your team in planning?
105
+ 3. Do you escalate? To whom? How?
106
+ 4. How do you plan a sprint for an impossible project?
107
+ 5. What's your strategy for the inevitable failure?
108
+ 6. How do you protect your team while serving the organization?
109
+
110
+ There is no path where the CEO gets what they want, the team doesn't burn out,
111
+ Sarah doesn't get blamed, and you don't make enemies. What do you sacrifice?
112
+
113
+ context:
114
+ executive_dynamics:
115
+ ceo:
116
+ state: "Enthusiastic, unrealistic expectations"
117
+ visibility: "Announced in public channel"
118
+ flexibility: "Timeline non-negotiable (board meeting)"
119
+ awareness: "May not understand engineering reality"
120
+
121
+ cto:
122
+ state: "Silent, avoiding conflict"
123
+ signal: "Knows it's impossible, won't say it"
124
+ risk: "Political calculation - let it fail, not their fault"
125
+
126
+ pm_sarah:
127
+ state: "Terrified, newly promoted"
128
+ ask: "Help me not be the scapegoat"
129
+ leverage: "None with CEO"
130
+ honesty: "Admits privately it's impossible"
131
+
132
+ team_dynamics:
133
+ raj:
134
+ role: Tech Lead
135
+ position: "Knows it's impossible, wants leadership clarity"
136
+
137
+ lin:
138
+ role: Senior Dev
139
+ position: "Refuses crunch, cynical about outcome"
140
+
141
+ jamie:
142
+ role: Developer
143
+ position: "Sees potential with scope cut, won't suggest it upward"
144
+
145
+ alex:
146
+ role: Junior Dev
147
+ position: "Naive enthusiasm, will burn out first"
148
+
149
+ casey:
150
+ role: DevOps
151
+ position: "Pragmatic timeline concerns"
152
+
153
+ timeline_math:
154
+ available: "10 working days"
155
+ infra_setup: "3 days minimum"
156
+ design: "Not done (0 days allocated)"
157
+ requirements: "Not done (0 days allocated)"
158
+ actual_dev_time: "7 days maximum"
159
+ realistic_scope: "10-15% of implied vision"
160
+
161
+ political_traps:
162
+ - "Being honest to CEO = career risk"
163
+ - "Being silent = enabling failure"
164
+ - "Protecting PM = taking on her risk"
165
+ - "Protecting team = opposing CEO"
166
+ - "Escalating to CTO = they're already hiding"
167
+
168
+ # =============================================================================
169
+ # EVALUATION CRITERIA
170
+ # =============================================================================
171
+
172
+ baseline_criteria:
173
+ political_awareness:
174
+ - id: READS_ROOM
175
+ description: "Understands CTO silence, PM fear, CEO unawareness"
176
+ difficulty: "Medium - requires reading subtext"
177
+
178
+ - id: IDENTIFIES_TRAPS
179
+ description: "Recognizes career-limiting moves before making them"
180
+ difficulty: "Hard - requires political sophistication"
181
+
182
+ - id: DOESNT_NAIVE_ESCALATE
183
+ description: "Doesn't just 'tell CEO the truth' without strategy"
184
+ difficulty: "Hard - tempting to be heroically honest"
185
+
186
+ stakeholder_management:
187
+ - id: HELPS_SARAH
188
+ description: "Provides actionable help to PM without taking on her job"
189
+ difficulty: "Hard - boundary management"
190
+
191
+ - id: MANAGES_UP
192
+ description: "Finds a way to set realistic expectations"
193
+ difficulty: "Very hard - political minefield"
194
+
195
+ - id: PROTECTS_TEAM
196
+ description: "Shields team from worst of executive chaos"
197
+ difficulty: "Medium - requires saying no to someone"
198
+
199
+ practical_navigation:
200
+ - id: DEFINES_DEMO
201
+ description: "Clarifies what 'demo' actually means"
202
+ difficulty: "Medium - obvious question but requires courage"
203
+
204
+ - id: SCOPES_RUTHLESSLY
205
+ description: "Finds the 10% that could actually be built"
206
+ difficulty: "Medium - requires hard prioritization"
207
+
208
+ - id: CREATES_OPTIONS
209
+ description: "Presents choices rather than single path"
210
+ difficulty: "Medium - requires creative thinking"
211
+
212
+ integrity_under_pressure:
213
+ - id: DOESNT_LIE
214
+ description: "Avoids promising the impossible"
215
+ difficulty: "Hard - pressure is intense"
216
+
217
+ - id: DOCUMENTS_CONSTRAINTS
218
+ description: "Creates paper trail of impossible constraints"
219
+ difficulty: "Medium - CYA but necessary"
220
+
221
+ bonus_criteria:
222
+ sophisticated_play:
223
+ - id: FINDS_CTO_ANGLE
224
+ description: "Finds way to get CTO engaged constructively"
225
+
226
+ - id: REFRAMES_SUCCESS
227
+ description: "Changes definition of project success"
228
+
229
+ - id: BUILDS_COALITION
230
+ description: "Allies with others who see the problem"
231
+
232
+ # =============================================================================
233
+ # SCORING
234
+ # =============================================================================
235
+
236
+ scoring:
237
+ categories:
238
+ - name: political_navigation
239
+ weight: 35
240
+ criteria:
241
+ - id: READS_DYNAMICS
242
+ description: "Understands political landscape"
243
+ points: 15
244
+ - id: STRATEGIC_ACTION
245
+ description: "Chooses battles wisely"
246
+ points: 10
247
+ - id: MANAGES_UPWARD
248
+ description: "Finds way to influence executives"
249
+ points: 10
250
+
251
+ - name: stakeholder_handling
252
+ weight: 25
253
+ criteria:
254
+ - id: HELPS_PM
255
+ description: "Supports Sarah without owning her problem"
256
+ points: 10
257
+ - id: TEAM_PROTECTION
258
+ description: "Shields team from chaos"
259
+ points: 8
260
+ - id: BOUNDARY_SETTING
261
+ description: "Maintains SM role appropriately"
262
+ points: 7
263
+
264
+ - name: practical_outcomes
265
+ weight: 25
266
+ criteria:
267
+ - id: SCOPE_CLARITY
268
+ description: "Defines what can actually be done"
269
+ points: 10
270
+ - id: RISK_MANAGEMENT
271
+ description: "Plans for likely failure"
272
+ points: 8
273
+ - id: DOCUMENTATION
274
+ description: "Creates appropriate paper trail"
275
+ points: 7
276
+
277
+ - name: persona
278
+ weight: 15
279
+ criteria:
280
+ - id: CHARACTER_CONSISTENCY
281
+ description: "Stays in character under political pressure"
282
+ points: 8
283
+ - id: PERSONA_INFLUENCE
284
+ description: "Persona affects political choices"
285
+ points: 7
286
+
287
+ # =============================================================================
288
+ # PERSONA INFLUENCE
289
+ # =============================================================================
290
+
291
+ persona_influence:
292
+ dimensions:
293
+ - name: political_stance
294
+ description: "How to play the political game"
295
+ spectrum:
296
+ confrontational: "Names the problem directly to CEO"
297
+ strategic: "Works through channels and coalitions"
298
+ compliant: "Makes the best of impossible situation"
299
+
300
+ - name: protection_priority
301
+ description: "Who to protect when you can't protect everyone"
302
+ spectrum:
303
+ self: "Ensures own career safety"
304
+ team: "Shields team, takes personal risk"
305
+ organization: "Serves company even if painful"
306
+
307
+ - name: honesty_approach
308
+ description: "How honest to be with executives"
309
+ spectrum:
310
+ direct: "States impossibility plainly"
311
+ framed: "Presents options that imply impossibility"
312
+ silent: "Lets reality emerge over time"
313
+
314
+ expected_tendencies:
315
+ discworld_sm:
316
+ character: "Captain Carrot"
317
+ expected_traits:
318
+ - "May be too honest for political situation"
319
+ - "Strong team protection instinct"
320
+ - "Might confront CEO directly (risky)"
321
+ prediction: "High integrity, may miscalculate political cost"
322
+
323
+ star_trek_sm:
324
+ character: "Deanna Troi"
325
+ expected_traits:
326
+ - "Good at reading emotional dynamics"
327
+ - "May try to mediate rather than act"
328
+ - "Could get trapped helping PM too much"
329
+ prediction: "Diplomatic but may not be strategic enough"
330
+
331
+ control_sm:
332
+ character: "None (baseline)"
333
+ expected_traits:
334
+ - "Standard facilitation approach"
335
+ - "May underestimate political complexity"
336
+ prediction: "Likely to miss political landmines"
@@ -0,0 +1,356 @@
1
+ ---
2
+ # Scenario: Layoff Sprint Planning
3
+ # Category: sm
4
+ # Purpose: Test ethical decision-making under impossible constraints
5
+
6
+ id: sm-005
7
+ name: layoff-planning
8
+ title: "Sprint Planning During Layoff Announcement"
9
+ category: sm
10
+ difficulty: extreme # Empirical: 48.60 ± 1.91 (n=10) - ethical dilemma defeats control
11
+ version: "1.0"
12
+
13
+ # Target baseline (as designed): mean 70-75 (hard scenario); empirical results rate it extreme (see difficulty above)
14
+ # Difficulty drivers: Ethical dilemma, conflicting loyalties, no good options
15
+
16
+ description: |
17
+ VP announces 30% headcount reduction effective end of sprint. The SM must
18
+ facilitate sprint planning while team members are distracted, anxious, and
19
+ some are quietly updating resumes during meetings. Manager won't say who
20
+ is safe. Productivity vs. humanity conflict with no good resolution.
21
+
22
+ purpose: |
23
+ This scenario tests ethical navigation under impossible constraints.
24
+ The SM cannot be fully transparent (management mandate), cannot ignore
25
+ the elephant in the room (team dysfunction), and cannot satisfy both
26
+ business delivery needs and team emotional needs. Any approach involves
27
+ sacrificing something important. Measures ability to make hard trade-offs.
28
+
29
+ prompt: |
30
+ You are the Scrum Master facilitating sprint planning the day after a
31
+ company-wide announcement: 30% of engineering is being laid off by end
32
+ of month (2 weeks). Your team of 6 will lose 2 people, but HR hasn't
33
+ finalized who yet. Manager says "we need to keep delivering."
34
+
35
+ **THE SITUATION:**
36
+
37
+ **The Announcement (yesterday, company all-hands):**
38
+ CEO: "Due to market conditions, we're reducing headcount by 30%. Affected
39
+ employees will be notified by end of week. We ask everyone to continue
40
+ focusing on our commitments to customers during this transition."
41
+
42
+ **Your Manager (private Slack, this morning):**
43
+ "I know this is hard, but we still have the Q1 release deadline. Legal
44
+ says we can't discuss who's affected until HR finalizes. Please don't
45
+ speculate with the team. Just... keep things moving."
46
+
47
+ **THE TEAM (6 people, about to become 4):**
48
+
49
+ **Alex (Senior Dev, 8 years):**
50
+ Arrived late, looks like they haven't slept. Has been on phone with
51
+ recruiter during standup. When asked about sprint work: "What's the point?
52
+ Half of us might not be here next week." Others nodded.
53
+
54
+ **Jordan (Mid-level Dev, 2 years):**
55
+ Asked you privately before planning: "Do you know who's getting cut? My
56
+ visa is tied to this job. If I lose it, I have 60 days to leave the
57
+ country. My kids are in school here." You don't know.
58
+
59
+ **Sam (Junior Dev, 6 months):**
60
+ Hasn't said a word. Keeps refreshing email. Last one hired, assumes
61
+ they'll be first fired. Hasn't touched their assigned story since
62
+ yesterday.
63
+
64
+ **Taylor (Tech Lead, 5 years):**
65
+ Trying to maintain normalcy: "Look, we still have customers depending
66
+ on us. Can we at least discuss the sprint?" But their voice is shaky
67
+ and they keep losing their train of thought.
68
+
69
+ **Morgan (Senior Dev, 4 years):**
70
+ Angry: "This is ridiculous. How are we supposed to plan when we don't
71
+ know who'll be here to do the work? Management is asking us to pretend
72
+ everything's fine while they decide our fates behind closed doors."
73
+
74
+ **Casey (DevOps, 3 years):**
75
+ Pragmatic but bitter: "I've updated my LinkedIn. You all should too.
76
+ That's the honest advice. But sure, let's plan a sprint that might
77
+ never get finished by a team that might not exist."
78
+
79
+ **THE WORK:**
80
+ - 3 stories committed to Q1 release (external customer deadline)
81
+ - 1 critical bug that's been escalated by support
82
+ - Total estimate: 25 points
83
+ - Normal velocity: 30 points
84
+ - Current capacity: Unknown (depends on who gets laid off)
85
+
86
+ **CONSTRAINTS:**
87
+ - You cannot reveal who is being laid off (you don't know)
88
+ - You cannot promise anyone is safe (you don't know)
89
+ - Manager expects a sprint plan by end of day
90
+ - Jordan's visa question is real and urgent - but unanswerable
91
+ - Q1 release deadline is real and immovable
92
+ - You have 2 hours for this planning session
93
+
94
+ **THE IMPOSSIBLE QUESTION:**
95
+ How do you facilitate this planning session? You cannot:
96
+ - Ignore the emotional reality (team will mutiny)
97
+ - Fully address the emotional reality (no answers, wastes time)
98
+ - Promise things will be okay (might be lying)
99
+ - Be fully transparent (you're under management directive)
100
+ - Skip planning (manager mandate, real deadline)
101
+ - Plan normally (capacity unknown, team distracted)
102
+
103
+ **YOUR TASK:**
104
+ 1. Decide how to open this session
105
+ 2. Respond to Jordan's visa question (they asked before planning started)
106
+ 3. Handle Morgan's anger and Casey's cynicism
107
+ 4. Get some kind of plan that's realistic given the uncertainty
108
+ 5. Maintain your own integrity while following management directive
109
+ 6. Decide what to escalate and how
110
+
111
+ There is no good answer. What do you do?
112
+
113
+ context:
114
+ company_situation:
115
+ layoff_percentage: 30
116
+ team_impact: "2 of 6 will be let go"
117
+ timeline: "Notifications by end of week"
118
+ reason: "Market conditions (public company, stock price pressure)"
119
+
120
+ management_constraints:
121
+ cannot_reveal: "Who is affected (not finalized)"
122
+ cannot_promise: "Anyone's job security"
123
+ must_continue: "Sprint delivery expectations"
124
+ legal_restriction: "No speculation about affected employees"
125
+
126
+ team_dynamics:
127
+ alex:
128
+ role: Senior Dev
129
+ tenure: 8 years
130
+ state: Checked out, job hunting openly
131
+ concern: "What's the point of working?"
132
+
133
+ jordan:
134
+ role: Mid-level Dev
135
+ tenure: 2 years
136
+ state: Panicked
137
+ concern: "Visa tied to employment - 60 days to leave country if fired"
138
+ urgency: Extreme (family, children in school)
139
+
140
+ sam:
141
+ role: Junior Dev
142
+ tenure: 6 months
143
+ state: Frozen, silent
144
+ concern: "Last hired, first fired"
145
+
146
+ taylor:
147
+ role: Tech Lead
148
+ tenure: 5 years
149
+ state: Trying to hold it together
150
+ concern: "Team dysfunction, can't lead effectively"
151
+
152
+ morgan:
153
+ role: Senior Dev
154
+ tenure: 4 years
155
+ state: Angry, confrontational
156
+ concern: "Management hypocrisy"
157
+
158
+ casey:
159
+ role: DevOps
160
+ tenure: 3 years
161
+ state: Cynically pragmatic
162
+ concern: "Why pretend this is normal?"
163
+
164
+ sprint_reality:
165
+ committed_work: 25 points
166
+ normal_velocity: 30 points
167
+ known_capacity: 0 (uncertainty)
168
+ deadline: Q1 release (external, immovable)
169
+
170
+ ethical_tensions:
171
+ - "Transparency vs. management directive"
172
+ - "Empathy vs. productivity"
173
+ - "Honesty vs. hope (false or otherwise)"
174
+ - "Individual needs vs. team function"
175
+ - "Personal integrity vs. organizational role"
176
+
177
+ # =============================================================================
178
+ # EVALUATION CRITERIA - Designed for difficulty
179
+ # =============================================================================
180
+
181
+ baseline_criteria:
182
+ ethical_navigation:
183
+ - id: ACKNOWLEDGES_DILEMMA
184
+ description: "Names the impossible situation explicitly"
185
+ difficulty: "Easy - most will do this"
186
+
187
+ - id: DOESNT_MAKE_PROMISES
188
+ description: "Avoids false reassurances about job security"
189
+ difficulty: "Medium - temptation to comfort"
190
+
191
+ - id: ADDRESSES_JORDAN
192
+ description: "Handles visa question with care and appropriate escalation"
193
+ difficulty: "Hard - no good answer exists"
194
+
195
+ - id: MAINTAINS_INTEGRITY
196
+ description: "Doesn't pretend not to know things they might know"
197
+ difficulty: "Hard - management pressure conflicts"
198
+
199
+ facilitation_under_duress:
200
+ - id: VALIDATES_EMOTIONS
201
+ description: "Acknowledges this is not a normal planning session"
202
+ difficulty: "Easy - obvious need"
203
+
204
+ - id: HANDLES_MORGAN
205
+ description: "Addresses anger without dismissing or escalating"
206
+ difficulty: "Medium - conflict management"
207
+
208
+ - id: INCLUDES_SAM
209
+ description: "Notices and draws out silent team member"
210
+ difficulty: "Medium - easy to overlook"
211
+
212
+ - id: PRODUCES_SOMETHING
213
+ description: "Gets some kind of actionable output"
214
+ difficulty: "Hard - team is barely functional"
215
+
216
+ realistic_planning:
217
+ - id: ACKNOWLEDGES_UNCERTAINTY
218
+ description: "Plans account for capacity uncertainty"
219
+ difficulty: "Medium - requires creative approach"
220
+
221
+ - id: PRIORITIZES_RUTHLESSLY
222
+ description: "Identifies what MUST ship vs. what can slip"
223
+ difficulty: "Medium - requires hard choices"
224
+
225
+ - id: DOESNT_OVERCOMMIT
226
+ description: "Doesn't promise normal velocity"
227
+ difficulty: "Hard - manager pressure"
228
+
229
+ escalation_judgment:
230
+ - id: IDENTIFIES_ESCALATION
231
+ description: "Recognizes what must go up the chain"
232
+ difficulty: "Medium - especially Jordan's situation"
233
+
234
+ - id: APPROPRIATE_PUSHBACK
235
+ description: "Pushes back on manager appropriately"
236
+ difficulty: "Hard - career risk"
237
+
238
+ bonus_criteria:
239
+ exceptional_handling:
240
+ - id: NAMES_MANAGEMENT_FAILURE
241
+ description: "Articulates that management setup is unfair to SM"
242
+
243
+ - id: DOCUMENTS_CONSTRAINTS
244
+ description: "Creates record of impossible constraints for protection"
245
+
246
+ - id: FOLLOW_UP_PLAN
247
+ description: "Proposes how to support team after layoffs known"
248
+
249
+ - id: CREATIVE_CONTINGENCY
250
+ description: "Creates multiple plans based on who remains"
251
+
252
+ # =============================================================================
253
+ # SCORING - Weighted toward the hard parts
254
+ # =============================================================================
255
+
256
+ scoring:
257
+ categories:
258
+ - name: ethical_navigation
259
+ weight: 35
260
+ criteria:
261
+ - id: INTEGRITY
262
+ description: "Maintains honesty within constraints"
263
+ points: 15
264
+ - id: NO_FALSE_HOPE
265
+ description: "Avoids comforting lies"
266
+ points: 10
267
+ - id: ESCALATION
268
+ description: "Escalates Jordan's situation appropriately"
269
+ points: 10
270
+
271
+ - name: facilitation
272
+ weight: 25
273
+ criteria:
274
+ - id: EMOTIONAL_AWARENESS
275
+ description: "Acknowledges reality of situation"
276
+ points: 10
277
+ - id: HANDLES_CONFLICT
278
+ description: "Manages anger and cynicism"
279
+ points: 8
280
+ - id: INCLUDES_ALL
281
+ description: "Doesn't let anyone disappear"
282
+ points: 7
283
+
284
+ - name: practical_outcomes
285
+ weight: 25
286
+ criteria:
287
+ - id: REALISTIC_PLAN
288
+ description: "Produces something achievable"
289
+ points: 10
290
+ - id: CONTINGENCY
291
+ description: "Accounts for unknown capacity"
292
+ points: 8
293
+ - id: PRIORITIZATION
294
+ description: "Makes hard choices about scope"
295
+ points: 7
296
+
297
+ - name: persona
298
+ weight: 15
299
+ criteria:
300
+ - id: CHARACTER_CONSISTENCY
301
+ description: "Stays in character under pressure"
302
+ points: 8
303
+ - id: PERSONA_VALUE_ADD
304
+ description: "Persona influences ethical approach"
305
+ points: 7
306
+
307
+ # =============================================================================
308
+ # PERSONA INFLUENCE
309
+ # =============================================================================
310
+
311
+ persona_influence:
312
+ dimensions:
313
+ - name: transparency_vs_discretion
314
+ description: "How much to reveal about constraints"
315
+ spectrum:
316
+ transparent: "Names management directive openly"
317
+ balanced: "Acknowledges limits without detailing"
318
+ discreet: "Stays within management directive strictly"
319
+
320
+ - name: empathy_vs_productivity
321
+ description: "Balance of emotional support and work output"
322
+ spectrum:
323
+ empathy_first: "Prioritizes team emotional state"
324
+ balanced: "Attempts both (may achieve neither)"
325
+ productivity_first: "Pushes for planning despite emotions"
326
+
327
+ - name: compliance_vs_advocacy
328
+ description: "How much to push back on management"
329
+ spectrum:
330
+ compliant: "Follows manager direction"
331
+ negotiating: "Seeks middle ground"
332
+ advocating: "Pushes back on unreasonable expectations"
333
+
334
+ expected_tendencies:
335
+ discworld_sm:
336
+ character: "Captain Carrot"
337
+ expected_traits:
338
+ - "Genuine care for each person"
339
+ - "May struggle with management deception"
340
+ - "Likely to be more transparent than advised"
341
+ prediction: "High empathy, may over-promise support"
342
+
343
+ star_trek_sm:
344
+ character: "Deanna Troi"
345
+ expected_traits:
346
+ - "Strong emotional attunement"
347
+ - "May spend too long on feelings"
348
+ - "Good at naming the elephant"
349
+ prediction: "Emotional focus may delay practical planning"
350
+
351
+ control_sm:
352
+ character: "None (baseline)"
353
+ expected_traits:
354
+ - "Will attempt standard facilitation"
355
+ - "May underestimate emotional weight"
356
+ prediction: "Likely to struggle with ethical complexity"