@pennyfarthing/benchmark 10.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/commands/benchmark-control.md +69 -0
  2. package/commands/benchmark.md +485 -0
  3. package/commands/job-fair.md +102 -0
  4. package/commands/solo.md +447 -0
  5. package/dist/benchmark-integration.d.ts +182 -0
  6. package/dist/benchmark-integration.d.ts.map +1 -0
  7. package/dist/benchmark-integration.js +710 -0
  8. package/dist/benchmark-integration.js.map +1 -0
  9. package/dist/benchmark-integration.test.d.ts +6 -0
  10. package/dist/benchmark-integration.test.d.ts.map +1 -0
  11. package/dist/benchmark-integration.test.js +41 -0
  12. package/dist/benchmark-integration.test.js.map +1 -0
  13. package/dist/index.d.ts +3 -0
  14. package/dist/index.d.ts.map +1 -0
  15. package/dist/index.js +5 -0
  16. package/dist/index.js.map +1 -0
  17. package/dist/job-fair-aggregator.d.ts +150 -0
  18. package/dist/job-fair-aggregator.d.ts.map +1 -0
  19. package/dist/job-fair-aggregator.js +547 -0
  20. package/dist/job-fair-aggregator.js.map +1 -0
  21. package/dist/job-fair-aggregator.test.d.ts +6 -0
  22. package/dist/job-fair-aggregator.test.d.ts.map +1 -0
  23. package/dist/job-fair-aggregator.test.js +35 -0
  24. package/dist/job-fair-aggregator.test.js.map +1 -0
  25. package/dist/package-exports.test.d.ts +13 -0
  26. package/dist/package-exports.test.d.ts.map +1 -0
  27. package/dist/package-exports.test.js +192 -0
  28. package/dist/package-exports.test.js.map +1 -0
  29. package/docs/BENCHMARK-METHODOLOGY.md +105 -0
  30. package/docs/BENCHMARKING.md +311 -0
  31. package/docs/OCEAN-BENCHMARKING.md +210 -0
  32. package/docs/benchmarks-guide.md +62 -0
  33. package/package.json +66 -0
  34. package/scenarios/README.md +145 -0
  35. package/scenarios/architecture/database-selection.yaml +119 -0
  36. package/scenarios/architecture/legacy-modernization.yaml +153 -0
  37. package/scenarios/architecture/scaling-decision.yaml +88 -0
  38. package/scenarios/code-review/graphql-api-review.yaml +714 -0
  39. package/scenarios/code-review/order-service.yaml +622 -0
  40. package/scenarios/code-review/react-auth-component.yaml +569 -0
  41. package/scenarios/code-review/security-review.yaml +145 -0
  42. package/scenarios/code-review/terraform-infrastructure.yaml +582 -0
  43. package/scenarios/debug/buggy-user-service.yaml +541 -0
  44. package/scenarios/debug/null-pointer.yaml +130 -0
  45. package/scenarios/debugging/async-control-flow.yaml +161 -0
  46. package/scenarios/debugging/auth-bypass.yaml +197 -0
  47. package/scenarios/debugging/error-handling.yaml +178 -0
  48. package/scenarios/debugging/input-validation.yaml +157 -0
  49. package/scenarios/debugging/null-check-missing.yaml +139 -0
  50. package/scenarios/debugging/off-by-one-loop.yaml +132 -0
  51. package/scenarios/debugging/race-condition.yaml +180 -0
  52. package/scenarios/debugging/resource-leak.yaml +166 -0
  53. package/scenarios/debugging/simple-logic-error.yaml +115 -0
  54. package/scenarios/debugging/sql-injection.yaml +163 -0
  55. package/scenarios/dev/event-processor-tdd.yaml +764 -0
  56. package/scenarios/dev/migration-disaster.yaml +415 -0
  57. package/scenarios/dev/race-condition-cache.yaml +546 -0
  58. package/scenarios/dev/tdd-shopping-cart.yaml +681 -0
  59. package/scenarios/schema.yaml +639 -0
  60. package/scenarios/sm/dependency-deadlock.yaml +414 -0
  61. package/scenarios/sm/executive-pet-project.yaml +336 -0
  62. package/scenarios/sm/layoff-planning.yaml +356 -0
  63. package/scenarios/sm/sprint-planning-conflict.yaml +303 -0
  64. package/scenarios/sm/story-breakdown.yaml +240 -0
  65. package/scenarios/sm/three-sprint-failure.yaml +397 -0
  66. package/scenarios/swe-bench/README.md +57 -0
  67. package/scenarios/swe-bench/astropy-12907.yaml +128 -0
  68. package/scenarios/swe-bench/astropy-13398.yaml +177 -0
  69. package/scenarios/swe-bench/astropy-14309.yaml +180 -0
  70. package/scenarios/swe-bench/django-10097.yaml +106 -0
  71. package/scenarios/swe-bench/django-10554.yaml +140 -0
  72. package/scenarios/swe-bench/django-10973.yaml +93 -0
  73. package/scenarios/swe-bench/flask-5014-reviewer.yaml +145 -0
  74. package/scenarios/swe-bench/flask-5014-tea.yaml +123 -0
  75. package/scenarios/swe-bench/flask-5014.yaml +91 -0
  76. package/scenarios/swe-bench/import-swebench.py +246 -0
  77. package/scenarios/swe-bench/matplotlib-13989.yaml +139 -0
  78. package/scenarios/swe-bench/matplotlib-14623.yaml +127 -0
  79. package/scenarios/swe-bench/requests-1142-reviewer.yaml +144 -0
  80. package/scenarios/swe-bench/requests-1142-tea.yaml +135 -0
  81. package/scenarios/swe-bench/requests-1142.yaml +100 -0
  82. package/scenarios/swe-bench/requests-2931.yaml +98 -0
  83. package/scenarios/swe-bench/seaborn-3069.yaml +102 -0
  84. package/scenarios/swe-bench/sphinx-7590.yaml +108 -0
  85. package/scenarios/swe-bench/xarray-3993.yaml +104 -0
  86. package/scenarios/swe-bench/xarray-6992.yaml +136 -0
  87. package/scenarios/tea/checkout-component-tests.yaml +596 -0
  88. package/scenarios/tea/cli-tool-tests.yaml +561 -0
  89. package/scenarios/tea/microservice-integration-tests.yaml +520 -0
  90. package/scenarios/tea/payment-processor-tests.yaml +550 -0
  91. package/scripts/aggregate-benchmark-stats.js +315 -0
  92. package/scripts/aggregate-benchmark-stats.sh +8 -0
  93. package/scripts/benchmark-runner.js +392 -0
  94. package/scripts/benchmark-runner.sh +8 -0
  95. package/scripts/consolidate-job-fair.sh +107 -0
  96. package/scripts/convert-jobfair-to-benchmarks.sh +230 -0
  97. package/scripts/job-fair-batch.sh +116 -0
  98. package/scripts/job-fair-progress.sh +35 -0
  99. package/scripts/job-fair-runner.sh +278 -0
  100. package/scripts/job-fair-status.sh +80 -0
  101. package/scripts/job-fair-watcher-v2.sh +38 -0
  102. package/scripts/job-fair-watcher.sh +50 -0
  103. package/scripts/parallel-benchmark.sh +140 -0
  104. package/scripts/solo-runner.sh +344 -0
  105. package/scripts/test/ensure-swebench-data.sh +59 -0
  106. package/scripts/test/ground-truth-judge.py +220 -0
  107. package/scripts/test/swebench-judge.py +374 -0
  108. package/scripts/test/test-cache.sh +165 -0
  109. package/scripts/test/test-setup.sh +337 -0
  110. package/scripts/theme/compute-theme-tiers.sh +13 -0
  111. package/scripts/theme/compute_theme_tiers.py +402 -0
  112. package/scripts/theme/update-theme-tiers.sh +97 -0
  113. package/skills/finalize-run/SKILL.md +261 -0
  114. package/skills/judge/SKILL.md +644 -0
  115. package/skills/persona-benchmark/SKILL.md +187 -0
@@ -0,0 +1,582 @@
1
+ ---
2
+ # Scenario: Terraform Infrastructure Code Review (Medium)
3
+ # NOTE: Re-ranked to "medium" based on control baseline mean 80.85 ± 6.01 (Story 7-2)
4
+ # Category: code-review
5
+ # Purpose: Test IaC security awareness and cloud infrastructure expertise
6
+
7
+ id: rev-004
8
+ name: terraform-infrastructure
9
+ title: "Terraform AWS Infrastructure Review"
10
+ category: code-review
11
+ difficulty: medium
12
+ version: "1.0"
13
+
14
+ description: |
15
+ Terraform modules defining AWS infrastructure: VPC, EKS cluster, RDS database,
16
+ S3 buckets, and IAM policies. Contains overly permissive IAM, public resources,
17
+ unencrypted storage, missing logging, and security group misconfigurations.
18
+ Tests whether reviewers understand cloud security beyond application code.
19
+
20
+ purpose: |
21
+ This scenario tests infrastructure security expertise. Many code reviewers excel
22
+ at application vulnerabilities but miss IaC issues (IAM policies, network exposure,
23
+ encryption gaps). Finding all 18 baseline issues = competent cloud security reviewer.
24
+ Finding bonus issues = understands AWS security deeply.
25
+
26
+ prompt: |
27
+ You are reviewing a pull request for Terraform infrastructure modules.
28
+ The DevOps engineer says "it works in the dev account" and wants to apply to production.
29
+
30
+ Review this infrastructure code thoroughly for:
31
+ - IAM policy issues (overly permissive, missing constraints)
32
+ - Network security (security groups, NACLs, public exposure)
33
+ - Data protection (encryption at rest/transit, key management)
34
+ - Logging and monitoring gaps
35
+ - Compliance concerns (SOC2, HIPAA if applicable)
36
+ - Cost optimization opportunities
37
+
38
+ For each issue:
39
+ 1. Identify the specific resource and attribute
40
+ 2. Classify severity (Critical/High/Medium/Low)
41
+ 3. Explain the security or operational impact
42
+ 4. Provide the corrected Terraform configuration
43
+
44
+ This infrastructure will handle production workloads. Security is paramount.
45
+
46
+ code:
47
+ language: hcl
48
+ filename: main.tf
49
+ content: |
50
+ # AWS Provider Configuration
51
+ provider "aws" {
52
+ region = var.region
53
+ }
54
+
55
+ variable "region" {
56
+ default = "us-east-1"
57
+ }
58
+
59
+ variable "environment" {
60
+ default = "production"
61
+ }
62
+
63
+ variable "db_password" {
64
+ default = "admin123"
65
+ }
66
+
67
+ # VPC Configuration
68
+ resource "aws_vpc" "main" {
69
+ cidr_block = "10.0.0.0/16"
70
+ enable_dns_hostnames = true
71
+ enable_dns_support = true
72
+
73
+ tags = {
74
+ Name = "main-vpc"
75
+ }
76
+ }
77
+
78
+ resource "aws_subnet" "public_a" {
79
+ vpc_id = aws_vpc.main.id
80
+ cidr_block = "10.0.1.0/24"
81
+ availability_zone = "${var.region}a"
82
+ map_public_ip_on_launch = true
83
+
84
+ tags = {
85
+ Name = "public-subnet-a"
86
+ }
87
+ }
88
+
89
+ resource "aws_subnet" "public_b" {
90
+ vpc_id = aws_vpc.main.id
91
+ cidr_block = "10.0.2.0/24"
92
+ availability_zone = "${var.region}b"
93
+ map_public_ip_on_launch = true
94
+
95
+ tags = {
96
+ Name = "public-subnet-b"
97
+ }
98
+ }
99
+
100
+ resource "aws_internet_gateway" "main" {
101
+ vpc_id = aws_vpc.main.id
102
+ }
103
+
104
+ # Security Groups
105
+ resource "aws_security_group" "web" {
106
+ name = "web-sg"
107
+ description = "Security group for web servers"
108
+ vpc_id = aws_vpc.main.id
109
+
110
+ ingress {
111
+ from_port = 0
112
+ to_port = 0
113
+ protocol = "-1"
114
+ cidr_blocks = ["0.0.0.0/0"]
115
+ }
116
+
117
+ egress {
118
+ from_port = 0
119
+ to_port = 0
120
+ protocol = "-1"
121
+ cidr_blocks = ["0.0.0.0/0"]
122
+ }
123
+ }
124
+
125
+ resource "aws_security_group" "database" {
126
+ name = "database-sg"
127
+ description = "Security group for database"
128
+ vpc_id = aws_vpc.main.id
129
+
130
+ ingress {
131
+ from_port = 3306
132
+ to_port = 3306
133
+ protocol = "tcp"
134
+ cidr_blocks = ["0.0.0.0/0"]
135
+ }
136
+
137
+ ingress {
138
+ from_port = 22
139
+ to_port = 22
140
+ protocol = "tcp"
141
+ cidr_blocks = ["0.0.0.0/0"]
142
+ }
143
+ }
144
+
145
+ # RDS Database
146
+ resource "aws_db_instance" "main" {
147
+ identifier = "production-db"
148
+ engine = "mysql"
149
+ engine_version = "5.7"
150
+ instance_class = "db.t2.micro"
151
+ allocated_storage = 20
152
+ storage_type = "gp2"
153
+ db_name = "appdb"
154
+ username = "admin"
155
+ password = var.db_password
156
+ parameter_group_name = "default.mysql5.7"
157
+ skip_final_snapshot = true
158
+ publicly_accessible = true
159
+
160
+ vpc_security_group_ids = [aws_security_group.database.id]
161
+ db_subnet_group_name = aws_db_subnet_group.main.name
162
+
163
+ tags = {
164
+ Environment = var.environment
165
+ }
166
+ }
167
+
168
+ resource "aws_db_subnet_group" "main" {
169
+ name = "main-db-subnet"
170
+ subnet_ids = [aws_subnet.public_a.id, aws_subnet.public_b.id]
171
+ }
172
+
173
+ # S3 Buckets
174
+ resource "aws_s3_bucket" "data" {
175
+ bucket = "company-production-data"
176
+
177
+ tags = {
178
+ Environment = var.environment
179
+ }
180
+ }
181
+
182
+ resource "aws_s3_bucket" "logs" {
183
+ bucket = "company-production-logs"
184
+ acl = "public-read"
185
+
186
+ tags = {
187
+ Environment = var.environment
188
+ }
189
+ }
190
+
191
+ resource "aws_s3_bucket" "backups" {
192
+ bucket = "company-production-backups"
193
+
194
+ versioning {
195
+ enabled = false
196
+ }
197
+
198
+ tags = {
199
+ Environment = var.environment
200
+ }
201
+ }
202
+
203
+ # IAM Roles and Policies
204
+ resource "aws_iam_role" "app_role" {
205
+ name = "application-role"
206
+
207
+ assume_role_policy = jsonencode({
208
+ Version = "2012-10-17"
209
+ Statement = [
210
+ {
211
+ Action = "sts:AssumeRole"
212
+ Effect = "Allow"
213
+ Principal = {
214
+ Service = "ec2.amazonaws.com"
215
+ }
216
+ },
217
+ {
218
+ Action = "sts:AssumeRole"
219
+ Effect = "Allow"
220
+ Principal = {
221
+ AWS = "*"
222
+ }
223
+ }
224
+ ]
225
+ })
226
+ }
227
+
228
+ resource "aws_iam_role_policy" "app_policy" {
229
+ name = "application-policy"
230
+ role = aws_iam_role.app_role.id
231
+
232
+ policy = jsonencode({
233
+ Version = "2012-10-17"
234
+ Statement = [
235
+ {
236
+ Effect = "Allow"
237
+ Action = [
238
+ "s3:*",
239
+ "ec2:*",
240
+ "rds:*",
241
+ "iam:*",
242
+ "secretsmanager:*"
243
+ ]
244
+ Resource = "*"
245
+ }
246
+ ]
247
+ })
248
+ }
249
+
250
+ resource "aws_iam_user" "deploy" {
251
+ name = "deploy-user"
252
+ }
253
+
254
+ resource "aws_iam_access_key" "deploy" {
255
+ user = aws_iam_user.deploy.name
256
+ }
257
+
258
+ resource "aws_iam_user_policy" "deploy" {
259
+ name = "deploy-policy"
260
+ user = aws_iam_user.deploy.name
261
+
262
+ policy = jsonencode({
263
+ Version = "2012-10-17"
264
+ Statement = [
265
+ {
266
+ Effect = "Allow"
267
+ Action = "*"
268
+ Resource = "*"
269
+ }
270
+ ]
271
+ })
272
+ }
273
+
274
+ # EKS Cluster
275
+ resource "aws_eks_cluster" "main" {
276
+ name = "production-cluster"
277
+ role_arn = aws_iam_role.eks_role.arn
278
+
279
+ vpc_config {
280
+ subnet_ids = [aws_subnet.public_a.id, aws_subnet.public_b.id]
281
+ endpoint_public_access = true
282
+ endpoint_private_access = false
283
+ public_access_cidrs = ["0.0.0.0/0"]
284
+ }
285
+
286
+ enabled_cluster_log_types = []
287
+ }
288
+
289
+ resource "aws_iam_role" "eks_role" {
290
+ name = "eks-cluster-role"
291
+
292
+ assume_role_policy = jsonencode({
293
+ Version = "2012-10-17"
294
+ Statement = [{
295
+ Action = "sts:AssumeRole"
296
+ Effect = "Allow"
297
+ Principal = {
298
+ Service = "eks.amazonaws.com"
299
+ }
300
+ }]
301
+ })
302
+ }
303
+
304
+ resource "aws_iam_role_policy_attachment" "eks_cluster_policy" {
305
+ policy_arn = "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy"
306
+ role = aws_iam_role.eks_role.name
307
+ }
308
+
309
+ # EC2 Instance
310
+ resource "aws_instance" "bastion" {
311
+ ami = "ami-12345678"
312
+ instance_type = "t2.micro"
313
+ subnet_id = aws_subnet.public_a.id
314
+ vpc_security_group_ids = [aws_security_group.web.id]
315
+ associate_public_ip_address = true
316
+ key_name = "production-key"
317
+
318
+ user_data = <<-EOF
319
+ #!/bin/bash
320
+ echo "root:${var.db_password}" | chpasswd
321
+ sed -i 's/PasswordAuthentication no/PasswordAuthentication yes/' /etc/ssh/sshd_config
322
+ systemctl restart sshd
323
+ EOF
324
+
325
+ tags = {
326
+ Name = "bastion-host"
327
+ }
328
+ }
329
+
330
+ # Outputs
331
+ output "db_password" {
332
+ value = var.db_password
333
+ }
334
+
335
+ output "access_key_id" {
336
+ value = aws_iam_access_key.deploy.id
337
+ }
338
+
339
+ output "secret_access_key" {
340
+ value = aws_iam_access_key.deploy.secret
341
+ }
342
+
343
+ output "db_endpoint" {
344
+ value = aws_db_instance.main.endpoint
345
+ }
346
+
347
+ # =============================================================================
348
+ # BASELINE ISSUES (minimum expected to find)
349
+ # =============================================================================
350
+
351
+ baseline_issues:
352
+ critical:
353
+ - id: HARDCODED_DB_PASSWORD
354
+ location: "variable db_password, line 14"
355
+ description: "Database password hardcoded in terraform with weak default"
356
+
357
+ - id: IAM_WILDCARD_PRINCIPAL
358
+ location: "aws_iam_role.app_role, line 172"
359
+ description: "IAM role allows any AWS principal to assume it"
360
+
361
+ - id: IAM_ADMIN_POLICY
362
+ location: "aws_iam_user_policy.deploy, lines 218-219"
363
+ description: "IAM user has full admin access (Action: *, Resource: *)"
364
+
365
+ - id: SECRETS_IN_OUTPUT
366
+ location: "outputs, lines 282-292"
367
+ description: "Sensitive values (password, access keys) exposed in outputs"
368
+
369
+ - id: PUBLIC_RDS
370
+ location: "aws_db_instance.main, line 109"
371
+ description: "RDS instance publicly accessible from internet"
372
+
373
+ high:
374
+ - id: OPEN_SECURITY_GROUP
375
+ location: "aws_security_group.web, lines 61-66"
376
+ description: "Security group allows all traffic from any source"
377
+
378
+ - id: DB_SG_OPEN
379
+ location: "aws_security_group.database, lines 81-93"
380
+ description: "Database security group open to 0.0.0.0/0"
381
+
382
+ - id: S3_PUBLIC_READ
383
+ location: "aws_s3_bucket.logs, line 135"
384
+ description: "Logs bucket has public-read ACL"
385
+
386
+ - id: ROOT_PASSWORD_USERDATA
387
+ location: "aws_instance.bastion user_data, line 271"
388
+ description: "Root password set via user_data with db password"
389
+
390
+ - id: PASSWORD_AUTH_SSH
391
+ location: "aws_instance.bastion user_data, line 272"
392
+ description: "SSH password authentication enabled on bastion"
393
+
394
+ - id: EKS_PUBLIC_ENDPOINT
395
+ location: "aws_eks_cluster.main, lines 230-235"
396
+ description: "EKS cluster publicly accessible from any IP"
397
+
398
+ medium:
399
+ - id: NO_RDS_ENCRYPTION
400
+ location: "aws_db_instance.main"
401
+ description: "RDS storage encryption not enabled"
402
+
403
+ - id: NO_S3_ENCRYPTION
404
+ location: "aws_s3_bucket resources"
405
+ description: "S3 buckets missing server-side encryption"
406
+
407
+ - id: SKIP_FINAL_SNAPSHOT
408
+ location: "aws_db_instance.main, line 108"
409
+ description: "skip_final_snapshot prevents data recovery"
410
+
411
+ - id: NO_S3_VERSIONING
412
+ location: "aws_s3_bucket.backups, line 142"
413
+ description: "Backup bucket has versioning disabled"
414
+
415
+ - id: EKS_NO_LOGGING
416
+ location: "aws_eks_cluster.main, line 237"
417
+ description: "EKS cluster logging disabled"
418
+
419
+ low:
420
+ - id: OUTDATED_MYSQL
421
+ location: "aws_db_instance.main, line 100"
422
+ description: "MySQL 5.7 is end-of-life (upstream support ended October 2023)"
423
+
424
+ - id: UNDERSIZED_INSTANCE
425
+ location: "aws_db_instance.main, line 101"
426
+ description: "db.t2.micro instance class is insufficient for a production database"
427
+
428
+ # =============================================================================
429
+ # BONUS ISSUES (thorough reviewers might find these)
430
+ # =============================================================================
431
+
432
+ bonus_issues:
433
+ security:
434
+ - id: NO_VPC_FLOW_LOGS
435
+ description: "VPC flow logs not enabled for network monitoring"
436
+
437
+ - id: NO_CLOUDTRAIL
438
+ description: "No CloudTrail configuration for API auditing"
439
+
440
+ - id: NO_NACLS
441
+ description: "No Network ACLs defined for defense in depth"
442
+
443
+ - id: NO_WAF
444
+ description: "No WAF configuration for web application protection"
445
+
446
+ operational:
447
+ - id: NO_BACKUP_PLAN
448
+ description: "No AWS Backup plan for RDS/EC2 snapshots"
449
+
450
+ - id: NO_AUTO_SCALING
451
+ description: "No auto-scaling for availability"
452
+
453
+ - id: NO_MULTI_AZ_RDS
454
+ description: "RDS not configured for Multi-AZ failover"
455
+
456
+ - id: HARDCODED_AMI
457
+ description: "AMI ID hardcoded instead of data source lookup"
458
+
459
+ tagging:
460
+ - id: INCONSISTENT_TAGS
461
+ description: "Not all resources have consistent tagging"
462
+
463
+ - id: NO_COST_TAGS
464
+ description: "Missing cost allocation tags"
465
+
466
+ compliance:
467
+ - id: NO_KMS_KEYS
468
+ description: "No customer-managed KMS keys for encryption"
469
+
470
+ - id: NO_PRIVATE_SUBNETS
471
+ description: "All subnets are public, no private tier"
472
+
473
+ # =============================================================================
474
+ # SCORING
475
+ # =============================================================================
476
+
477
+ scoring:
478
+ total_baseline_issues: 18
479
+ total_bonus_issues: 12
480
+ weights:
481
+ critical: 3
482
+ high: 2
483
+ medium: 1
484
+ low: 0.5
485
+ max_baseline_score: 33 # 5*3 + 6*2 + 5*1 + 2*0.5 = 33
486
+
487
+ categories:
488
+ - name: detection
489
+ weight: 40
490
+ criteria:
491
+ - id: BASELINE_FOUND
492
+ description: "Issues from the seeded baseline list"
493
+ points: 25
494
+ - id: BONUS_DISCOVERIES
495
+ description: "Valid issues beyond the baseline"
496
+ points: 15
497
+
498
+ - name: depth
499
+ weight: 30
500
+ criteria:
501
+ - id: ROOT_CAUSE_ANALYSIS
502
+ description: "Traces to AWS security model implications"
503
+ points: 10
504
+ - id: FIX_SPECIFICITY
505
+ description: "Provides corrected HCL code"
506
+ points: 10
507
+ - id: IMPACT_ASSESSMENT
508
+ description: "Explains breach scenarios, compliance impact"
509
+ points: 10
510
+
511
+ - name: quality
512
+ weight: 15
513
+ criteria:
514
+ - id: SEVERITY_ACCURACY
515
+ description: "Correctly classifies infrastructure severity"
516
+ points: 5
517
+ - id: REASONING_QUALITY
518
+ description: "Clear explanation of AWS security model"
519
+ points: 5
520
+ - id: ORGANIZATION
521
+ description: "Prioritized by blast radius"
522
+ points: 5
523
+
524
+ - name: persona
525
+ weight: 15
526
+ criteria:
527
+ - id: CHARACTER_CONSISTENCY
528
+ description: "Stays in character throughout"
529
+ points: 8
530
+ - id: PERSONA_VALUE_ADD
531
+ description: "Persona enhances memorability/clarity"
532
+ points: 7
533
+
534
+ # =============================================================================
535
+ # PERSONA INFLUENCE
536
+ # =============================================================================
537
+
538
+ persona_influence:
539
+ dimensions:
540
+ - name: cloud_expertise
541
+ description: "Depth of AWS/cloud security knowledge"
542
+ spectrum:
543
+ app_focused: "Finds obvious issues but misses IAM nuances"
544
+ balanced: "Covers both network and IAM issues"
545
+ cloud_native: "Catches subtle IAM, encryption, logging gaps"
546
+
547
+ - name: compliance_awareness
548
+ description: "Focus on compliance requirements"
549
+ spectrum:
550
+ security_only: "Only finds security issues"
551
+ compliance_aware: "Notes SOC2/HIPAA implications"
552
+ governance_focused: "Emphasizes audit trail, controls"
553
+
554
+ - name: operational_mindset
555
+ description: "Balance between security and operations"
556
+ spectrum:
557
+ security_purist: "May suggest impractical hardening"
558
+ balanced: "Practical security that works"
559
+ ops_focused: "May accept risks for operability"
560
+
561
+ expected_tendencies:
562
+ discworld_reviewer:
563
+ character: "Granny Weatherwax"
564
+ expected_traits:
565
+ - "Practical wisdom - should catch obvious mistakes"
566
+ - "No-nonsense - won't accept 'it works in dev'"
567
+ - "May lack cloud-specific deep knowledge"
568
+ thoroughness_prediction: "medium-high"
569
+
570
+ star_trek_reviewer:
571
+ character: "Spock"
572
+ expected_traits:
573
+ - "Logical - systematic resource-by-resource review"
574
+ - "Precise - will note specific AWS documentation"
575
+ - "Technical depth in cloud architecture"
576
+ thoroughness_prediction: "high"
577
+
578
+ control_reviewer:
579
+ character: "None (baseline)"
580
+ expected_traits:
581
+ - "Standard infrastructure review behavior"
582
+ thoroughness_prediction: "baseline reference"