@ryuenn3123/agentic-senior-core 2.0.13 → 2.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,12 @@
1
1
  # Blueprint: Laravel API
2
2
 
3
- > PHP backend API service using Laravel 12, PHP 8.5+, Form Requests, Eloquent, and Scribe for docs.
3
+ > PHP backend API service using Laravel 13, PHP 8.3+, Form Requests, Eloquent, and Scribe for docs.
4
4
 
5
5
  ## Tech Stack
6
6
 
7
7
  | Layer | Technology |
8
8
  |-------|-----------|
9
- | Framework | Laravel 12 |
9
+ | Framework | Laravel 13 |
10
10
  | Validation | Form Requests |
11
11
  | ORM | Eloquent |
12
12
  | Migration | Laravel Migrations |
@@ -15,6 +15,14 @@
15
15
  | Formatting | Laravel Pint |
16
16
  | API docs | Scribe |
17
17
 
18
+ ## Laravel 13 Upgrade Guardrails
19
+
20
+ - Target `laravel/framework:^13.0` with PHP 8.3+.
21
+ - Use `PreventRequestForgery` when explicitly disabling or excluding CSRF middleware in tests and routes.
22
+ - Keep `upsert` calls explicit with a non-empty `uniqueBy` value for MySQL and MariaDB paths.
23
+ - Decide cache object strategy up front: primitive payloads, or explicit `serializable_classes` allow-list.
24
+ - For existing Laravel 12 projects, keep framework-12-compatible middleware and APIs until the upgrade is done; treat this blueprint as target-state guidance.
25
+
18
26
  ---
19
27
 
20
28
  ## Project Structure
@@ -210,7 +218,9 @@ final class UserResource extends JsonResource
210
218
 
211
219
  ## Scaffolding Checklist
212
220
 
213
- - [ ] Create Laravel project: `composer create-project laravel/laravel`
221
+ - [ ] Create Laravel project: `composer create-project laravel/laravel:^13.0`
222
+ - [ ] Confirm core dependencies: `laravel/framework:^13.0`, `laravel/tinker:^3.0`, `phpunit/phpunit:^12.0`, `pestphp/pest:^4.0`
223
+ - [ ] Optional AI workflow: install `laravel/boost:^2.0` and run `php artisan boost:install`
214
224
  - [ ] Set up modular structure under `app/Modules/`
215
225
  - [ ] Create shared error handler with consistent JSON responses
216
226
  - [ ] Create shared `ApiResponse` trait for standard response format
@@ -74,7 +74,7 @@ Every dependency MUST be justified per rules/efficiency-vs-hype.md.
74
74
  | `api-nextjs` | Next.js App Router API project |
75
75
  | `nestjs-logic` | NestJS backend service |
76
76
  | `fastapi-service` | Python FastAPI backend service |
77
- | `laravel-api` | PHP Laravel 12 API |
77
+ | `laravel-api` | PHP Laravel 13 API |
78
78
  | `spring-boot-api`| Java Spring Boot 4 API |
79
79
  | `go-service` | Go chi HTTP service |
80
80
  | `aspnet-api` | C# ASP.NET Minimal API |
@@ -3,9 +3,9 @@
3
3
  > PHP 8.x is a different language from PHP 5.
4
4
  > If your AI writes PHP without type declarations, reject it immediately.
5
5
 
6
- ## Language Version: PHP 8.5+ (Latest Stable)
6
+ ## Language Version: PHP 8.3+ (Laravel 13 Baseline, 8.5 Recommended)
7
7
 
8
- PHP 8.5 is stable since November 2025. Use modern PHP features including the pipe operator (`|>`), `Clone With`, and readonly classes.
8
+ Laravel 13 requires PHP 8.3+. Use PHP 8.5 when your runtime supports it, but avoid forcing 8.5-only syntax in shared packages unless project constraints explicitly require it.
9
9
 
10
10
  ### Strict Types Everywhere
11
11
  ```php
@@ -42,7 +42,7 @@ enum OrderStatus: string {
42
42
  }
43
43
  ```
44
44
 
45
- ### Readonly Properties and Classes (PHP 8.2+) and Pipe Operator (PHP 8.5+)
45
+ ### Readonly Properties and Classes (PHP 8.2+)
46
46
  ```php
47
47
  // Readonly for DTOs and value objects
48
48
  readonly class CreateUserDto {
@@ -52,8 +52,11 @@ readonly class CreateUserDto {
52
52
  public int $age,
53
53
  ) {}
54
54
  }
55
+ ```
55
56
 
56
- // Pipe operator for cleaner function chains (PHP 8.5)
57
+ ### Optional on PHP 8.5+: Pipe Operator
58
+ ```php
59
+ // Use when your project runtime is locked to PHP 8.5+
57
60
  $result = $input
58
61
  |> 'trim'
59
62
  |> 'strtolower'
@@ -151,7 +154,7 @@ parameters:
151
154
 
152
155
  | Need | Library | Why |
153
156
  |------|---------|-----|
154
- | Framework | Laravel 12 | Most productive PHP framework, auto eager loading, GraphQL |
157
+ | Framework | Laravel 13 | Most productive PHP framework with AI SDK, JSON:API resources, and stronger security defaults |
155
158
  | Validation | Laravel Form Requests | Built-in, declarative |
156
159
  | ORM | Eloquent | Convention over configuration |
157
160
  | Testing | PHPUnit / Pest | Pest preferred for readability |
@@ -164,6 +167,17 @@ parameters:
164
167
 
165
168
  ---
166
169
 
170
+ ## Laravel 13 Guardrails
171
+
172
+ - Use `PreventRequestForgery` for explicit CSRF middleware references (old aliases still exist but are deprecated).
173
+ - Ensure `upsert(..., uniqueBy: ...)` always passes a non-empty `uniqueBy` value.
174
+ - Prefer first-party JSON:API resources when you need JSON:API-compliant responses.
175
+ - If caching objects, configure `cache.serializable_classes` allow-list explicitly.
176
+ - For AI-assisted Laravel projects, use `laravel/boost` `^2.0` and run `php artisan boost:install`.
177
+ - Laravel 12 projects are still supported: keep `VerifyCsrfToken` and avoid 13-only API assumptions until the framework upgrade is complete.
178
+
179
+ ---
180
+
167
181
  ## Banned Patterns
168
182
 
169
183
  | Pattern | Why | Alternative |
@@ -0,0 +1,390 @@
1
+ {
2
+ "generatedAt": "2026-04-13T15:56:01.200Z",
3
+ "reportName": "benchmark-evidence-bundle",
4
+ "phase": "v2.5.1",
5
+ "passed": true,
6
+ "failureCount": 0,
7
+ "methodology": {
8
+ "deterministicRuntime": {
9
+ "timezone": "UTC",
10
+ "locale": "C",
11
+ "nodeMajor": "22",
12
+ "lineEndings": "LF-preferred",
13
+ "shellNotes": "PowerShell and POSIX shells are supported; prefer portable commands for benchmark reruns."
14
+ },
15
+ "scenarioCount": 4,
16
+ "commandCount": 5
17
+ },
18
+ "rerunInstructions": [
19
+ "Run npm run benchmark:detection to regenerate detection benchmark output.",
20
+ "Run npm run benchmark:gate to validate benchmark anti-regression thresholds.",
21
+ "Run npm run benchmark:intelligence to validate benchmark watchlist freshness.",
22
+ "Run npm run benchmark:bundle to emit a reproducible benchmark evidence bundle."
23
+ ],
24
+ "commandExamples": [
25
+ "npm run benchmark:detection",
26
+ "npm run benchmark:gate",
27
+ "npm run benchmark:intelligence",
28
+ "npm run benchmark:bundle",
29
+ "node ./scripts/benchmark-evidence-bundle.mjs --stdout-only"
30
+ ],
31
+ "rawInputs": {
32
+ "scenarios": [
33
+ {
34
+ "id": "planning",
35
+ "category": "planning",
36
+ "inputReferences": [
37
+ ".agent-context/state/architecture-map.md",
38
+ ".agent-context/state/dependency-map.md",
39
+ ".agent-context/rules/architecture.md"
40
+ ],
41
+ "expectedSignals": [
42
+ "clear sequencing",
43
+ "risk mapping",
44
+ "rollback path"
45
+ ],
46
+ "primaryCommand": "npm run benchmark:detection"
47
+ },
48
+ {
49
+ "id": "refactor",
50
+ "category": "refactor",
51
+ "inputReferences": [
52
+ "tests/cli-smoke.test.mjs",
53
+ "scripts/validate.mjs"
54
+ ],
55
+ "expectedSignals": [
56
+ "regression awareness",
57
+ "small safe diffs",
58
+ "test-backed changes"
59
+ ],
60
+ "primaryCommand": "npm run benchmark:gate"
61
+ },
62
+ {
63
+ "id": "security",
64
+ "category": "security",
65
+ "inputReferences": [
66
+ ".agent-context/rules/security.md",
67
+ "scripts/forbidden-content-check.mjs"
68
+ ],
69
+ "expectedSignals": [
70
+ "secret hygiene",
71
+ "unsafe pattern detection",
72
+ "release blocking on risk"
73
+ ],
74
+ "primaryCommand": "npm run gate:release"
75
+ },
76
+ {
77
+ "id": "delivery",
78
+ "category": "delivery",
79
+ "inputReferences": [
80
+ "scripts/release-gate.mjs",
81
+ "scripts/benchmark-intelligence.mjs",
82
+ ".agent-context/state/benchmark-watchlist.json"
83
+ ],
84
+ "expectedSignals": [
85
+ "release readiness",
86
+ "competitive coverage",
87
+ "SLA freshness"
88
+ ],
89
+ "primaryCommand": "npm run benchmark:intelligence"
90
+ }
91
+ ],
92
+ "benchmarkThresholds": {
93
+ "minimumTop1Accuracy": 0.9,
94
+ "maximumManualCorrectionRate": 0.12,
95
+ "maximumTop1AccuracyDrop": 0.02,
96
+ "maximumManualCorrectionIncrease": 0.03,
97
+ "previousReleaseBaseline": {
98
+ "top1Accuracy": 0.9167,
99
+ "manualCorrectionRate": 0.0833
100
+ }
101
+ },
102
+ "benchmarkWatchlist": [
103
+ {
104
+ "repository": "sickn33/antigravity-awesome-skills",
105
+ "owner": "core-architecture",
106
+ "lastReviewedAt": "2026-04-02"
107
+ },
108
+ {
109
+ "repository": "github/awesome-copilot",
110
+ "owner": "core-architecture",
111
+ "lastReviewedAt": "2026-04-02"
112
+ },
113
+ {
114
+ "repository": "MiniMax-AI/skills",
115
+ "owner": "frontend-governance",
116
+ "lastReviewedAt": "2026-04-02"
117
+ }
118
+ ]
119
+ },
120
+ "rubric": {
121
+ "benchmarkThresholds": {
122
+ "minimumTop1Accuracy": 0.9,
123
+ "maximumManualCorrectionRate": 0.12,
124
+ "maximumTop1AccuracyDrop": 0.02,
125
+ "maximumManualCorrectionIncrease": 0.03
126
+ },
127
+ "intelligenceSlaDays": 14
128
+ },
129
+ "outputs": {
130
+ "detectionBenchmark": {
131
+ "generatedAt": "2026-04-13T15:56:01.040Z",
132
+ "fixtureCount": 12,
133
+ "top1Accuracy": 0.9167,
134
+ "manualCorrectionRate": 0.0833,
135
+ "fixtures": [
136
+ {
137
+ "fixtureName": "typescript-basic",
138
+ "expectedStack": "typescript.md",
139
+ "detectedStack": "typescript.md",
140
+ "confidenceGap": 0.94,
141
+ "needsManualCorrection": false,
142
+ "isCorrect": true
143
+ },
144
+ {
145
+ "fixtureName": "typescript-next",
146
+ "expectedStack": "typescript.md",
147
+ "detectedStack": "typescript.md",
148
+ "confidenceGap": 0.97,
149
+ "needsManualCorrection": false,
150
+ "isCorrect": true
151
+ },
152
+ {
153
+ "fixtureName": "python-poetry",
154
+ "expectedStack": "python.md",
155
+ "detectedStack": "python.md",
156
+ "confidenceGap": 0.96,
157
+ "needsManualCorrection": false,
158
+ "isCorrect": true
159
+ },
160
+ {
161
+ "fixtureName": "python-requirements",
162
+ "expectedStack": "python.md",
163
+ "detectedStack": "python.md",
164
+ "confidenceGap": 0.78,
165
+ "needsManualCorrection": false,
166
+ "isCorrect": true
167
+ },
168
+ {
169
+ "fixtureName": "java-maven",
170
+ "expectedStack": "java.md",
171
+ "detectedStack": "java.md",
172
+ "confidenceGap": 0.95,
173
+ "needsManualCorrection": false,
174
+ "isCorrect": true
175
+ },
176
+ {
177
+ "fixtureName": "java-gradle",
178
+ "expectedStack": "java.md",
179
+ "detectedStack": "java.md",
180
+ "confidenceGap": 0.84,
181
+ "needsManualCorrection": false,
182
+ "isCorrect": true
183
+ },
184
+ {
185
+ "fixtureName": "php-composer",
186
+ "expectedStack": "php.md",
187
+ "detectedStack": "php.md",
188
+ "confidenceGap": 0.95,
189
+ "needsManualCorrection": false,
190
+ "isCorrect": true
191
+ },
192
+ {
193
+ "fixtureName": "go-module",
194
+ "expectedStack": "go.md",
195
+ "detectedStack": "go.md",
196
+ "confidenceGap": 0.96,
197
+ "needsManualCorrection": false,
198
+ "isCorrect": true
199
+ },
200
+ {
201
+ "fixtureName": "dotnet-solution",
202
+ "expectedStack": "csharp.md",
203
+ "detectedStack": "csharp.md",
204
+ "confidenceGap": 0.95,
205
+ "needsManualCorrection": false,
206
+ "isCorrect": true
207
+ },
208
+ {
209
+ "fixtureName": "rust-cargo",
210
+ "expectedStack": "rust.md",
211
+ "detectedStack": "rust.md",
212
+ "confidenceGap": 0.96,
213
+ "needsManualCorrection": false,
214
+ "isCorrect": true
215
+ },
216
+ {
217
+ "fixtureName": "ruby-gemfile",
218
+ "expectedStack": "ruby.md",
219
+ "detectedStack": "ruby.md",
220
+ "confidenceGap": 0.95,
221
+ "needsManualCorrection": false,
222
+ "isCorrect": true
223
+ },
224
+ {
225
+ "fixtureName": "mixed-ts-python",
226
+ "expectedStack": "typescript.md",
227
+ "detectedStack": "python.md",
228
+ "confidenceGap": 0.02,
229
+ "needsManualCorrection": true,
230
+ "isCorrect": false
231
+ }
232
+ ]
233
+ },
234
+ "benchmarkGate": {
235
+ "generatedAt": "2026-04-13T15:56:01.144Z",
236
+ "gateName": "benchmark-gate",
237
+ "passed": true,
238
+ "failureCount": 0,
239
+ "benchmarkResult": {
240
+ "fixtureCount": 12,
241
+ "top1Accuracy": 0.9167,
242
+ "manualCorrectionRate": 0.0833
243
+ },
244
+ "thresholds": {
245
+ "minimumTop1Accuracy": 0.9,
246
+ "maximumManualCorrectionRate": 0.12,
247
+ "maximumTop1AccuracyDrop": 0.02,
248
+ "maximumManualCorrectionIncrease": 0.03,
249
+ "previousReleaseBaseline": {
250
+ "top1Accuracy": 0.9167,
251
+ "manualCorrectionRate": 0.0833
252
+ }
253
+ },
254
+ "results": [
255
+ {
256
+ "checkName": "minimum-top1-accuracy",
257
+ "passed": true,
258
+ "details": "top1Accuracy=0.9167 minimum=0.9"
259
+ },
260
+ {
261
+ "checkName": "maximum-manual-correction-rate",
262
+ "passed": true,
263
+ "details": "manualCorrectionRate=0.0833 maximum=0.12"
264
+ },
265
+ {
266
+ "checkName": "maximum-top1-accuracy-drop",
267
+ "passed": true,
268
+ "details": "drop=0 maximum=0.02"
269
+ },
270
+ {
271
+ "checkName": "maximum-manual-correction-increase",
272
+ "passed": true,
273
+ "details": "increase=0 maximum=0.03"
274
+ }
275
+ ]
276
+ },
277
+ "benchmarkIntelligence": {
278
+ "generatedAt": "2026-04-13T15:56:01.192Z",
279
+ "reportName": "benchmark-intelligence",
280
+ "passed": true,
281
+ "failureCount": 0,
282
+ "reviewSlaDays": 14,
283
+ "watchlist": [
284
+ {
285
+ "repository": "sickn33/antigravity-awesome-skills",
286
+ "owner": "core-architecture",
287
+ "lastReviewedAt": "2026-04-02",
288
+ "ageInDays": 11,
289
+ "stale": false
290
+ },
291
+ {
292
+ "repository": "github/awesome-copilot",
293
+ "owner": "core-architecture",
294
+ "lastReviewedAt": "2026-04-02",
295
+ "ageInDays": 11,
296
+ "stale": false
297
+ },
298
+ {
299
+ "repository": "MiniMax-AI/skills",
300
+ "owner": "frontend-governance",
301
+ "lastReviewedAt": "2026-04-02",
302
+ "ageInDays": 11,
303
+ "stale": false
304
+ }
305
+ ],
306
+ "results": [
307
+ {
308
+ "checkName": "required-benchmark-repository",
309
+ "repository": "sickn33/antigravity-awesome-skills",
310
+ "passed": true,
311
+ "details": "sickn33/antigravity-awesome-skills is present in watchlist"
312
+ },
313
+ {
314
+ "checkName": "required-benchmark-repository",
315
+ "repository": "github/awesome-copilot",
316
+ "passed": true,
317
+ "details": "github/awesome-copilot is present in watchlist"
318
+ },
319
+ {
320
+ "checkName": "required-benchmark-repository",
321
+ "repository": "MiniMax-AI/skills",
322
+ "passed": true,
323
+ "details": "MiniMax-AI/skills is present in watchlist"
324
+ },
325
+ {
326
+ "checkName": "watchlist-owner-defined",
327
+ "repository": "sickn33/antigravity-awesome-skills",
328
+ "passed": true,
329
+ "details": "Owner core-architecture is defined"
330
+ },
331
+ {
332
+ "checkName": "review-sla-compliance",
333
+ "repository": "sickn33/antigravity-awesome-skills",
334
+ "passed": true,
335
+ "details": "ageInDays=11 slaDays=14"
336
+ },
337
+ {
338
+ "checkName": "watchlist-owner-defined",
339
+ "repository": "github/awesome-copilot",
340
+ "passed": true,
341
+ "details": "Owner core-architecture is defined"
342
+ },
343
+ {
344
+ "checkName": "review-sla-compliance",
345
+ "repository": "github/awesome-copilot",
346
+ "passed": true,
347
+ "details": "ageInDays=11 slaDays=14"
348
+ },
349
+ {
350
+ "checkName": "watchlist-owner-defined",
351
+ "repository": "MiniMax-AI/skills",
352
+ "passed": true,
353
+ "details": "Owner frontend-governance is defined"
354
+ },
355
+ {
356
+ "checkName": "review-sla-compliance",
357
+ "repository": "MiniMax-AI/skills",
358
+ "passed": true,
359
+ "details": "ageInDays=11 slaDays=14"
360
+ }
361
+ ]
362
+ }
363
+ },
364
+ "executions": [
365
+ {
366
+ "scriptPath": "scripts/detection-benchmark.mjs",
367
+ "exitCode": 0,
368
+ "parseError": null,
369
+ "stderr": null,
370
+ "reportName": null,
371
+ "passed": null
372
+ },
373
+ {
374
+ "scriptPath": "scripts/benchmark-gate.mjs",
375
+ "exitCode": 0,
376
+ "parseError": null,
377
+ "stderr": null,
378
+ "reportName": "benchmark-gate",
379
+ "passed": true
380
+ },
381
+ {
382
+ "scriptPath": "scripts/benchmark-intelligence.mjs",
383
+ "exitCode": 0,
384
+ "parseError": null,
385
+ "stderr": null,
386
+ "reportName": "benchmark-intelligence",
387
+ "passed": true
388
+ }
389
+ ]
390
+ }
@@ -0,0 +1,85 @@
1
+ {
2
+ "version": "1.0.0",
3
+ "phase": "v2.5.1",
4
+ "updatedAt": "2026-04-13",
5
+ "deterministicRuntime": {
6
+ "timezone": "UTC",
7
+ "locale": "C",
8
+ "nodeMajor": "22",
9
+ "lineEndings": "LF-preferred",
10
+ "shellNotes": "PowerShell and POSIX shells are supported; prefer portable commands for benchmark reruns."
11
+ },
12
+ "scenarios": [
13
+ {
14
+ "id": "planning",
15
+ "category": "planning",
16
+ "inputReferences": [
17
+ ".agent-context/state/architecture-map.md",
18
+ ".agent-context/state/dependency-map.md",
19
+ ".agent-context/rules/architecture.md"
20
+ ],
21
+ "expectedSignals": [
22
+ "clear sequencing",
23
+ "risk mapping",
24
+ "rollback path"
25
+ ],
26
+ "primaryCommand": "npm run benchmark:detection"
27
+ },
28
+ {
29
+ "id": "refactor",
30
+ "category": "refactor",
31
+ "inputReferences": [
32
+ "tests/cli-smoke.test.mjs",
33
+ "scripts/validate.mjs"
34
+ ],
35
+ "expectedSignals": [
36
+ "regression awareness",
37
+ "small safe diffs",
38
+ "test-backed changes"
39
+ ],
40
+ "primaryCommand": "npm run benchmark:gate"
41
+ },
42
+ {
43
+ "id": "security",
44
+ "category": "security",
45
+ "inputReferences": [
46
+ ".agent-context/rules/security.md",
47
+ "scripts/forbidden-content-check.mjs"
48
+ ],
49
+ "expectedSignals": [
50
+ "secret hygiene",
51
+ "unsafe pattern detection",
52
+ "release blocking on risk"
53
+ ],
54
+ "primaryCommand": "npm run gate:release"
55
+ },
56
+ {
57
+ "id": "delivery",
58
+ "category": "delivery",
59
+ "inputReferences": [
60
+ "scripts/release-gate.mjs",
61
+ "scripts/benchmark-intelligence.mjs",
62
+ ".agent-context/state/benchmark-watchlist.json"
63
+ ],
64
+ "expectedSignals": [
65
+ "release readiness",
66
+ "competitive coverage",
67
+ "SLA freshness"
68
+ ],
69
+ "primaryCommand": "npm run benchmark:intelligence"
70
+ }
71
+ ],
72
+ "rerunInstructions": [
73
+ "Run npm run benchmark:detection to regenerate detection benchmark output.",
74
+ "Run npm run benchmark:gate to validate benchmark anti-regression thresholds.",
75
+ "Run npm run benchmark:intelligence to validate benchmark watchlist freshness.",
76
+ "Run npm run benchmark:bundle to emit a reproducible benchmark evidence bundle."
77
+ ],
78
+ "commandExamples": [
79
+ "npm run benchmark:detection",
80
+ "npm run benchmark:gate",
81
+ "npm run benchmark:intelligence",
82
+ "npm run benchmark:bundle",
83
+ "node ./scripts/benchmark-evidence-bundle.mjs --stdout-only"
84
+ ]
85
+ }
package/.cursorrules CHANGED
@@ -1,6 +1,6 @@
1
1
  # AGENTIC-SENIOR-CORE DYNAMIC GOVERNANCE RULESET
2
2
 
3
- Generated by Agentic-Senior-Core CLI v2.0.13
3
+ Generated by Agentic-Senior-Core CLI v2.0.15
4
4
  Timestamp: 2026-04-08T14:58:53.570Z
5
5
  Selected profile: beginner
6
6
  Selected policy file: .agent-context/policies/llm-judge-threshold.json
package/.windsurfrules CHANGED
@@ -1,6 +1,6 @@
1
1
  # AGENTIC-SENIOR-CORE DYNAMIC GOVERNANCE RULESET
2
2
 
3
- Generated by Agentic-Senior-Core CLI v2.0.13
3
+ Generated by Agentic-Senior-Core CLI v2.0.15
4
4
  Timestamp: 2026-04-08T14:58:53.570Z
5
5
  Selected profile: beginner
6
6
  Selected policy file: .agent-context/policies/llm-judge-threshold.json
package/README.md CHANGED
@@ -244,6 +244,23 @@ Reproduce and refresh this table:
244
244
  npm run benchmark:token
245
245
  ```
246
246
 
247
+ ### Benchmark Evidence Bundle (V2.5.1 Baseline)
248
+
249
+ Generate a reproducible benchmark evidence artifact (inputs, rubric, rerun instructions, and outputs):
250
+
251
+ ```bash
252
+ npm run benchmark:bundle
253
+ ```
254
+
255
+ This command writes:
256
+ - `.agent-context/state/benchmark-evidence-bundle.json`
257
+
258
+ For CI pipelines that only need stdout JSON:
259
+
260
+ ```bash
261
+ node ./scripts/benchmark-evidence-bundle.mjs --stdout-only
262
+ ```
263
+
247
264
  ### Install and Setup Choices
248
265
 
249
266
  The CLI now supports a smaller decision surface for first-time setup:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ryuenn3123/agentic-senior-core",
3
- "version": "2.0.13",
3
+ "version": "2.0.15",
4
4
  "type": "module",
5
5
  "description": "Force your AI Agent to code like a Staff Engineer, not a Junior.",
6
6
  "bin": {
@@ -48,6 +48,7 @@
48
48
  "sbom:generate": "node ./scripts/generate-sbom.mjs",
49
49
  "benchmark:detection": "node ./scripts/detection-benchmark.mjs",
50
50
  "benchmark:token": "node ./scripts/token-optimization-benchmark.mjs",
51
+ "benchmark:bundle": "node ./scripts/benchmark-evidence-bundle.mjs",
51
52
  "benchmark:gate": "node ./scripts/benchmark-gate.mjs",
52
53
  "benchmark:intelligence": "node ./scripts/benchmark-intelligence.mjs",
53
54
  "report:quality-trend": "node ./scripts/quality-trend-report.mjs",
@@ -0,0 +1,175 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * benchmark-evidence-bundle.mjs
5
+ *
6
+ * V2.5.1 reproducibility baseline artifact.
7
+ * Aggregates benchmark inputs, rubric, command examples, and outputs
8
+ * into a single machine-readable evidence bundle.
9
+ */
10
+
11
+ import { existsSync, readFileSync } from 'node:fs';
12
+ import fs from 'node:fs/promises';
13
+ import { spawnSync } from 'node:child_process';
14
+ import { dirname, join, resolve } from 'node:path';
15
+ import { fileURLToPath } from 'node:url';
16
+
17
// Locate the repository root as the parent directory of the folder holding this script.
const SCRIPT_FILE_PATH = fileURLToPath(import.meta.url);
const SCRIPT_DIR = dirname(SCRIPT_FILE_PATH);
const REPOSITORY_ROOT = resolve(SCRIPT_DIR, '..');

// Command-line flags: `--stdout-only` is the only flag this script inspects.
const ARGUMENT_FLAGS = new Set(process.argv.slice(2));
const isStdoutOnlyMode = ARGUMENT_FLAGS.has('--stdout-only');

// Every input and output file lives under `.agent-context/state` in the repository.
const resolveStateFilePath = (stateFileName) => join(REPOSITORY_ROOT, '.agent-context', 'state', stateFileName);

const REPRO_PROFILE_PATH = resolveStateFilePath('benchmark-reproducibility.json');
const BENCHMARK_THRESHOLD_PATH = resolveStateFilePath('benchmark-thresholds.json');
const BENCHMARK_WATCHLIST_PATH = resolveStateFilePath('benchmark-watchlist.json');
const OUTPUT_PATH = resolveStateFilePath('benchmark-evidence-bundle.json');
27
+
28
/**
 * Load a JSON file and return its parsed content, tolerating failure.
 *
 * @param {string} filePath - Path of the JSON file to load.
 * @returns {*} The parsed JSON value, or `null` when the path does not exist,
 *   cannot be read, or does not contain valid JSON.
 */
function readJsonOrNull(filePath) {
  let parsedContent = null;

  if (existsSync(filePath)) {
    try {
      const rawText = readFileSync(filePath, 'utf8');
      parsedContent = JSON.parse(rawText);
    } catch {
      // Unreadable or malformed input is reported the same way as a missing file.
      parsedContent = null;
    }
  }

  return parsedContent;
}
39
+
40
/**
 * Run a repository script in a child Node.js process and parse its stdout as JSON.
 *
 * The child is started with the same interpreter that is running this script
 * (`process.execPath`) so the result does not depend on which `node` binary is
 * first on PATH.
 *
 * @param {string} scriptRelativePath - Script path relative to the repository root.
 * @returns {{scriptPath: string, exitCode: number, parsedReport: *, parseError: (string|null), stderr: string}}
 *   Execution record. `parsedReport` is `null` and `parseError` is set when stdout
 *   is empty or is not valid JSON. `exitCode` falls back to 1 when the child did
 *   not report a numeric status. `stderr` also carries the spawn failure message
 *   when the child process could not be run.
 */
function runJsonScript(scriptRelativePath) {
  const absoluteScriptPath = join(REPOSITORY_ROOT, scriptRelativePath);
  const executionResult = spawnSync(process.execPath, [absoluteScriptPath], {
    cwd: REPOSITORY_ROOT,
    encoding: 'utf8',
    maxBuffer: 1024 * 1024 * 10,
  });

  const stdoutContent = (executionResult.stdout || '').trim();
  // spawnSync reports launch failures, timeouts, and buffer overflows through
  // `error`; surface that message instead of silently dropping it.
  const spawnFailureMessage = executionResult.error
    ? `Failed to run script: ${executionResult.error.message}`
    : '';
  const stderrContent = [(executionResult.stderr || '').trim(), spawnFailureMessage]
    .filter(Boolean)
    .join('\n');
  const exitCode = typeof executionResult.status === 'number' ? executionResult.status : 1;

  if (!stdoutContent) {
    return {
      scriptPath: scriptRelativePath,
      exitCode,
      parsedReport: null,
      parseError: 'Script produced no stdout JSON payload',
      stderr: stderrContent,
    };
  }

  try {
    return {
      scriptPath: scriptRelativePath,
      exitCode,
      parsedReport: JSON.parse(stdoutContent),
      parseError: null,
      stderr: stderrContent,
    };
  } catch (jsonParseError) {
    const parseErrorMessage = jsonParseError instanceof Error ? jsonParseError.message : String(jsonParseError);
    return {
      scriptPath: scriptRelativePath,
      exitCode,
      parsedReport: null,
      parseError: parseErrorMessage,
      stderr: stderrContent,
    };
  }
}
81
+
82
/**
 * Reduce a script execution record to the compact summary stored in the bundle.
 *
 * @param {{scriptPath: string, exitCode: number, parseError: (string|null), stderr: string, parsedReport: *}} scriptExecutionResult
 *   Execution record to summarize.
 * @returns {{scriptPath: string, exitCode: number, parseError: (string|null), stderr: (string|null), reportName: (string|null), passed: (boolean|null)}}
 *   Summary in which an empty `stderr` becomes `null`, `reportName` falls back to
 *   the report's `gateName`, and `passed` is `null` unless the report carries a
 *   boolean `passed` field.
 */
function summarizeExecution(scriptExecutionResult) {
  const { scriptPath, exitCode, parseError, stderr, parsedReport } = scriptExecutionResult;

  const reportedName = parsedReport?.reportName || parsedReport?.gateName || null;
  const reportedOutcome = parsedReport?.passed;
  const hasBooleanOutcome = reportedOutcome === true || reportedOutcome === false;

  return {
    scriptPath,
    exitCode,
    parseError,
    stderr: stderr || null,
    reportName: reportedName,
    passed: hasBooleanOutcome ? reportedOutcome : null,
  };
}
94
+
95
/**
 * Build the rubric section of the evidence bundle.
 *
 * @param {?Object} thresholdConfiguration - Benchmark threshold settings, or null/undefined.
 * @param {?Object} intelligenceReport - Report whose `reviewSlaDays` is copied, or null/undefined.
 * @returns {{benchmarkThresholds: Object, intelligenceSlaDays: *}} Rubric in which
 *   every value that is missing, null, or undefined is reported as `null`.
 */
function buildRubricSummary(thresholdConfiguration, intelligenceReport) {
  const thresholdKeys = [
    'minimumTop1Accuracy',
    'maximumManualCorrectionRate',
    'maximumTop1AccuracyDrop',
    'maximumManualCorrectionIncrease',
  ];

  const benchmarkThresholds = {};
  for (const thresholdKey of thresholdKeys) {
    benchmarkThresholds[thresholdKey] = thresholdConfiguration?.[thresholdKey] ?? null;
  }

  const intelligenceSlaDays = intelligenceReport?.reviewSlaDays ?? null;

  return { benchmarkThresholds, intelligenceSlaDays };
}
106
+
107
/**
 * Build the benchmark evidence bundle, persist it, and print it to stdout.
 *
 * Steps:
 *  1. Read the reproducibility profile, thresholds, and watchlist (each may be null).
 *  2. Run the detection, gate, and intelligence benchmark scripts and summarize them.
 *  3. Count failures: a parse error always fails; otherwise a boolean `passed`
 *     from the report decides; otherwise a non-zero exit code fails.
 *  4. Write the bundle to OUTPUT_PATH unless `--stdout-only` was given, then print it.
 *
 * The exit status is set through `process.exitCode` (0 when every execution
 * passed, 1 otherwise) rather than `process.exit()`, so the process ends only
 * after stdout has been flushed. Calling `process.exit()` right after a large
 * `console.log` can truncate the JSON when stdout is an asynchronous pipe.
 *
 * @returns {Promise<void>} Resolves once the bundle has been written and printed.
 */
async function runBenchmarkEvidenceBundle() {
  const reproducibilityProfile = readJsonOrNull(REPRO_PROFILE_PATH);
  const thresholdConfiguration = readJsonOrNull(BENCHMARK_THRESHOLD_PATH);
  const watchlistConfiguration = readJsonOrNull(BENCHMARK_WATCHLIST_PATH);

  const detectionBenchmarkExecution = runJsonScript('scripts/detection-benchmark.mjs');
  const benchmarkGateExecution = runJsonScript('scripts/benchmark-gate.mjs');
  const benchmarkIntelligenceExecution = runJsonScript('scripts/benchmark-intelligence.mjs');

  const executionSummaries = [
    summarizeExecution(detectionBenchmarkExecution),
    summarizeExecution(benchmarkGateExecution),
    summarizeExecution(benchmarkIntelligenceExecution),
  ];

  const failureCount = executionSummaries.filter((executionSummary) => {
    if (executionSummary.parseError) {
      return true;
    }

    // A report that states its own verdict takes precedence over the exit code.
    if (typeof executionSummary.passed === 'boolean') {
      return executionSummary.passed === false;
    }

    return executionSummary.exitCode !== 0;
  }).length;

  // Profile and watchlist fields are optional; anything that is not an array becomes [].
  const profileScenarios = Array.isArray(reproducibilityProfile?.scenarios)
    ? reproducibilityProfile.scenarios
    : [];
  const profileCommandExamples = Array.isArray(reproducibilityProfile?.commandExamples)
    ? reproducibilityProfile.commandExamples
    : [];
  const profileRerunInstructions = Array.isArray(reproducibilityProfile?.rerunInstructions)
    ? reproducibilityProfile.rerunInstructions
    : [];
  const watchlistRepositories = Array.isArray(watchlistConfiguration?.repositories)
    ? watchlistConfiguration.repositories
    : [];

  const evidenceBundleReport = {
    generatedAt: new Date().toISOString(),
    reportName: 'benchmark-evidence-bundle',
    phase: 'v2.5.1',
    passed: failureCount === 0,
    failureCount,
    methodology: {
      deterministicRuntime: reproducibilityProfile?.deterministicRuntime || null,
      scenarioCount: profileScenarios.length,
      commandCount: profileCommandExamples.length,
    },
    rerunInstructions: profileRerunInstructions,
    commandExamples: profileCommandExamples,
    rawInputs: {
      scenarios: profileScenarios,
      benchmarkThresholds: thresholdConfiguration,
      benchmarkWatchlist: watchlistRepositories,
    },
    rubric: buildRubricSummary(thresholdConfiguration, benchmarkIntelligenceExecution.parsedReport),
    outputs: {
      detectionBenchmark: detectionBenchmarkExecution.parsedReport,
      benchmarkGate: benchmarkGateExecution.parsedReport,
      benchmarkIntelligence: benchmarkIntelligenceExecution.parsedReport,
    },
    executions: executionSummaries,
  };

  if (!isStdoutOnlyMode) {
    await fs.writeFile(OUTPUT_PATH, JSON.stringify(evidenceBundleReport, null, 2) + '\n', 'utf8');
  }

  console.log(JSON.stringify(evidenceBundleReport, null, 2));
  // Let the event loop drain so the full JSON reaches stdout before the process ends.
  process.exitCode = evidenceBundleReport.passed ? 0 : 1;
}
174
+
175
+ runBenchmarkEvidenceBundle();
@@ -148,6 +148,7 @@ async function validateRequiredFiles() {
148
148
  'scripts/validate.mjs',
149
149
  'scripts/llm-judge.mjs',
150
150
  'scripts/detection-benchmark.mjs',
151
+ 'scripts/benchmark-evidence-bundle.mjs',
151
152
  'scripts/benchmark-gate.mjs',
152
153
  'scripts/benchmark-intelligence.mjs',
153
154
  'scripts/governance-weekly-report.mjs',
@@ -173,6 +174,7 @@ async function validateRequiredFiles() {
173
174
  'docs/v1.7-issue-breakdown.md',
174
175
  'docs/v1.8-operations-playbook.md',
175
176
  'docs/v2-upgrade-playbook.md',
177
+ '.agent-context/state/benchmark-reproducibility.json',
176
178
  '.agent-context/state/benchmark-watchlist.json',
177
179
  '.agent-context/state/skill-platform.json',
178
180
  '.agent-context/skills/index.json',