@ryuenn3123/agentic-senior-core 3.0.37 → 3.0.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/.agent-context/prompts/bootstrap-design.md +109 -146
  2. package/.agent-context/rules/frontend-architecture.md +92 -108
  3. package/.agent-context/state/README.md +26 -0
  4. package/.agent-context/state/architecture-map.md +32 -17
  5. package/.agent-context/state/dependency-map.md +31 -22
  6. package/.cursor/mcp.json +10 -0
  7. package/.cursor/rules/agentic-senior-core.mdc +48 -0
  8. package/.cursorrules +22 -88
  9. package/.gemini/instructions.md +25 -16
  10. package/.github/copilot-instructions.md +25 -16
  11. package/.github/instructions/agentic-senior-core.instructions.md +47 -0
  12. package/.instructions.md +98 -207
  13. package/.windsurf/rules/agentic-senior-core.md +43 -0
  14. package/.windsurfrules +22 -88
  15. package/AGENTS.md +23 -26
  16. package/CLAUDE.md +43 -0
  17. package/CONTRIBUTING.md +7 -2
  18. package/GEMINI.md +43 -0
  19. package/README.md +25 -7
  20. package/lib/cli/backup.mjs +4 -4
  21. package/lib/cli/commands/init/project-context.mjs +101 -0
  22. package/lib/cli/commands/init/runtime-environment.mjs +59 -0
  23. package/lib/cli/commands/init/setup-decisions.mjs +83 -0
  24. package/lib/cli/commands/init.mjs +33 -250
  25. package/lib/cli/commands/optimize.mjs +1 -1
  26. package/lib/cli/commands/upgrade.mjs +34 -16
  27. package/lib/cli/compiler.mjs +59 -17
  28. package/lib/cli/constants.mjs +5 -0
  29. package/lib/cli/detector.mjs +4 -0
  30. package/lib/cli/init-detection-flow.mjs +9 -1
  31. package/lib/cli/init-selection.mjs +0 -5
  32. package/lib/cli/preflight.mjs +3 -3
  33. package/lib/cli/project-scaffolder/design-contract/validation.mjs +789 -0
  34. package/lib/cli/project-scaffolder/design-contract.mjs +119 -924
  35. package/lib/cli/project-scaffolder/prompt-builders.mjs +69 -84
  36. package/lib/cli/project-scaffolder.mjs +0 -2
  37. package/lib/cli/utils/filesystem.mjs +79 -0
  38. package/lib/cli/utils/managed-surface.mjs +237 -0
  39. package/lib/cli/utils/prompting.mjs +44 -0
  40. package/lib/cli/utils.mjs +33 -335
  41. package/package.json +21 -2
  42. package/scripts/bump-version.mjs +15 -13
  43. package/scripts/clean-local-artifacts.mjs +76 -0
  44. package/scripts/docs-quality-drift-report.mjs +5 -0
  45. package/scripts/frontend-usability-audit.mjs +23 -19
  46. package/scripts/governance-weekly-report.mjs +37 -15
  47. package/scripts/single-source-lazy-loading-audit.mjs +24 -0
  48. package/scripts/sync-thin-adapters.mjs +99 -129
  49. package/scripts/v3-purge-audit.mjs +5 -0
  50. package/scripts/validate/config.mjs +10 -0
  51. package/scripts/validate/coverage-checks.mjs +55 -0
  52. package/scripts/validate.mjs +20 -0
  53. package/.agent-context/marketplace/trust-tiers.json +0 -114
  54. package/.agent-context/state/benchmark-analysis.json +0 -431
  55. package/.agent-context/state/benchmark-evidence-bundle.json +0 -1040
  56. package/.agent-context/state/benchmark-history.json +0 -75
  57. package/.agent-context/state/benchmark-trend-report.csv +0 -5
  58. package/.agent-context/state/benchmark-trend-report.json +0 -140
  59. package/.agent-context/state/benchmark-writer-judge-matrix.json +0 -462
  60. package/.agent-context/state/memory-continuity-benchmark.json +0 -132
  61. package/.agent-context/state/onboarding-report.json +0 -102
  62. package/.agent-context/state/quality-trend-report.json +0 -89
  63. package/.agent-context/state/token-optimization-benchmark.json +0 -130
  64. package/.agent-context/state/weekly-governance-report.json +0 -329
  65. package/lib/cli/compatibility.mjs +0 -124
  66. package/scripts/validate-evidence-bundle.mjs +0 -76
@@ -1,1040 +0,0 @@
1
- {
2
- "generatedAt": "2026-04-17T03:20:15.400Z",
3
- "reportName": "benchmark-evidence-bundle",
4
- "phase": "v2.5.2",
5
- "releaseVersion": "2.0.26",
6
- "passed": true,
7
- "failureCount": 0,
8
- "methodology": {
9
- "deterministicRuntime": {
10
- "timezone": "UTC",
11
- "locale": "C",
12
- "nodeMajor": "22",
13
- "lineEndings": "LF-preferred",
14
- "shellNotes": "PowerShell and POSIX shells are supported; prefer portable commands for benchmark reruns."
15
- },
16
- "scenarioCount": 4,
17
- "commandCount": 8
18
- },
19
- "rerunInstructions": [
20
- "Run npm run benchmark:detection to regenerate detection benchmark output.",
21
- "Run npm run benchmark:gate to validate benchmark anti-regression thresholds.",
22
- "Run npm run benchmark:intelligence to validate benchmark watchlist freshness.",
23
- "Run npm run benchmark:bundle to emit a reproducible benchmark evidence bundle.",
24
- "Run npm run benchmark:writer-judge to emit writer-judge side-by-side matrix output.",
25
- "Run npm run benchmark:continuity to validate cross-agent memory hydration, privacy redaction, and token-savings behavior."
26
- ],
27
- "commandExamples": [
28
- "npm run benchmark:detection",
29
- "npm run benchmark:gate",
30
- "npm run benchmark:intelligence",
31
- "npm run benchmark:bundle",
32
- "npm run benchmark:writer-judge",
33
- "node ./scripts/benchmark-evidence-bundle.mjs --stdout-only",
34
- "npm run benchmark:continuity",
35
- "node ./scripts/memory-continuity-benchmark.mjs --stdout-only"
36
- ],
37
- "rawInputs": {
38
- "scenarios": [
39
- {
40
- "id": "planning",
41
- "category": "planning",
42
- "inputReferences": [
43
- ".agent-context/state/architecture-map.md",
44
- ".agent-context/state/dependency-map.md",
45
- ".agent-context/rules/architecture.md"
46
- ],
47
- "expectedSignals": [
48
- "clear sequencing",
49
- "risk mapping",
50
- "rollback path"
51
- ],
52
- "primaryCommand": "npm run benchmark:detection"
53
- },
54
- {
55
- "id": "refactor",
56
- "category": "refactor",
57
- "inputReferences": [
58
- "tests/cli-smoke.test.mjs",
59
- "scripts/validate.mjs"
60
- ],
61
- "expectedSignals": [
62
- "regression awareness",
63
- "small safe diffs",
64
- "test-backed changes"
65
- ],
66
- "primaryCommand": "npm run benchmark:gate"
67
- },
68
- {
69
- "id": "security",
70
- "category": "security",
71
- "inputReferences": [
72
- ".agent-context/rules/security.md",
73
- "scripts/forbidden-content-check.mjs"
74
- ],
75
- "expectedSignals": [
76
- "secret hygiene",
77
- "unsafe pattern detection",
78
- "release blocking on risk"
79
- ],
80
- "primaryCommand": "npm run gate:release"
81
- },
82
- {
83
- "id": "delivery",
84
- "category": "delivery",
85
- "inputReferences": [
86
- "scripts/release-gate.mjs",
87
- "scripts/benchmark-intelligence.mjs",
88
- ".agent-context/state/benchmark-watchlist.json"
89
- ],
90
- "expectedSignals": [
91
- "release readiness",
92
- "competitive coverage",
93
- "SLA freshness"
94
- ],
95
- "primaryCommand": "npm run benchmark:intelligence"
96
- }
97
- ],
98
- "benchmarkThresholds": {
99
- "minimumTop1Accuracy": 0.9,
100
- "maximumManualCorrectionRate": 0.12,
101
- "maximumTop1AccuracyDrop": 0.02,
102
- "maximumManualCorrectionIncrease": 0.03,
103
- "previousReleaseBaseline": {
104
- "top1Accuracy": 0.9167,
105
- "manualCorrectionRate": 0.0833
106
- }
107
- },
108
- "benchmarkWatchlist": [
109
- {
110
- "repository": "sickn33/antigravity-awesome-skills",
111
- "owner": "core-architecture",
112
- "lastReviewedAt": "2026-04-17"
113
- },
114
- {
115
- "repository": "github/awesome-copilot",
116
- "owner": "core-architecture",
117
- "lastReviewedAt": "2026-04-17"
118
- },
119
- {
120
- "repository": "MiniMax-AI/skills",
121
- "owner": "frontend-governance",
122
- "lastReviewedAt": "2026-04-17"
123
- }
124
- ],
125
- "memorySchema": {
126
- "schemaVersion": "1.0.0",
127
- "schemaName": "cross-agent-memory-observation",
128
- "description": "Provider-agnostic schema for persistent memory observations shared across coding agents and IDE hosts.",
129
- "requiredFields": [
130
- "id",
131
- "projectId",
132
- "sessionId",
133
- "adapterId",
134
- "eventType",
135
- "timestamp",
136
- "title",
137
- "summary",
138
- "detail",
139
- "privacy"
140
- ],
141
- "fieldDefinitions": {
142
- "id": {
143
- "type": "string",
144
- "description": "Stable unique observation identifier."
145
- },
146
- "projectId": {
147
- "type": "string",
148
- "description": "Repository or workspace identifier."
149
- },
150
- "sessionId": {
151
- "type": "string",
152
- "description": "Source session identifier from host adapter."
153
- },
154
- "adapterId": {
155
- "type": "string",
156
- "allowedValues": [
157
- "claude-code",
158
- "gemini-cli",
159
- "vscode-chat",
160
- "custom"
161
- ],
162
- "description": "Host adapter that captured this observation."
163
- },
164
- "eventType": {
165
- "type": "string",
166
- "allowedValues": [
167
- "prompt",
168
- "tool-use",
169
- "decision",
170
- "summary",
171
- "issue",
172
- "context"
173
- ],
174
- "description": "Observation type for retrieval filtering."
175
- },
176
- "timestamp": {
177
- "type": "string",
178
- "format": "date-time",
179
- "description": "ISO timestamp when observation was captured."
180
- },
181
- "title": {
182
- "type": "string",
183
- "description": "Compact human-readable headline."
184
- },
185
- "summary": {
186
- "type": "string",
187
- "description": "Compact session-start payload used for progressive disclosure."
188
- },
189
- "detail": {
190
- "type": "string",
191
- "description": "Expanded observation text fetched on demand."
192
- },
193
- "tags": {
194
- "type": "array",
195
- "items": "string",
196
- "description": "Optional normalized tags for query refinement."
197
- },
198
- "privacy": {
199
- "type": "object",
200
- "requiredFields": [
201
- "level",
202
- "redactionApplied"
203
- ],
204
- "fieldDefinitions": {
205
- "level": {
206
- "type": "string",
207
- "allowedValues": [
208
- "public",
209
- "internal",
210
- "restricted"
211
- ],
212
- "description": "Privacy classification level."
213
- },
214
- "redactionApplied": {
215
- "type": "boolean",
216
- "description": "Indicates whether privacy sanitization modified payload content."
217
- },
218
- "redactionReasons": {
219
- "type": "array",
220
- "items": "string",
221
- "description": "Redaction reason tags such as private-tag or token-like-value."
222
- }
223
- }
224
- }
225
- },
226
- "retrievalContract": {
227
- "sessionStartPayload": [
228
- "id",
229
- "adapterId",
230
- "eventType",
231
- "timestamp",
232
- "title",
233
- "summary"
234
- ],
235
- "onDemandPayload": [
236
- "detail",
237
- "tags",
238
- "privacy"
239
- ],
240
- "progressiveDisclosure": true
241
- }
242
- },
243
- "memoryAdapterContract": {
244
- "schemaVersion": "1.0.0",
245
- "contractName": "cross-agent-memory-adapter",
246
- "description": "Adapter contract for ingesting and retrieving shared memory observations across IDE hosts.",
247
- "requiredAdapters": [
248
- "claude-code",
249
- "gemini-cli",
250
- "vscode-chat"
251
- ],
252
- "requiredOperations": {
253
- "ingestion": [
254
- "captureObservation",
255
- "captureSessionSummary"
256
- ],
257
- "retrieval": [
258
- "searchIndex",
259
- "getTimeline",
260
- "getObservations"
261
- ],
262
- "privacy": [
263
- "applyPrivateTagRedaction",
264
- "applyInlineSecretRedaction"
265
- ]
266
- },
267
- "adapters": [
268
- {
269
- "adapterId": "claude-code",
270
- "hostType": "plugin-hooks",
271
- "status": "pilot-ready",
272
- "ingestionEvents": [
273
- "SessionStart",
274
- "UserPromptSubmit",
275
- "PostToolUse",
276
- "Stop",
277
- "SessionEnd"
278
- ],
279
- "retrievalMode": "mcp-tools"
280
- },
281
- {
282
- "adapterId": "gemini-cli",
283
- "hostType": "plugin-hooks",
284
- "status": "pilot-ready",
285
- "ingestionEvents": [
286
- "session_start",
287
- "prompt_submit",
288
- "post_tool",
289
- "session_end"
290
- ],
291
- "retrievalMode": "mcp-tools"
292
- },
293
- {
294
- "adapterId": "vscode-chat",
295
- "hostType": "chat-customization-plugin",
296
- "status": "pilot-ready",
297
- "ingestionEvents": [
298
- "chatStart",
299
- "promptSubmit",
300
- "postToolUse",
301
- "chatEnd"
302
- ],
303
- "retrievalMode": "mcp-tools"
304
- }
305
- ],
306
- "notes": [
307
- "Web chat hosts are explicitly out of scope for this pilot because local runtime hooks are unavailable.",
308
- "Adapters should emit provider-agnostic payloads matching .agent-context/state/memory-schema-v1.json."
309
- ]
310
- }
311
- },
312
- "rubric": {
313
- "benchmarkThresholds": {
314
- "minimumTop1Accuracy": 0.9,
315
- "maximumManualCorrectionRate": 0.12,
316
- "maximumTop1AccuracyDrop": 0.02,
317
- "maximumManualCorrectionIncrease": 0.03
318
- },
319
- "intelligenceSlaDays": 14,
320
- "reliabilityThresholds": {
321
- "minimumConfidenceGap": 0.1,
322
- "maximumLowConfidenceRate": 0.2,
323
- "maximumIncorrectDetectionRate": 0.1
324
- },
325
- "continuityThresholds": {
326
- "minimumRelevantRecall": 0.8,
327
- "minimumSessionStartTokenSavingsPercent": 35,
328
- "maximumUnsafeObservationCount": 0
329
- }
330
- },
331
- "bugIndicators": {
332
- "incorrectFixtureCount": 1,
333
- "incorrectDetectionRate": 0.0833,
334
- "manualCorrectionFixtureCount": 1,
335
- "manualCorrectionRate": 0.0833,
336
- "lowConfidenceFixtureCount": 1,
337
- "lowConfidenceRate": 0.0833,
338
- "flaggedFixtures": [
339
- {
340
- "fixtureName": "mixed-ts-python",
341
- "confidenceGap": 0.02,
342
- "detectedStack": "python.md",
343
- "expectedStack": "typescript.md",
344
- "isCorrect": false,
345
- "needsManualCorrection": true
346
- }
347
- ]
348
- },
349
- "reliabilitySignals": {
350
- "passed": true,
351
- "failureCount": 0,
352
- "riskLevel": "monitor",
353
- "thresholds": {
354
- "minimumConfidenceGap": 0.1,
355
- "maximumLowConfidenceRate": 0.2,
356
- "maximumIncorrectDetectionRate": 0.1
357
- },
358
- "metrics": {
359
- "fixtureCount": 12,
360
- "incorrectFixtureCount": 1,
361
- "lowConfidenceFixtureCount": 1,
362
- "manualCorrectionFixtureCount": 1,
363
- "incorrectDetectionRate": 0.0833,
364
- "lowConfidenceRate": 0.0833,
365
- "manualCorrectionRate": 0.0833
366
- },
367
- "checks": [
368
- {
369
- "checkName": "incorrect-detection-rate",
370
- "passed": true,
371
- "details": "incorrectRate=0.0833 max=0.1"
372
- },
373
- {
374
- "checkName": "low-confidence-rate",
375
- "passed": true,
376
- "details": "lowConfidenceRate=0.0833 max=0.2"
377
- },
378
- {
379
- "checkName": "manual-correction-early-warning",
380
- "passed": true,
381
- "details": "manualCorrectionRate=0.0833 warningThreshold=0.12"
382
- }
383
- ],
384
- "flaggedFixtures": [
385
- {
386
- "fixtureName": "mixed-ts-python",
387
- "confidenceGap": 0.02,
388
- "detectedStack": "python.md",
389
- "expectedStack": "typescript.md",
390
- "isCorrect": false,
391
- "needsManualCorrection": true
392
- }
393
- ]
394
- },
395
- "securityIndicators": {
396
- "forbiddenContent": {
397
- "checkName": "forbidden-content-scan",
398
- "passed": true,
399
- "exitCode": 0,
400
- "details": "No forbidden content detected"
401
- },
402
- "vulnerabilityScan": {
403
- "checkName": "npm-audit-indicator",
404
- "isAvailable": false,
405
- "hasKnownVulnerabilities": null,
406
- "severityCounts": null,
407
- "exitCode": 1,
408
- "error": "Payload is empty"
409
- }
410
- },
411
- "releaseDelta": {
412
- "currentReleaseVersion": "2.0.26",
413
- "previousReleaseVersion": "2.0.26",
414
- "comparedSnapshot": {
415
- "currentGeneratedAt": "2026-04-17T03:20:15.400Z",
416
- "previousGeneratedAt": "2026-04-17T03:19:31.047Z"
417
- },
418
- "top1AccuracyDelta": 0,
419
- "manualCorrectionRateDelta": 0,
420
- "staleWatchlistCountDelta": 0,
421
- "vulnerabilityTotalDelta": 0,
422
- "summary": [
423
- "top1Accuracy: +0",
424
- "manualCorrectionRate: +0",
425
- "staleWatchlistCount: +0",
426
- "vulnerabilityTotal: +0"
427
- ]
428
- },
429
- "history": [
430
- {
431
- "generatedAt": "2026-04-17T02:54:01.239Z",
432
- "releaseVersion": "2.0.26",
433
- "fixtureCount": 12,
434
- "top1Accuracy": 0.9167,
435
- "manualCorrectionRate": 0.0833,
436
- "benchmarkGatePassed": true,
437
- "intelligencePassed": true,
438
- "staleWatchlistCount": 0,
439
- "reliabilityPassed": true,
440
- "reliabilityRiskLevel": "monitor",
441
- "incorrectDetectionRate": 0.0833,
442
- "lowConfidenceRate": 0.0833,
443
- "vulnerabilityTotal": null,
444
- "criticalVulnerabilityCount": null,
445
- "forbiddenContentPassed": true
446
- },
447
- {
448
- "generatedAt": "2026-04-17T02:54:57.419Z",
449
- "releaseVersion": "2.0.26",
450
- "fixtureCount": 12,
451
- "top1Accuracy": 0.9167,
452
- "manualCorrectionRate": 0.0833,
453
- "benchmarkGatePassed": true,
454
- "intelligencePassed": true,
455
- "staleWatchlistCount": 0,
456
- "reliabilityPassed": true,
457
- "reliabilityRiskLevel": "monitor",
458
- "incorrectDetectionRate": 0.0833,
459
- "lowConfidenceRate": 0.0833,
460
- "vulnerabilityTotal": null,
461
- "criticalVulnerabilityCount": null,
462
- "forbiddenContentPassed": true
463
- },
464
- {
465
- "generatedAt": "2026-04-17T03:19:31.047Z",
466
- "releaseVersion": "2.0.26",
467
- "fixtureCount": 12,
468
- "top1Accuracy": 0.9167,
469
- "manualCorrectionRate": 0.0833,
470
- "benchmarkGatePassed": true,
471
- "intelligencePassed": true,
472
- "staleWatchlistCount": 0,
473
- "reliabilityPassed": true,
474
- "reliabilityRiskLevel": "monitor",
475
- "incorrectDetectionRate": 0.0833,
476
- "lowConfidenceRate": 0.0833,
477
- "vulnerabilityTotal": null,
478
- "criticalVulnerabilityCount": null,
479
- "forbiddenContentPassed": true
480
- },
481
- {
482
- "generatedAt": "2026-04-17T03:20:15.400Z",
483
- "releaseVersion": "2.0.26",
484
- "fixtureCount": 12,
485
- "top1Accuracy": 0.9167,
486
- "manualCorrectionRate": 0.0833,
487
- "benchmarkGatePassed": true,
488
- "intelligencePassed": true,
489
- "staleWatchlistCount": 0,
490
- "reliabilityPassed": true,
491
- "reliabilityRiskLevel": "monitor",
492
- "incorrectDetectionRate": 0.0833,
493
- "lowConfidenceRate": 0.0833,
494
- "vulnerabilityTotal": null,
495
- "criticalVulnerabilityCount": null,
496
- "forbiddenContentPassed": true
497
- }
498
- ],
499
- "trendReport": {
500
- "generatedAt": "2026-04-17T03:20:15.400Z",
501
- "reportName": "benchmark-trend-report",
502
- "releaseVersion": "2.0.26",
503
- "historyCount": 4,
504
- "releaseDelta": {
505
- "currentReleaseVersion": "2.0.26",
506
- "previousReleaseVersion": "2.0.26",
507
- "comparedSnapshot": {
508
- "currentGeneratedAt": "2026-04-17T03:20:15.400Z",
509
- "previousGeneratedAt": "2026-04-17T03:19:31.047Z"
510
- },
511
- "top1AccuracyDelta": 0,
512
- "manualCorrectionRateDelta": 0,
513
- "staleWatchlistCountDelta": 0,
514
- "vulnerabilityTotalDelta": 0,
515
- "summary": [
516
- "top1Accuracy: +0",
517
- "manualCorrectionRate: +0",
518
- "staleWatchlistCount: +0",
519
- "vulnerabilityTotal: +0"
520
- ]
521
- },
522
- "trendTable": [
523
- {
524
- "snapshotIndex": 1,
525
- "generatedAt": "2026-04-17T02:54:01.239Z",
526
- "releaseVersion": "2.0.26",
527
- "top1Accuracy": 0.9167,
528
- "manualCorrectionRate": 0.0833,
529
- "incorrectDetectionRate": 0.0833,
530
- "lowConfidenceRate": 0.0833,
531
- "staleWatchlistCount": 0,
532
- "vulnerabilityTotal": null,
533
- "criticalVulnerabilityCount": null,
534
- "benchmarkGatePassed": true,
535
- "intelligencePassed": true,
536
- "reliabilityPassed": true,
537
- "reliabilityRiskLevel": "monitor"
538
- },
539
- {
540
- "snapshotIndex": 2,
541
- "generatedAt": "2026-04-17T02:54:57.419Z",
542
- "releaseVersion": "2.0.26",
543
- "top1Accuracy": 0.9167,
544
- "manualCorrectionRate": 0.0833,
545
- "incorrectDetectionRate": 0.0833,
546
- "lowConfidenceRate": 0.0833,
547
- "staleWatchlistCount": 0,
548
- "vulnerabilityTotal": null,
549
- "criticalVulnerabilityCount": null,
550
- "benchmarkGatePassed": true,
551
- "intelligencePassed": true,
552
- "reliabilityPassed": true,
553
- "reliabilityRiskLevel": "monitor"
554
- },
555
- {
556
- "snapshotIndex": 3,
557
- "generatedAt": "2026-04-17T03:19:31.047Z",
558
- "releaseVersion": "2.0.26",
559
- "top1Accuracy": 0.9167,
560
- "manualCorrectionRate": 0.0833,
561
- "incorrectDetectionRate": 0.0833,
562
- "lowConfidenceRate": 0.0833,
563
- "staleWatchlistCount": 0,
564
- "vulnerabilityTotal": null,
565
- "criticalVulnerabilityCount": null,
566
- "benchmarkGatePassed": true,
567
- "intelligencePassed": true,
568
- "reliabilityPassed": true,
569
- "reliabilityRiskLevel": "monitor"
570
- },
571
- {
572
- "snapshotIndex": 4,
573
- "generatedAt": "2026-04-17T03:20:15.400Z",
574
- "releaseVersion": "2.0.26",
575
- "top1Accuracy": 0.9167,
576
- "manualCorrectionRate": 0.0833,
577
- "incorrectDetectionRate": 0.0833,
578
- "lowConfidenceRate": 0.0833,
579
- "staleWatchlistCount": 0,
580
- "vulnerabilityTotal": null,
581
- "criticalVulnerabilityCount": null,
582
- "benchmarkGatePassed": true,
583
- "intelligencePassed": true,
584
- "reliabilityPassed": true,
585
- "reliabilityRiskLevel": "monitor"
586
- }
587
- ],
588
- "chartSeries": {
589
- "generatedAt": [
590
- "2026-04-17T02:54:01.239Z",
591
- "2026-04-17T02:54:57.419Z",
592
- "2026-04-17T03:19:31.047Z",
593
- "2026-04-17T03:20:15.400Z"
594
- ],
595
- "top1Accuracy": [
596
- 0.9167,
597
- 0.9167,
598
- 0.9167,
599
- 0.9167
600
- ],
601
- "manualCorrectionRate": [
602
- 0.0833,
603
- 0.0833,
604
- 0.0833,
605
- 0.0833
606
- ],
607
- "incorrectDetectionRate": [
608
- 0.0833,
609
- 0.0833,
610
- 0.0833,
611
- 0.0833
612
- ],
613
- "lowConfidenceRate": [
614
- 0.0833,
615
- 0.0833,
616
- 0.0833,
617
- 0.0833
618
- ],
619
- "staleWatchlistCount": [
620
- 0,
621
- 0,
622
- 0,
623
- 0
624
- ],
625
- "vulnerabilityTotal": [
626
- null,
627
- null,
628
- null,
629
- null
630
- ]
631
- },
632
- "artifacts": {
633
- "historyPath": ".agent-context/state/benchmark-history.json",
634
- "jsonPath": ".agent-context/state/benchmark-trend-report.json",
635
- "csvPath": ".agent-context/state/benchmark-trend-report.csv",
636
- "writeMode": "stdout-and-file"
637
- }
638
- },
639
- "outputs": {
640
- "detectionBenchmark": {
641
- "generatedAt": "2026-04-17T03:20:15.113Z",
642
- "fixtureCount": 12,
643
- "top1Accuracy": 0.9167,
644
- "manualCorrectionRate": 0.0833,
645
- "fixtures": [
646
- {
647
- "fixtureName": "typescript-basic",
648
- "expectedStack": "typescript.md",
649
- "detectedStack": "typescript.md",
650
- "confidenceGap": 0.94,
651
- "needsManualCorrection": false,
652
- "isCorrect": true
653
- },
654
- {
655
- "fixtureName": "typescript-next",
656
- "expectedStack": "typescript.md",
657
- "detectedStack": "typescript.md",
658
- "confidenceGap": 0.97,
659
- "needsManualCorrection": false,
660
- "isCorrect": true
661
- },
662
- {
663
- "fixtureName": "python-poetry",
664
- "expectedStack": "python.md",
665
- "detectedStack": "python.md",
666
- "confidenceGap": 0.96,
667
- "needsManualCorrection": false,
668
- "isCorrect": true
669
- },
670
- {
671
- "fixtureName": "python-requirements",
672
- "expectedStack": "python.md",
673
- "detectedStack": "python.md",
674
- "confidenceGap": 0.78,
675
- "needsManualCorrection": false,
676
- "isCorrect": true
677
- },
678
- {
679
- "fixtureName": "java-maven",
680
- "expectedStack": "java.md",
681
- "detectedStack": "java.md",
682
- "confidenceGap": 0.95,
683
- "needsManualCorrection": false,
684
- "isCorrect": true
685
- },
686
- {
687
- "fixtureName": "java-gradle",
688
- "expectedStack": "java.md",
689
- "detectedStack": "java.md",
690
- "confidenceGap": 0.84,
691
- "needsManualCorrection": false,
692
- "isCorrect": true
693
- },
694
- {
695
- "fixtureName": "php-composer",
696
- "expectedStack": "php.md",
697
- "detectedStack": "php.md",
698
- "confidenceGap": 0.95,
699
- "needsManualCorrection": false,
700
- "isCorrect": true
701
- },
702
- {
703
- "fixtureName": "go-module",
704
- "expectedStack": "go.md",
705
- "detectedStack": "go.md",
706
- "confidenceGap": 0.96,
707
- "needsManualCorrection": false,
708
- "isCorrect": true
709
- },
710
- {
711
- "fixtureName": "dotnet-solution",
712
- "expectedStack": "csharp.md",
713
- "detectedStack": "csharp.md",
714
- "confidenceGap": 0.95,
715
- "needsManualCorrection": false,
716
- "isCorrect": true
717
- },
718
- {
719
- "fixtureName": "rust-cargo",
720
- "expectedStack": "rust.md",
721
- "detectedStack": "rust.md",
722
- "confidenceGap": 0.96,
723
- "needsManualCorrection": false,
724
- "isCorrect": true
725
- },
726
- {
727
- "fixtureName": "ruby-gemfile",
728
- "expectedStack": "ruby.md",
729
- "detectedStack": "ruby.md",
730
- "confidenceGap": 0.95,
731
- "needsManualCorrection": false,
732
- "isCorrect": true
733
- },
734
- {
735
- "fixtureName": "mixed-ts-python",
736
- "expectedStack": "typescript.md",
737
- "detectedStack": "python.md",
738
- "confidenceGap": 0.02,
739
- "needsManualCorrection": true,
740
- "isCorrect": false
741
- }
742
- ]
743
- },
744
- "benchmarkGate": {
745
- "generatedAt": "2026-04-17T03:20:15.211Z",
746
- "gateName": "benchmark-gate",
747
- "passed": true,
748
- "failureCount": 0,
749
- "benchmarkResult": {
750
- "fixtureCount": 12,
751
- "top1Accuracy": 0.9167,
752
- "manualCorrectionRate": 0.0833
753
- },
754
- "thresholds": {
755
- "minimumTop1Accuracy": 0.9,
756
- "maximumManualCorrectionRate": 0.12,
757
- "maximumTop1AccuracyDrop": 0.02,
758
- "maximumManualCorrectionIncrease": 0.03,
759
- "previousReleaseBaseline": {
760
- "top1Accuracy": 0.9167,
761
- "manualCorrectionRate": 0.0833
762
- }
763
- },
764
- "results": [
765
- {
766
- "checkName": "minimum-top1-accuracy",
767
- "passed": true,
768
- "details": "top1Accuracy=0.9167 minimum=0.9"
769
- },
770
- {
771
- "checkName": "maximum-manual-correction-rate",
772
- "passed": true,
773
- "details": "manualCorrectionRate=0.0833 maximum=0.12"
774
- },
775
- {
776
- "checkName": "maximum-top1-accuracy-drop",
777
- "passed": true,
778
- "details": "drop=0 maximum=0.02"
779
- },
780
- {
781
- "checkName": "maximum-manual-correction-increase",
782
- "passed": true,
783
- "details": "increase=0 maximum=0.03"
784
- }
785
- ]
786
- },
787
- "benchmarkIntelligence": {
788
- "generatedAt": "2026-04-17T03:20:15.258Z",
789
- "reportName": "benchmark-intelligence",
790
- "passed": true,
791
- "failureCount": 0,
792
- "reviewSlaDays": 14,
793
- "watchlist": [
794
- {
795
- "repository": "sickn33/antigravity-awesome-skills",
796
- "owner": "core-architecture",
797
- "lastReviewedAt": "2026-04-17",
798
- "ageInDays": 0,
799
- "stale": false
800
- },
801
- {
802
- "repository": "github/awesome-copilot",
803
- "owner": "core-architecture",
804
- "lastReviewedAt": "2026-04-17",
805
- "ageInDays": 0,
806
- "stale": false
807
- },
808
- {
809
- "repository": "MiniMax-AI/skills",
810
- "owner": "frontend-governance",
811
- "lastReviewedAt": "2026-04-17",
812
- "ageInDays": 0,
813
- "stale": false
814
- }
815
- ],
816
- "results": [
817
- {
818
- "checkName": "required-benchmark-repository",
819
- "repository": "sickn33/antigravity-awesome-skills",
820
- "passed": true,
821
- "details": "sickn33/antigravity-awesome-skills is present in watchlist"
822
- },
823
- {
824
- "checkName": "required-benchmark-repository",
825
- "repository": "github/awesome-copilot",
826
- "passed": true,
827
- "details": "github/awesome-copilot is present in watchlist"
828
- },
829
- {
830
- "checkName": "required-benchmark-repository",
831
- "repository": "MiniMax-AI/skills",
832
- "passed": true,
833
- "details": "MiniMax-AI/skills is present in watchlist"
834
- },
835
- {
836
- "checkName": "watchlist-owner-defined",
837
- "repository": "sickn33/antigravity-awesome-skills",
838
- "passed": true,
839
- "details": "Owner core-architecture is defined"
840
- },
841
- {
842
- "checkName": "review-sla-compliance",
843
- "repository": "sickn33/antigravity-awesome-skills",
844
- "passed": true,
845
- "details": "ageInDays=0 slaDays=14"
846
- },
847
- {
848
- "checkName": "watchlist-owner-defined",
849
- "repository": "github/awesome-copilot",
850
- "passed": true,
851
- "details": "Owner core-architecture is defined"
852
- },
853
- {
854
- "checkName": "review-sla-compliance",
855
- "repository": "github/awesome-copilot",
856
- "passed": true,
857
- "details": "ageInDays=0 slaDays=14"
858
- },
859
- {
860
- "checkName": "watchlist-owner-defined",
861
- "repository": "MiniMax-AI/skills",
862
- "passed": true,
863
- "details": "Owner frontend-governance is defined"
864
- },
865
- {
866
- "checkName": "review-sla-compliance",
867
- "repository": "MiniMax-AI/skills",
868
- "passed": true,
869
- "details": "ageInDays=0 slaDays=14"
870
- }
871
- ]
872
- },
873
- "memoryContinuityBenchmark": {
874
- "generatedAt": "2026-04-17T03:20:15.324Z",
875
- "reportName": "memory-continuity-benchmark",
876
- "schemaVersion": "1.0.0",
877
- "passed": true,
878
- "failureCount": 0,
879
- "thresholds": {
880
- "minimumRelevantRecall": 0.8,
881
- "minimumSessionStartTokenSavingsPercent": 35,
882
- "maximumUnsafeObservationCount": 0
883
- },
884
- "adapterCoverage": {
885
- "requiredAdapterIds": [
886
- "claude-code",
887
- "gemini-cli",
888
- "vscode-chat"
889
- ],
890
- "availableAdapterIds": [
891
- "claude-code",
892
- "gemini-cli",
893
- "vscode-chat"
894
- ],
895
- "missingAdapterIds": [],
896
- "passed": true
897
- },
898
- "privacyControls": {
899
- "redactedObservationCount": 2,
900
- "privateTagRedactionCount": 1,
901
- "inlineRedactionCount": 1,
902
- "unsafeObservationCount": 0
903
- },
904
- "continuitySummary": {
905
- "totalObservationCount": 5,
906
- "scenarioCount": 3,
907
- "averageRelevantRecall": 1,
908
- "averageSessionStartTokenSavingsPercent": 63.17
909
- },
910
- "scenarios": [
911
- {
912
- "scenarioId": "docker-lane-hydration",
913
- "query": "what is docker strategy for development and production",
914
- "expectedObservationIds": [
915
- "obs-001"
916
- ],
917
- "indexObservationIds": [
918
- "obs-001",
919
- "obs-005",
920
- "obs-003",
921
- "obs-002",
922
- "obs-004"
923
- ],
924
- "hydratedObservationIds": [
925
- "obs-001"
926
- ],
927
- "relevantRecall": 1,
928
- "fullContextTokenEstimate": 267,
929
- "sessionStartTokenEstimate": 103,
930
- "sessionStartTokenSavingsPercent": 61.42
931
- },
932
- {
933
- "scenarioId": "runtime-hydration",
934
- "query": "which runtime target should we prefer on windows with wsl",
935
- "expectedObservationIds": [
936
- "obs-002"
937
- ],
938
- "indexObservationIds": [
939
- "obs-002",
940
- "obs-001",
941
- "obs-005",
942
- "obs-004",
943
- "obs-003"
944
- ],
945
- "hydratedObservationIds": [
946
- "obs-002"
947
- ],
948
- "relevantRecall": 1,
949
- "fullContextTokenEstimate": 267,
950
- "sessionStartTokenEstimate": 97,
951
- "sessionStartTokenSavingsPercent": 63.67
952
- },
953
- {
954
- "scenarioId": "frontend-quality-hydration",
955
- "query": "show frontend rubric quality decisions",
956
- "expectedObservationIds": [
957
- "obs-003"
958
- ],
959
- "indexObservationIds": [
960
- "obs-003",
961
- "obs-005",
962
- "obs-004",
963
- "obs-002",
964
- "obs-001"
965
- ],
966
- "hydratedObservationIds": [
967
- "obs-003"
968
- ],
969
- "relevantRecall": 1,
970
- "fullContextTokenEstimate": 267,
971
- "sessionStartTokenEstimate": 95,
972
- "sessionStartTokenSavingsPercent": 64.42
973
- }
974
- ],
975
- "references": {
976
- "memorySchemaPath": ".agent-context/state/memory-schema-v1.json",
977
- "memoryAdapterContractPath": ".agent-context/state/memory-adapter-contract.json",
978
- "benchmarkOutputPath": ".agent-context/state/memory-continuity-benchmark.json",
979
- "schemaDeclaredVersion": "1.0.0",
980
- "adapterContractVersion": "1.0.0"
981
- },
982
- "checks": [
983
- {
984
- "checkName": "adapter-coverage",
985
- "passed": true,
986
- "details": "required=3 missing=0"
987
- },
988
- {
989
- "checkName": "continuity-recall-threshold",
990
- "passed": true,
991
- "details": "averageRelevantRecall=1 minimum=0.8"
992
- },
993
- {
994
- "checkName": "session-start-token-savings-threshold",
995
- "passed": true,
996
- "details": "averageSessionStartTokenSavingsPercent=63.17 minimum=35"
997
- },
998
- {
999
- "checkName": "privacy-redaction-safety",
1000
- "passed": true,
1001
- "details": "unsafeObservationCount=0 max=0"
1002
- }
1003
- ]
1004
- }
1005
- },
1006
- "executions": [
1007
- {
1008
- "scriptPath": "scripts/detection-benchmark.mjs",
1009
- "exitCode": 0,
1010
- "parseError": null,
1011
- "stderr": null,
1012
- "reportName": null,
1013
- "passed": null
1014
- },
1015
- {
1016
- "scriptPath": "scripts/benchmark-gate.mjs",
1017
- "exitCode": 0,
1018
- "parseError": null,
1019
- "stderr": null,
1020
- "reportName": "benchmark-gate",
1021
- "passed": true
1022
- },
1023
- {
1024
- "scriptPath": "scripts/benchmark-intelligence.mjs",
1025
- "exitCode": 0,
1026
- "parseError": null,
1027
- "stderr": null,
1028
- "reportName": "benchmark-intelligence",
1029
- "passed": true
1030
- },
1031
- {
1032
- "scriptPath": "scripts/memory-continuity-benchmark.mjs",
1033
- "exitCode": 0,
1034
- "parseError": null,
1035
- "stderr": null,
1036
- "reportName": "memory-continuity-benchmark",
1037
- "passed": true
1038
- }
1039
- ]
1040
- }