@ryuenn3123/agentic-senior-core 2.0.26 → 2.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,8 @@
1
1
  {
2
- "generatedAt": "2026-04-13T15:56:01.200Z",
2
+ "generatedAt": "2026-04-17T03:20:15.400Z",
3
3
  "reportName": "benchmark-evidence-bundle",
4
- "phase": "v2.5.1",
4
+ "phase": "v2.5.2",
5
+ "releaseVersion": "2.0.26",
5
6
  "passed": true,
6
7
  "failureCount": 0,
7
8
  "methodology": {
@@ -13,20 +14,25 @@
13
14
  "shellNotes": "PowerShell and POSIX shells are supported; prefer portable commands for benchmark reruns."
14
15
  },
15
16
  "scenarioCount": 4,
16
- "commandCount": 5
17
+ "commandCount": 8
17
18
  },
18
19
  "rerunInstructions": [
19
20
  "Run npm run benchmark:detection to regenerate detection benchmark output.",
20
21
  "Run npm run benchmark:gate to validate benchmark anti-regression thresholds.",
21
22
  "Run npm run benchmark:intelligence to validate benchmark watchlist freshness.",
22
- "Run npm run benchmark:bundle to emit a reproducible benchmark evidence bundle."
23
+ "Run npm run benchmark:bundle to emit a reproducible benchmark evidence bundle.",
24
+ "Run npm run benchmark:writer-judge to emit writer-judge side-by-side matrix output.",
25
+ "Run npm run benchmark:continuity to validate cross-agent memory hydration, privacy redaction, and token-savings behavior."
23
26
  ],
24
27
  "commandExamples": [
25
28
  "npm run benchmark:detection",
26
29
  "npm run benchmark:gate",
27
30
  "npm run benchmark:intelligence",
28
31
  "npm run benchmark:bundle",
29
- "node ./scripts/benchmark-evidence-bundle.mjs --stdout-only"
32
+ "npm run benchmark:writer-judge",
33
+ "node ./scripts/benchmark-evidence-bundle.mjs --stdout-only",
34
+ "npm run benchmark:continuity",
35
+ "node ./scripts/memory-continuity-benchmark.mjs --stdout-only"
30
36
  ],
31
37
  "rawInputs": {
32
38
  "scenarios": [
@@ -103,19 +109,205 @@
103
109
  {
104
110
  "repository": "sickn33/antigravity-awesome-skills",
105
111
  "owner": "core-architecture",
106
- "lastReviewedAt": "2026-04-02"
112
+ "lastReviewedAt": "2026-04-17"
107
113
  },
108
114
  {
109
115
  "repository": "github/awesome-copilot",
110
116
  "owner": "core-architecture",
111
- "lastReviewedAt": "2026-04-02"
117
+ "lastReviewedAt": "2026-04-17"
112
118
  },
113
119
  {
114
120
  "repository": "MiniMax-AI/skills",
115
121
  "owner": "frontend-governance",
116
- "lastReviewedAt": "2026-04-02"
122
+ "lastReviewedAt": "2026-04-17"
117
123
  }
118
- ]
124
+ ],
125
+ "memorySchema": {
126
+ "schemaVersion": "1.0.0",
127
+ "schemaName": "cross-agent-memory-observation",
128
+ "description": "Provider-agnostic schema for persistent memory observations shared across coding agents and IDE hosts.",
129
+ "requiredFields": [
130
+ "id",
131
+ "projectId",
132
+ "sessionId",
133
+ "adapterId",
134
+ "eventType",
135
+ "timestamp",
136
+ "title",
137
+ "summary",
138
+ "detail",
139
+ "privacy"
140
+ ],
141
+ "fieldDefinitions": {
142
+ "id": {
143
+ "type": "string",
144
+ "description": "Stable unique observation identifier."
145
+ },
146
+ "projectId": {
147
+ "type": "string",
148
+ "description": "Repository or workspace identifier."
149
+ },
150
+ "sessionId": {
151
+ "type": "string",
152
+ "description": "Source session identifier from host adapter."
153
+ },
154
+ "adapterId": {
155
+ "type": "string",
156
+ "allowedValues": [
157
+ "claude-code",
158
+ "gemini-cli",
159
+ "vscode-chat",
160
+ "custom"
161
+ ],
162
+ "description": "Host adapter that captured this observation."
163
+ },
164
+ "eventType": {
165
+ "type": "string",
166
+ "allowedValues": [
167
+ "prompt",
168
+ "tool-use",
169
+ "decision",
170
+ "summary",
171
+ "issue",
172
+ "context"
173
+ ],
174
+ "description": "Observation type for retrieval filtering."
175
+ },
176
+ "timestamp": {
177
+ "type": "string",
178
+ "format": "date-time",
179
+ "description": "ISO timestamp when observation was captured."
180
+ },
181
+ "title": {
182
+ "type": "string",
183
+ "description": "Compact human-readable headline."
184
+ },
185
+ "summary": {
186
+ "type": "string",
187
+ "description": "Compact session-start payload used for progressive disclosure."
188
+ },
189
+ "detail": {
190
+ "type": "string",
191
+ "description": "Expanded observation text fetched on demand."
192
+ },
193
+ "tags": {
194
+ "type": "array",
195
+ "items": "string",
196
+ "description": "Optional normalized tags for query refinement."
197
+ },
198
+ "privacy": {
199
+ "type": "object",
200
+ "requiredFields": [
201
+ "level",
202
+ "redactionApplied"
203
+ ],
204
+ "fieldDefinitions": {
205
+ "level": {
206
+ "type": "string",
207
+ "allowedValues": [
208
+ "public",
209
+ "internal",
210
+ "restricted"
211
+ ],
212
+ "description": "Privacy classification level."
213
+ },
214
+ "redactionApplied": {
215
+ "type": "boolean",
216
+ "description": "Indicates whether privacy sanitization modified payload content."
217
+ },
218
+ "redactionReasons": {
219
+ "type": "array",
220
+ "items": "string",
221
+ "description": "Redaction reason tags such as private-tag or token-like-value."
222
+ }
223
+ }
224
+ }
225
+ },
226
+ "retrievalContract": {
227
+ "sessionStartPayload": [
228
+ "id",
229
+ "adapterId",
230
+ "eventType",
231
+ "timestamp",
232
+ "title",
233
+ "summary"
234
+ ],
235
+ "onDemandPayload": [
236
+ "detail",
237
+ "tags",
238
+ "privacy"
239
+ ],
240
+ "progressiveDisclosure": true
241
+ }
242
+ },
243
+ "memoryAdapterContract": {
244
+ "schemaVersion": "1.0.0",
245
+ "contractName": "cross-agent-memory-adapter",
246
+ "description": "Adapter contract for ingesting and retrieving shared memory observations across IDE hosts.",
247
+ "requiredAdapters": [
248
+ "claude-code",
249
+ "gemini-cli",
250
+ "vscode-chat"
251
+ ],
252
+ "requiredOperations": {
253
+ "ingestion": [
254
+ "captureObservation",
255
+ "captureSessionSummary"
256
+ ],
257
+ "retrieval": [
258
+ "searchIndex",
259
+ "getTimeline",
260
+ "getObservations"
261
+ ],
262
+ "privacy": [
263
+ "applyPrivateTagRedaction",
264
+ "applyInlineSecretRedaction"
265
+ ]
266
+ },
267
+ "adapters": [
268
+ {
269
+ "adapterId": "claude-code",
270
+ "hostType": "plugin-hooks",
271
+ "status": "pilot-ready",
272
+ "ingestionEvents": [
273
+ "SessionStart",
274
+ "UserPromptSubmit",
275
+ "PostToolUse",
276
+ "Stop",
277
+ "SessionEnd"
278
+ ],
279
+ "retrievalMode": "mcp-tools"
280
+ },
281
+ {
282
+ "adapterId": "gemini-cli",
283
+ "hostType": "plugin-hooks",
284
+ "status": "pilot-ready",
285
+ "ingestionEvents": [
286
+ "session_start",
287
+ "prompt_submit",
288
+ "post_tool",
289
+ "session_end"
290
+ ],
291
+ "retrievalMode": "mcp-tools"
292
+ },
293
+ {
294
+ "adapterId": "vscode-chat",
295
+ "hostType": "chat-customization-plugin",
296
+ "status": "pilot-ready",
297
+ "ingestionEvents": [
298
+ "chatStart",
299
+ "promptSubmit",
300
+ "postToolUse",
301
+ "chatEnd"
302
+ ],
303
+ "retrievalMode": "mcp-tools"
304
+ }
305
+ ],
306
+ "notes": [
307
+ "Web chat hosts are explicitly out of scope for this pilot because local runtime hooks are unavailable.",
308
+ "Adapters should emit provider-agnostic payloads matching .agent-context/state/memory-schema-v1.json."
309
+ ]
310
+ }
119
311
  },
120
312
  "rubric": {
121
313
  "benchmarkThresholds": {
@@ -124,11 +316,329 @@
124
316
  "maximumTop1AccuracyDrop": 0.02,
125
317
  "maximumManualCorrectionIncrease": 0.03
126
318
  },
127
- "intelligenceSlaDays": 14
319
+ "intelligenceSlaDays": 14,
320
+ "reliabilityThresholds": {
321
+ "minimumConfidenceGap": 0.1,
322
+ "maximumLowConfidenceRate": 0.2,
323
+ "maximumIncorrectDetectionRate": 0.1
324
+ },
325
+ "continuityThresholds": {
326
+ "minimumRelevantRecall": 0.8,
327
+ "minimumSessionStartTokenSavingsPercent": 35,
328
+ "maximumUnsafeObservationCount": 0
329
+ }
330
+ },
331
+ "bugIndicators": {
332
+ "incorrectFixtureCount": 1,
333
+ "incorrectDetectionRate": 0.0833,
334
+ "manualCorrectionFixtureCount": 1,
335
+ "manualCorrectionRate": 0.0833,
336
+ "lowConfidenceFixtureCount": 1,
337
+ "lowConfidenceRate": 0.0833,
338
+ "flaggedFixtures": [
339
+ {
340
+ "fixtureName": "mixed-ts-python",
341
+ "confidenceGap": 0.02,
342
+ "detectedStack": "python.md",
343
+ "expectedStack": "typescript.md",
344
+ "isCorrect": false,
345
+ "needsManualCorrection": true
346
+ }
347
+ ]
348
+ },
349
+ "reliabilitySignals": {
350
+ "passed": true,
351
+ "failureCount": 0,
352
+ "riskLevel": "monitor",
353
+ "thresholds": {
354
+ "minimumConfidenceGap": 0.1,
355
+ "maximumLowConfidenceRate": 0.2,
356
+ "maximumIncorrectDetectionRate": 0.1
357
+ },
358
+ "metrics": {
359
+ "fixtureCount": 12,
360
+ "incorrectFixtureCount": 1,
361
+ "lowConfidenceFixtureCount": 1,
362
+ "manualCorrectionFixtureCount": 1,
363
+ "incorrectDetectionRate": 0.0833,
364
+ "lowConfidenceRate": 0.0833,
365
+ "manualCorrectionRate": 0.0833
366
+ },
367
+ "checks": [
368
+ {
369
+ "checkName": "incorrect-detection-rate",
370
+ "passed": true,
371
+ "details": "incorrectRate=0.0833 max=0.1"
372
+ },
373
+ {
374
+ "checkName": "low-confidence-rate",
375
+ "passed": true,
376
+ "details": "lowConfidenceRate=0.0833 max=0.2"
377
+ },
378
+ {
379
+ "checkName": "manual-correction-early-warning",
380
+ "passed": true,
381
+ "details": "manualCorrectionRate=0.0833 warningThreshold=0.12"
382
+ }
383
+ ],
384
+ "flaggedFixtures": [
385
+ {
386
+ "fixtureName": "mixed-ts-python",
387
+ "confidenceGap": 0.02,
388
+ "detectedStack": "python.md",
389
+ "expectedStack": "typescript.md",
390
+ "isCorrect": false,
391
+ "needsManualCorrection": true
392
+ }
393
+ ]
394
+ },
395
+ "securityIndicators": {
396
+ "forbiddenContent": {
397
+ "checkName": "forbidden-content-scan",
398
+ "passed": true,
399
+ "exitCode": 0,
400
+ "details": "No forbidden content detected"
401
+ },
402
+ "vulnerabilityScan": {
403
+ "checkName": "npm-audit-indicator",
404
+ "isAvailable": false,
405
+ "hasKnownVulnerabilities": null,
406
+ "severityCounts": null,
407
+ "exitCode": 1,
408
+ "error": "Payload is empty"
409
+ }
410
+ },
411
+ "releaseDelta": {
412
+ "currentReleaseVersion": "2.0.26",
413
+ "previousReleaseVersion": "2.0.26",
414
+ "comparedSnapshot": {
415
+ "currentGeneratedAt": "2026-04-17T03:20:15.400Z",
416
+ "previousGeneratedAt": "2026-04-17T03:19:31.047Z"
417
+ },
418
+ "top1AccuracyDelta": 0,
419
+ "manualCorrectionRateDelta": 0,
420
+ "staleWatchlistCountDelta": 0,
421
+ "vulnerabilityTotalDelta": 0,
422
+ "summary": [
423
+ "top1Accuracy: +0",
424
+ "manualCorrectionRate: +0",
425
+ "staleWatchlistCount: +0",
426
+ "vulnerabilityTotal: +0"
427
+ ]
428
+ },
429
+ "history": [
430
+ {
431
+ "generatedAt": "2026-04-17T02:54:01.239Z",
432
+ "releaseVersion": "2.0.26",
433
+ "fixtureCount": 12,
434
+ "top1Accuracy": 0.9167,
435
+ "manualCorrectionRate": 0.0833,
436
+ "benchmarkGatePassed": true,
437
+ "intelligencePassed": true,
438
+ "staleWatchlistCount": 0,
439
+ "reliabilityPassed": true,
440
+ "reliabilityRiskLevel": "monitor",
441
+ "incorrectDetectionRate": 0.0833,
442
+ "lowConfidenceRate": 0.0833,
443
+ "vulnerabilityTotal": null,
444
+ "criticalVulnerabilityCount": null,
445
+ "forbiddenContentPassed": true
446
+ },
447
+ {
448
+ "generatedAt": "2026-04-17T02:54:57.419Z",
449
+ "releaseVersion": "2.0.26",
450
+ "fixtureCount": 12,
451
+ "top1Accuracy": 0.9167,
452
+ "manualCorrectionRate": 0.0833,
453
+ "benchmarkGatePassed": true,
454
+ "intelligencePassed": true,
455
+ "staleWatchlistCount": 0,
456
+ "reliabilityPassed": true,
457
+ "reliabilityRiskLevel": "monitor",
458
+ "incorrectDetectionRate": 0.0833,
459
+ "lowConfidenceRate": 0.0833,
460
+ "vulnerabilityTotal": null,
461
+ "criticalVulnerabilityCount": null,
462
+ "forbiddenContentPassed": true
463
+ },
464
+ {
465
+ "generatedAt": "2026-04-17T03:19:31.047Z",
466
+ "releaseVersion": "2.0.26",
467
+ "fixtureCount": 12,
468
+ "top1Accuracy": 0.9167,
469
+ "manualCorrectionRate": 0.0833,
470
+ "benchmarkGatePassed": true,
471
+ "intelligencePassed": true,
472
+ "staleWatchlistCount": 0,
473
+ "reliabilityPassed": true,
474
+ "reliabilityRiskLevel": "monitor",
475
+ "incorrectDetectionRate": 0.0833,
476
+ "lowConfidenceRate": 0.0833,
477
+ "vulnerabilityTotal": null,
478
+ "criticalVulnerabilityCount": null,
479
+ "forbiddenContentPassed": true
480
+ },
481
+ {
482
+ "generatedAt": "2026-04-17T03:20:15.400Z",
483
+ "releaseVersion": "2.0.26",
484
+ "fixtureCount": 12,
485
+ "top1Accuracy": 0.9167,
486
+ "manualCorrectionRate": 0.0833,
487
+ "benchmarkGatePassed": true,
488
+ "intelligencePassed": true,
489
+ "staleWatchlistCount": 0,
490
+ "reliabilityPassed": true,
491
+ "reliabilityRiskLevel": "monitor",
492
+ "incorrectDetectionRate": 0.0833,
493
+ "lowConfidenceRate": 0.0833,
494
+ "vulnerabilityTotal": null,
495
+ "criticalVulnerabilityCount": null,
496
+ "forbiddenContentPassed": true
497
+ }
498
+ ],
499
+ "trendReport": {
500
+ "generatedAt": "2026-04-17T03:20:15.400Z",
501
+ "reportName": "benchmark-trend-report",
502
+ "releaseVersion": "2.0.26",
503
+ "historyCount": 4,
504
+ "releaseDelta": {
505
+ "currentReleaseVersion": "2.0.26",
506
+ "previousReleaseVersion": "2.0.26",
507
+ "comparedSnapshot": {
508
+ "currentGeneratedAt": "2026-04-17T03:20:15.400Z",
509
+ "previousGeneratedAt": "2026-04-17T03:19:31.047Z"
510
+ },
511
+ "top1AccuracyDelta": 0,
512
+ "manualCorrectionRateDelta": 0,
513
+ "staleWatchlistCountDelta": 0,
514
+ "vulnerabilityTotalDelta": 0,
515
+ "summary": [
516
+ "top1Accuracy: +0",
517
+ "manualCorrectionRate: +0",
518
+ "staleWatchlistCount: +0",
519
+ "vulnerabilityTotal: +0"
520
+ ]
521
+ },
522
+ "trendTable": [
523
+ {
524
+ "snapshotIndex": 1,
525
+ "generatedAt": "2026-04-17T02:54:01.239Z",
526
+ "releaseVersion": "2.0.26",
527
+ "top1Accuracy": 0.9167,
528
+ "manualCorrectionRate": 0.0833,
529
+ "incorrectDetectionRate": 0.0833,
530
+ "lowConfidenceRate": 0.0833,
531
+ "staleWatchlistCount": 0,
532
+ "vulnerabilityTotal": null,
533
+ "criticalVulnerabilityCount": null,
534
+ "benchmarkGatePassed": true,
535
+ "intelligencePassed": true,
536
+ "reliabilityPassed": true,
537
+ "reliabilityRiskLevel": "monitor"
538
+ },
539
+ {
540
+ "snapshotIndex": 2,
541
+ "generatedAt": "2026-04-17T02:54:57.419Z",
542
+ "releaseVersion": "2.0.26",
543
+ "top1Accuracy": 0.9167,
544
+ "manualCorrectionRate": 0.0833,
545
+ "incorrectDetectionRate": 0.0833,
546
+ "lowConfidenceRate": 0.0833,
547
+ "staleWatchlistCount": 0,
548
+ "vulnerabilityTotal": null,
549
+ "criticalVulnerabilityCount": null,
550
+ "benchmarkGatePassed": true,
551
+ "intelligencePassed": true,
552
+ "reliabilityPassed": true,
553
+ "reliabilityRiskLevel": "monitor"
554
+ },
555
+ {
556
+ "snapshotIndex": 3,
557
+ "generatedAt": "2026-04-17T03:19:31.047Z",
558
+ "releaseVersion": "2.0.26",
559
+ "top1Accuracy": 0.9167,
560
+ "manualCorrectionRate": 0.0833,
561
+ "incorrectDetectionRate": 0.0833,
562
+ "lowConfidenceRate": 0.0833,
563
+ "staleWatchlistCount": 0,
564
+ "vulnerabilityTotal": null,
565
+ "criticalVulnerabilityCount": null,
566
+ "benchmarkGatePassed": true,
567
+ "intelligencePassed": true,
568
+ "reliabilityPassed": true,
569
+ "reliabilityRiskLevel": "monitor"
570
+ },
571
+ {
572
+ "snapshotIndex": 4,
573
+ "generatedAt": "2026-04-17T03:20:15.400Z",
574
+ "releaseVersion": "2.0.26",
575
+ "top1Accuracy": 0.9167,
576
+ "manualCorrectionRate": 0.0833,
577
+ "incorrectDetectionRate": 0.0833,
578
+ "lowConfidenceRate": 0.0833,
579
+ "staleWatchlistCount": 0,
580
+ "vulnerabilityTotal": null,
581
+ "criticalVulnerabilityCount": null,
582
+ "benchmarkGatePassed": true,
583
+ "intelligencePassed": true,
584
+ "reliabilityPassed": true,
585
+ "reliabilityRiskLevel": "monitor"
586
+ }
587
+ ],
588
+ "chartSeries": {
589
+ "generatedAt": [
590
+ "2026-04-17T02:54:01.239Z",
591
+ "2026-04-17T02:54:57.419Z",
592
+ "2026-04-17T03:19:31.047Z",
593
+ "2026-04-17T03:20:15.400Z"
594
+ ],
595
+ "top1Accuracy": [
596
+ 0.9167,
597
+ 0.9167,
598
+ 0.9167,
599
+ 0.9167
600
+ ],
601
+ "manualCorrectionRate": [
602
+ 0.0833,
603
+ 0.0833,
604
+ 0.0833,
605
+ 0.0833
606
+ ],
607
+ "incorrectDetectionRate": [
608
+ 0.0833,
609
+ 0.0833,
610
+ 0.0833,
611
+ 0.0833
612
+ ],
613
+ "lowConfidenceRate": [
614
+ 0.0833,
615
+ 0.0833,
616
+ 0.0833,
617
+ 0.0833
618
+ ],
619
+ "staleWatchlistCount": [
620
+ 0,
621
+ 0,
622
+ 0,
623
+ 0
624
+ ],
625
+ "vulnerabilityTotal": [
626
+ null,
627
+ null,
628
+ null,
629
+ null
630
+ ]
631
+ },
632
+ "artifacts": {
633
+ "historyPath": ".agent-context/state/benchmark-history.json",
634
+ "jsonPath": ".agent-context/state/benchmark-trend-report.json",
635
+ "csvPath": ".agent-context/state/benchmark-trend-report.csv",
636
+ "writeMode": "stdout-and-file"
637
+ }
128
638
  },
129
639
  "outputs": {
130
640
  "detectionBenchmark": {
131
- "generatedAt": "2026-04-13T15:56:01.040Z",
641
+ "generatedAt": "2026-04-17T03:20:15.113Z",
132
642
  "fixtureCount": 12,
133
643
  "top1Accuracy": 0.9167,
134
644
  "manualCorrectionRate": 0.0833,
@@ -232,7 +742,7 @@
232
742
  ]
233
743
  },
234
744
  "benchmarkGate": {
235
- "generatedAt": "2026-04-13T15:56:01.144Z",
745
+ "generatedAt": "2026-04-17T03:20:15.211Z",
236
746
  "gateName": "benchmark-gate",
237
747
  "passed": true,
238
748
  "failureCount": 0,
@@ -275,7 +785,7 @@
275
785
  ]
276
786
  },
277
787
  "benchmarkIntelligence": {
278
- "generatedAt": "2026-04-13T15:56:01.192Z",
788
+ "generatedAt": "2026-04-17T03:20:15.258Z",
279
789
  "reportName": "benchmark-intelligence",
280
790
  "passed": true,
281
791
  "failureCount": 0,
@@ -284,22 +794,22 @@
284
794
  {
285
795
  "repository": "sickn33/antigravity-awesome-skills",
286
796
  "owner": "core-architecture",
287
- "lastReviewedAt": "2026-04-02",
288
- "ageInDays": 11,
797
+ "lastReviewedAt": "2026-04-17",
798
+ "ageInDays": 0,
289
799
  "stale": false
290
800
  },
291
801
  {
292
802
  "repository": "github/awesome-copilot",
293
803
  "owner": "core-architecture",
294
- "lastReviewedAt": "2026-04-02",
295
- "ageInDays": 11,
804
+ "lastReviewedAt": "2026-04-17",
805
+ "ageInDays": 0,
296
806
  "stale": false
297
807
  },
298
808
  {
299
809
  "repository": "MiniMax-AI/skills",
300
810
  "owner": "frontend-governance",
301
- "lastReviewedAt": "2026-04-02",
302
- "ageInDays": 11,
811
+ "lastReviewedAt": "2026-04-17",
812
+ "ageInDays": 0,
303
813
  "stale": false
304
814
  }
305
815
  ],
@@ -332,7 +842,7 @@
332
842
  "checkName": "review-sla-compliance",
333
843
  "repository": "sickn33/antigravity-awesome-skills",
334
844
  "passed": true,
335
- "details": "ageInDays=11 slaDays=14"
845
+ "details": "ageInDays=0 slaDays=14"
336
846
  },
337
847
  {
338
848
  "checkName": "watchlist-owner-defined",
@@ -344,7 +854,7 @@
344
854
  "checkName": "review-sla-compliance",
345
855
  "repository": "github/awesome-copilot",
346
856
  "passed": true,
347
- "details": "ageInDays=11 slaDays=14"
857
+ "details": "ageInDays=0 slaDays=14"
348
858
  },
349
859
  {
350
860
  "checkName": "watchlist-owner-defined",
@@ -356,7 +866,139 @@
356
866
  "checkName": "review-sla-compliance",
357
867
  "repository": "MiniMax-AI/skills",
358
868
  "passed": true,
359
- "details": "ageInDays=11 slaDays=14"
869
+ "details": "ageInDays=0 slaDays=14"
870
+ }
871
+ ]
872
+ },
873
+ "memoryContinuityBenchmark": {
874
+ "generatedAt": "2026-04-17T03:20:15.324Z",
875
+ "reportName": "memory-continuity-benchmark",
876
+ "schemaVersion": "1.0.0",
877
+ "passed": true,
878
+ "failureCount": 0,
879
+ "thresholds": {
880
+ "minimumRelevantRecall": 0.8,
881
+ "minimumSessionStartTokenSavingsPercent": 35,
882
+ "maximumUnsafeObservationCount": 0
883
+ },
884
+ "adapterCoverage": {
885
+ "requiredAdapterIds": [
886
+ "claude-code",
887
+ "gemini-cli",
888
+ "vscode-chat"
889
+ ],
890
+ "availableAdapterIds": [
891
+ "claude-code",
892
+ "gemini-cli",
893
+ "vscode-chat"
894
+ ],
895
+ "missingAdapterIds": [],
896
+ "passed": true
897
+ },
898
+ "privacyControls": {
899
+ "redactedObservationCount": 2,
900
+ "privateTagRedactionCount": 1,
901
+ "inlineRedactionCount": 1,
902
+ "unsafeObservationCount": 0
903
+ },
904
+ "continuitySummary": {
905
+ "totalObservationCount": 5,
906
+ "scenarioCount": 3,
907
+ "averageRelevantRecall": 1,
908
+ "averageSessionStartTokenSavingsPercent": 63.17
909
+ },
910
+ "scenarios": [
911
+ {
912
+ "scenarioId": "docker-lane-hydration",
913
+ "query": "what is docker strategy for development and production",
914
+ "expectedObservationIds": [
915
+ "obs-001"
916
+ ],
917
+ "indexObservationIds": [
918
+ "obs-001",
919
+ "obs-005",
920
+ "obs-003",
921
+ "obs-002",
922
+ "obs-004"
923
+ ],
924
+ "hydratedObservationIds": [
925
+ "obs-001"
926
+ ],
927
+ "relevantRecall": 1,
928
+ "fullContextTokenEstimate": 267,
929
+ "sessionStartTokenEstimate": 103,
930
+ "sessionStartTokenSavingsPercent": 61.42
931
+ },
932
+ {
933
+ "scenarioId": "runtime-hydration",
934
+ "query": "which runtime target should we prefer on windows with wsl",
935
+ "expectedObservationIds": [
936
+ "obs-002"
937
+ ],
938
+ "indexObservationIds": [
939
+ "obs-002",
940
+ "obs-001",
941
+ "obs-005",
942
+ "obs-004",
943
+ "obs-003"
944
+ ],
945
+ "hydratedObservationIds": [
946
+ "obs-002"
947
+ ],
948
+ "relevantRecall": 1,
949
+ "fullContextTokenEstimate": 267,
950
+ "sessionStartTokenEstimate": 97,
951
+ "sessionStartTokenSavingsPercent": 63.67
952
+ },
953
+ {
954
+ "scenarioId": "frontend-quality-hydration",
955
+ "query": "show frontend rubric quality decisions",
956
+ "expectedObservationIds": [
957
+ "obs-003"
958
+ ],
959
+ "indexObservationIds": [
960
+ "obs-003",
961
+ "obs-005",
962
+ "obs-004",
963
+ "obs-002",
964
+ "obs-001"
965
+ ],
966
+ "hydratedObservationIds": [
967
+ "obs-003"
968
+ ],
969
+ "relevantRecall": 1,
970
+ "fullContextTokenEstimate": 267,
971
+ "sessionStartTokenEstimate": 95,
972
+ "sessionStartTokenSavingsPercent": 64.42
973
+ }
974
+ ],
975
+ "references": {
976
+ "memorySchemaPath": ".agent-context/state/memory-schema-v1.json",
977
+ "memoryAdapterContractPath": ".agent-context/state/memory-adapter-contract.json",
978
+ "benchmarkOutputPath": ".agent-context/state/memory-continuity-benchmark.json",
979
+ "schemaDeclaredVersion": "1.0.0",
980
+ "adapterContractVersion": "1.0.0"
981
+ },
982
+ "checks": [
983
+ {
984
+ "checkName": "adapter-coverage",
985
+ "passed": true,
986
+ "details": "required=3 missing=0"
987
+ },
988
+ {
989
+ "checkName": "continuity-recall-threshold",
990
+ "passed": true,
991
+ "details": "averageRelevantRecall=1 minimum=0.8"
992
+ },
993
+ {
994
+ "checkName": "session-start-token-savings-threshold",
995
+ "passed": true,
996
+ "details": "averageSessionStartTokenSavingsPercent=63.17 minimum=35"
997
+ },
998
+ {
999
+ "checkName": "privacy-redaction-safety",
1000
+ "passed": true,
1001
+ "details": "unsafeObservationCount=0 max=0"
360
1002
  }
361
1003
  ]
362
1004
  }
@@ -385,6 +1027,14 @@
385
1027
  "stderr": null,
386
1028
  "reportName": "benchmark-intelligence",
387
1029
  "passed": true
1030
+ },
1031
+ {
1032
+ "scriptPath": "scripts/memory-continuity-benchmark.mjs",
1033
+ "exitCode": 0,
1034
+ "parseError": null,
1035
+ "stderr": null,
1036
+ "reportName": "memory-continuity-benchmark",
1037
+ "passed": true
388
1038
  }
389
1039
  ]
390
1040
  }