@ryuenn3123/agentic-senior-core 2.0.25 → 2.0.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-context/review-checklists/frontend-excellence-rubric.md +54 -0
- package/.agent-context/review-checklists/frontend-skill-parity.md +1 -0
- package/.agent-context/review-checklists/frontend-usability.md +1 -0
- package/.agent-context/rules/docker-runtime.md +29 -0
- package/.agent-context/skills/frontend/README.md +1 -0
- package/.agent-context/skills/frontend.md +4 -0
- package/.agent-context/state/benchmark-evidence-bundle.json +672 -22
- package/.agent-context/state/benchmark-history.json +75 -0
- package/.agent-context/state/benchmark-trend-report.csv +5 -0
- package/.agent-context/state/benchmark-trend-report.json +140 -0
- package/.agent-context/state/benchmark-watchlist.json +3 -3
- package/.agent-context/state/memory-adapter-contract.json +52 -0
- package/.agent-context/state/memory-continuity-benchmark.json +132 -0
- package/.agent-context/state/memory-schema-v1.json +88 -0
- package/.cursorrules +1 -1
- package/.windsurfrules +1 -1
- package/README.md +29 -0
- package/lib/cli/commands/init.mjs +358 -16
- package/lib/cli/commands/optimize.mjs +12 -0
- package/lib/cli/commands/upgrade.mjs +30 -1
- package/lib/cli/compiler.mjs +55 -1
- package/lib/cli/constants.mjs +83 -0
- package/lib/cli/detector.mjs +11 -1
- package/lib/cli/memory-continuity.mjs +266 -0
- package/lib/cli/project-scaffolder.mjs +174 -1
- package/lib/cli/skill-selector.mjs +60 -38
- package/lib/cli/templates/architecture-decision-record.md.tmpl +39 -0
- package/lib/cli/templates/flow-overview.md.tmpl +12 -0
- package/lib/cli/templates/project-brief.md.id.tmpl +2 -0
- package/lib/cli/templates/project-brief.md.tmpl +26 -0
- package/lib/cli/utils.mjs +2 -1
- package/package.json +2 -1
- package/scripts/benchmark-evidence-bundle.mjs +493 -16
- package/scripts/frontend-usability-audit.mjs +21 -0
- package/scripts/memory-continuity-benchmark.mjs +322 -0
- package/scripts/release-gate.mjs +30 -0
- package/scripts/validate.mjs +5 -0
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
{
|
|
2
|
+
"generatedAt": "2026-04-17T03:20:15.400Z",
|
|
3
|
+
"reportName": "benchmark-history",
|
|
4
|
+
"maxEntries": 90,
|
|
5
|
+
"history": [
|
|
6
|
+
{
|
|
7
|
+
"generatedAt": "2026-04-17T02:54:01.239Z",
|
|
8
|
+
"releaseVersion": "2.0.26",
|
|
9
|
+
"fixtureCount": 12,
|
|
10
|
+
"top1Accuracy": 0.9167,
|
|
11
|
+
"manualCorrectionRate": 0.0833,
|
|
12
|
+
"benchmarkGatePassed": true,
|
|
13
|
+
"intelligencePassed": true,
|
|
14
|
+
"staleWatchlistCount": 0,
|
|
15
|
+
"reliabilityPassed": true,
|
|
16
|
+
"reliabilityRiskLevel": "monitor",
|
|
17
|
+
"incorrectDetectionRate": 0.0833,
|
|
18
|
+
"lowConfidenceRate": 0.0833,
|
|
19
|
+
"vulnerabilityTotal": null,
|
|
20
|
+
"criticalVulnerabilityCount": null,
|
|
21
|
+
"forbiddenContentPassed": true
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"generatedAt": "2026-04-17T02:54:57.419Z",
|
|
25
|
+
"releaseVersion": "2.0.26",
|
|
26
|
+
"fixtureCount": 12,
|
|
27
|
+
"top1Accuracy": 0.9167,
|
|
28
|
+
"manualCorrectionRate": 0.0833,
|
|
29
|
+
"benchmarkGatePassed": true,
|
|
30
|
+
"intelligencePassed": true,
|
|
31
|
+
"staleWatchlistCount": 0,
|
|
32
|
+
"reliabilityPassed": true,
|
|
33
|
+
"reliabilityRiskLevel": "monitor",
|
|
34
|
+
"incorrectDetectionRate": 0.0833,
|
|
35
|
+
"lowConfidenceRate": 0.0833,
|
|
36
|
+
"vulnerabilityTotal": null,
|
|
37
|
+
"criticalVulnerabilityCount": null,
|
|
38
|
+
"forbiddenContentPassed": true
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"generatedAt": "2026-04-17T03:19:31.047Z",
|
|
42
|
+
"releaseVersion": "2.0.26",
|
|
43
|
+
"fixtureCount": 12,
|
|
44
|
+
"top1Accuracy": 0.9167,
|
|
45
|
+
"manualCorrectionRate": 0.0833,
|
|
46
|
+
"benchmarkGatePassed": true,
|
|
47
|
+
"intelligencePassed": true,
|
|
48
|
+
"staleWatchlistCount": 0,
|
|
49
|
+
"reliabilityPassed": true,
|
|
50
|
+
"reliabilityRiskLevel": "monitor",
|
|
51
|
+
"incorrectDetectionRate": 0.0833,
|
|
52
|
+
"lowConfidenceRate": 0.0833,
|
|
53
|
+
"vulnerabilityTotal": null,
|
|
54
|
+
"criticalVulnerabilityCount": null,
|
|
55
|
+
"forbiddenContentPassed": true
|
|
56
|
+
},
|
|
57
|
+
{
|
|
58
|
+
"generatedAt": "2026-04-17T03:20:15.400Z",
|
|
59
|
+
"releaseVersion": "2.0.26",
|
|
60
|
+
"fixtureCount": 12,
|
|
61
|
+
"top1Accuracy": 0.9167,
|
|
62
|
+
"manualCorrectionRate": 0.0833,
|
|
63
|
+
"benchmarkGatePassed": true,
|
|
64
|
+
"intelligencePassed": true,
|
|
65
|
+
"staleWatchlistCount": 0,
|
|
66
|
+
"reliabilityPassed": true,
|
|
67
|
+
"reliabilityRiskLevel": "monitor",
|
|
68
|
+
"incorrectDetectionRate": 0.0833,
|
|
69
|
+
"lowConfidenceRate": 0.0833,
|
|
70
|
+
"vulnerabilityTotal": null,
|
|
71
|
+
"criticalVulnerabilityCount": null,
|
|
72
|
+
"forbiddenContentPassed": true
|
|
73
|
+
}
|
|
74
|
+
]
|
|
75
|
+
}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
snapshotIndex,generatedAt,releaseVersion,top1Accuracy,manualCorrectionRate,incorrectDetectionRate,lowConfidenceRate,staleWatchlistCount,vulnerabilityTotal,criticalVulnerabilityCount,benchmarkGatePassed,intelligencePassed,reliabilityPassed,reliabilityRiskLevel
|
|
2
|
+
"1","2026-04-17T02:54:01.239Z","2.0.26","0.9167","0.0833","0.0833","0.0833","0",,,"true","true","true","monitor"
|
|
3
|
+
"2","2026-04-17T02:54:57.419Z","2.0.26","0.9167","0.0833","0.0833","0.0833","0",,,"true","true","true","monitor"
|
|
4
|
+
"3","2026-04-17T03:19:31.047Z","2.0.26","0.9167","0.0833","0.0833","0.0833","0",,,"true","true","true","monitor"
|
|
5
|
+
"4","2026-04-17T03:20:15.400Z","2.0.26","0.9167","0.0833","0.0833","0.0833","0",,,"true","true","true","monitor"
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
{
|
|
2
|
+
"generatedAt": "2026-04-17T03:20:15.400Z",
|
|
3
|
+
"reportName": "benchmark-trend-report",
|
|
4
|
+
"releaseVersion": "2.0.26",
|
|
5
|
+
"historyCount": 4,
|
|
6
|
+
"releaseDelta": {
|
|
7
|
+
"currentReleaseVersion": "2.0.26",
|
|
8
|
+
"previousReleaseVersion": "2.0.26",
|
|
9
|
+
"comparedSnapshot": {
|
|
10
|
+
"currentGeneratedAt": "2026-04-17T03:20:15.400Z",
|
|
11
|
+
"previousGeneratedAt": "2026-04-17T03:19:31.047Z"
|
|
12
|
+
},
|
|
13
|
+
"top1AccuracyDelta": 0,
|
|
14
|
+
"manualCorrectionRateDelta": 0,
|
|
15
|
+
"staleWatchlistCountDelta": 0,
|
|
16
|
+
"vulnerabilityTotalDelta": 0,
|
|
17
|
+
"summary": [
|
|
18
|
+
"top1Accuracy: +0",
|
|
19
|
+
"manualCorrectionRate: +0",
|
|
20
|
+
"staleWatchlistCount: +0",
|
|
21
|
+
"vulnerabilityTotal: +0"
|
|
22
|
+
]
|
|
23
|
+
},
|
|
24
|
+
"trendTable": [
|
|
25
|
+
{
|
|
26
|
+
"snapshotIndex": 1,
|
|
27
|
+
"generatedAt": "2026-04-17T02:54:01.239Z",
|
|
28
|
+
"releaseVersion": "2.0.26",
|
|
29
|
+
"top1Accuracy": 0.9167,
|
|
30
|
+
"manualCorrectionRate": 0.0833,
|
|
31
|
+
"incorrectDetectionRate": 0.0833,
|
|
32
|
+
"lowConfidenceRate": 0.0833,
|
|
33
|
+
"staleWatchlistCount": 0,
|
|
34
|
+
"vulnerabilityTotal": null,
|
|
35
|
+
"criticalVulnerabilityCount": null,
|
|
36
|
+
"benchmarkGatePassed": true,
|
|
37
|
+
"intelligencePassed": true,
|
|
38
|
+
"reliabilityPassed": true,
|
|
39
|
+
"reliabilityRiskLevel": "monitor"
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
"snapshotIndex": 2,
|
|
43
|
+
"generatedAt": "2026-04-17T02:54:57.419Z",
|
|
44
|
+
"releaseVersion": "2.0.26",
|
|
45
|
+
"top1Accuracy": 0.9167,
|
|
46
|
+
"manualCorrectionRate": 0.0833,
|
|
47
|
+
"incorrectDetectionRate": 0.0833,
|
|
48
|
+
"lowConfidenceRate": 0.0833,
|
|
49
|
+
"staleWatchlistCount": 0,
|
|
50
|
+
"vulnerabilityTotal": null,
|
|
51
|
+
"criticalVulnerabilityCount": null,
|
|
52
|
+
"benchmarkGatePassed": true,
|
|
53
|
+
"intelligencePassed": true,
|
|
54
|
+
"reliabilityPassed": true,
|
|
55
|
+
"reliabilityRiskLevel": "monitor"
|
|
56
|
+
},
|
|
57
|
+
{
|
|
58
|
+
"snapshotIndex": 3,
|
|
59
|
+
"generatedAt": "2026-04-17T03:19:31.047Z",
|
|
60
|
+
"releaseVersion": "2.0.26",
|
|
61
|
+
"top1Accuracy": 0.9167,
|
|
62
|
+
"manualCorrectionRate": 0.0833,
|
|
63
|
+
"incorrectDetectionRate": 0.0833,
|
|
64
|
+
"lowConfidenceRate": 0.0833,
|
|
65
|
+
"staleWatchlistCount": 0,
|
|
66
|
+
"vulnerabilityTotal": null,
|
|
67
|
+
"criticalVulnerabilityCount": null,
|
|
68
|
+
"benchmarkGatePassed": true,
|
|
69
|
+
"intelligencePassed": true,
|
|
70
|
+
"reliabilityPassed": true,
|
|
71
|
+
"reliabilityRiskLevel": "monitor"
|
|
72
|
+
},
|
|
73
|
+
{
|
|
74
|
+
"snapshotIndex": 4,
|
|
75
|
+
"generatedAt": "2026-04-17T03:20:15.400Z",
|
|
76
|
+
"releaseVersion": "2.0.26",
|
|
77
|
+
"top1Accuracy": 0.9167,
|
|
78
|
+
"manualCorrectionRate": 0.0833,
|
|
79
|
+
"incorrectDetectionRate": 0.0833,
|
|
80
|
+
"lowConfidenceRate": 0.0833,
|
|
81
|
+
"staleWatchlistCount": 0,
|
|
82
|
+
"vulnerabilityTotal": null,
|
|
83
|
+
"criticalVulnerabilityCount": null,
|
|
84
|
+
"benchmarkGatePassed": true,
|
|
85
|
+
"intelligencePassed": true,
|
|
86
|
+
"reliabilityPassed": true,
|
|
87
|
+
"reliabilityRiskLevel": "monitor"
|
|
88
|
+
}
|
|
89
|
+
],
|
|
90
|
+
"chartSeries": {
|
|
91
|
+
"generatedAt": [
|
|
92
|
+
"2026-04-17T02:54:01.239Z",
|
|
93
|
+
"2026-04-17T02:54:57.419Z",
|
|
94
|
+
"2026-04-17T03:19:31.047Z",
|
|
95
|
+
"2026-04-17T03:20:15.400Z"
|
|
96
|
+
],
|
|
97
|
+
"top1Accuracy": [
|
|
98
|
+
0.9167,
|
|
99
|
+
0.9167,
|
|
100
|
+
0.9167,
|
|
101
|
+
0.9167
|
|
102
|
+
],
|
|
103
|
+
"manualCorrectionRate": [
|
|
104
|
+
0.0833,
|
|
105
|
+
0.0833,
|
|
106
|
+
0.0833,
|
|
107
|
+
0.0833
|
|
108
|
+
],
|
|
109
|
+
"incorrectDetectionRate": [
|
|
110
|
+
0.0833,
|
|
111
|
+
0.0833,
|
|
112
|
+
0.0833,
|
|
113
|
+
0.0833
|
|
114
|
+
],
|
|
115
|
+
"lowConfidenceRate": [
|
|
116
|
+
0.0833,
|
|
117
|
+
0.0833,
|
|
118
|
+
0.0833,
|
|
119
|
+
0.0833
|
|
120
|
+
],
|
|
121
|
+
"staleWatchlistCount": [
|
|
122
|
+
0,
|
|
123
|
+
0,
|
|
124
|
+
0,
|
|
125
|
+
0
|
|
126
|
+
],
|
|
127
|
+
"vulnerabilityTotal": [
|
|
128
|
+
null,
|
|
129
|
+
null,
|
|
130
|
+
null,
|
|
131
|
+
null
|
|
132
|
+
]
|
|
133
|
+
},
|
|
134
|
+
"artifacts": {
|
|
135
|
+
"historyPath": ".agent-context/state/benchmark-history.json",
|
|
136
|
+
"jsonPath": ".agent-context/state/benchmark-trend-report.json",
|
|
137
|
+
"csvPath": ".agent-context/state/benchmark-trend-report.csv",
|
|
138
|
+
"writeMode": "stdout-and-file"
|
|
139
|
+
}
|
|
140
|
+
}
|
|
@@ -3,17 +3,17 @@
|
|
|
3
3
|
{
|
|
4
4
|
"repository": "sickn33/antigravity-awesome-skills",
|
|
5
5
|
"owner": "core-architecture",
|
|
6
|
-
"lastReviewedAt": "2026-04-
|
|
6
|
+
"lastReviewedAt": "2026-04-17"
|
|
7
7
|
},
|
|
8
8
|
{
|
|
9
9
|
"repository": "github/awesome-copilot",
|
|
10
10
|
"owner": "core-architecture",
|
|
11
|
-
"lastReviewedAt": "2026-04-
|
|
11
|
+
"lastReviewedAt": "2026-04-17"
|
|
12
12
|
},
|
|
13
13
|
{
|
|
14
14
|
"repository": "MiniMax-AI/skills",
|
|
15
15
|
"owner": "frontend-governance",
|
|
16
|
-
"lastReviewedAt": "2026-04-
|
|
16
|
+
"lastReviewedAt": "2026-04-17"
|
|
17
17
|
}
|
|
18
18
|
]
|
|
19
19
|
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schemaVersion": "1.0.0",
|
|
3
|
+
"contractName": "cross-agent-memory-adapter",
|
|
4
|
+
"description": "Adapter contract for ingesting and retrieving shared memory observations across IDE hosts.",
|
|
5
|
+
"requiredAdapters": [
|
|
6
|
+
"claude-code",
|
|
7
|
+
"gemini-cli",
|
|
8
|
+
"vscode-chat"
|
|
9
|
+
],
|
|
10
|
+
"requiredOperations": {
|
|
11
|
+
"ingestion": [
|
|
12
|
+
"captureObservation",
|
|
13
|
+
"captureSessionSummary"
|
|
14
|
+
],
|
|
15
|
+
"retrieval": [
|
|
16
|
+
"searchIndex",
|
|
17
|
+
"getTimeline",
|
|
18
|
+
"getObservations"
|
|
19
|
+
],
|
|
20
|
+
"privacy": [
|
|
21
|
+
"applyPrivateTagRedaction",
|
|
22
|
+
"applyInlineSecretRedaction"
|
|
23
|
+
]
|
|
24
|
+
},
|
|
25
|
+
"adapters": [
|
|
26
|
+
{
|
|
27
|
+
"adapterId": "claude-code",
|
|
28
|
+
"hostType": "plugin-hooks",
|
|
29
|
+
"status": "pilot-ready",
|
|
30
|
+
"ingestionEvents": ["SessionStart", "UserPromptSubmit", "PostToolUse", "Stop", "SessionEnd"],
|
|
31
|
+
"retrievalMode": "mcp-tools"
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
"adapterId": "gemini-cli",
|
|
35
|
+
"hostType": "plugin-hooks",
|
|
36
|
+
"status": "pilot-ready",
|
|
37
|
+
"ingestionEvents": ["session_start", "prompt_submit", "post_tool", "session_end"],
|
|
38
|
+
"retrievalMode": "mcp-tools"
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"adapterId": "vscode-chat",
|
|
42
|
+
"hostType": "chat-customization-plugin",
|
|
43
|
+
"status": "pilot-ready",
|
|
44
|
+
"ingestionEvents": ["chatStart", "promptSubmit", "postToolUse", "chatEnd"],
|
|
45
|
+
"retrievalMode": "mcp-tools"
|
|
46
|
+
}
|
|
47
|
+
],
|
|
48
|
+
"notes": [
|
|
49
|
+
"Web chat hosts are explicitly out of scope for this pilot because local runtime hooks are unavailable.",
|
|
50
|
+
"Adapters should emit provider-agnostic payloads matching .agent-context/state/memory-schema-v1.json."
|
|
51
|
+
]
|
|
52
|
+
}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
{
|
|
2
|
+
"generatedAt": "2026-04-17T03:27:46.024Z",
|
|
3
|
+
"reportName": "memory-continuity-benchmark",
|
|
4
|
+
"schemaVersion": "1.0.0",
|
|
5
|
+
"passed": true,
|
|
6
|
+
"failureCount": 0,
|
|
7
|
+
"thresholds": {
|
|
8
|
+
"minimumRelevantRecall": 0.8,
|
|
9
|
+
"minimumSessionStartTokenSavingsPercent": 35,
|
|
10
|
+
"maximumUnsafeObservationCount": 0
|
|
11
|
+
},
|
|
12
|
+
"adapterCoverage": {
|
|
13
|
+
"requiredAdapterIds": [
|
|
14
|
+
"claude-code",
|
|
15
|
+
"gemini-cli",
|
|
16
|
+
"vscode-chat"
|
|
17
|
+
],
|
|
18
|
+
"availableAdapterIds": [
|
|
19
|
+
"claude-code",
|
|
20
|
+
"gemini-cli",
|
|
21
|
+
"vscode-chat"
|
|
22
|
+
],
|
|
23
|
+
"missingAdapterIds": [],
|
|
24
|
+
"passed": true
|
|
25
|
+
},
|
|
26
|
+
"privacyControls": {
|
|
27
|
+
"redactedObservationCount": 2,
|
|
28
|
+
"privateTagRedactionCount": 1,
|
|
29
|
+
"inlineRedactionCount": 1,
|
|
30
|
+
"unsafeObservationCount": 0
|
|
31
|
+
},
|
|
32
|
+
"continuitySummary": {
|
|
33
|
+
"totalObservationCount": 5,
|
|
34
|
+
"scenarioCount": 3,
|
|
35
|
+
"averageRelevantRecall": 1,
|
|
36
|
+
"averageSessionStartTokenSavingsPercent": 63.17
|
|
37
|
+
},
|
|
38
|
+
"scenarios": [
|
|
39
|
+
{
|
|
40
|
+
"scenarioId": "docker-lane-hydration",
|
|
41
|
+
"query": "what is docker strategy for development and production",
|
|
42
|
+
"expectedObservationIds": [
|
|
43
|
+
"obs-001"
|
|
44
|
+
],
|
|
45
|
+
"indexObservationIds": [
|
|
46
|
+
"obs-001",
|
|
47
|
+
"obs-005",
|
|
48
|
+
"obs-003",
|
|
49
|
+
"obs-002",
|
|
50
|
+
"obs-004"
|
|
51
|
+
],
|
|
52
|
+
"hydratedObservationIds": [
|
|
53
|
+
"obs-001"
|
|
54
|
+
],
|
|
55
|
+
"relevantRecall": 1,
|
|
56
|
+
"fullContextTokenEstimate": 267,
|
|
57
|
+
"sessionStartTokenEstimate": 103,
|
|
58
|
+
"sessionStartTokenSavingsPercent": 61.42
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
"scenarioId": "runtime-hydration",
|
|
62
|
+
"query": "which runtime target should we prefer on windows with wsl",
|
|
63
|
+
"expectedObservationIds": [
|
|
64
|
+
"obs-002"
|
|
65
|
+
],
|
|
66
|
+
"indexObservationIds": [
|
|
67
|
+
"obs-002",
|
|
68
|
+
"obs-001",
|
|
69
|
+
"obs-005",
|
|
70
|
+
"obs-004",
|
|
71
|
+
"obs-003"
|
|
72
|
+
],
|
|
73
|
+
"hydratedObservationIds": [
|
|
74
|
+
"obs-002"
|
|
75
|
+
],
|
|
76
|
+
"relevantRecall": 1,
|
|
77
|
+
"fullContextTokenEstimate": 267,
|
|
78
|
+
"sessionStartTokenEstimate": 97,
|
|
79
|
+
"sessionStartTokenSavingsPercent": 63.67
|
|
80
|
+
},
|
|
81
|
+
{
|
|
82
|
+
"scenarioId": "frontend-quality-hydration",
|
|
83
|
+
"query": "show frontend rubric quality decisions",
|
|
84
|
+
"expectedObservationIds": [
|
|
85
|
+
"obs-003"
|
|
86
|
+
],
|
|
87
|
+
"indexObservationIds": [
|
|
88
|
+
"obs-003",
|
|
89
|
+
"obs-005",
|
|
90
|
+
"obs-004",
|
|
91
|
+
"obs-002",
|
|
92
|
+
"obs-001"
|
|
93
|
+
],
|
|
94
|
+
"hydratedObservationIds": [
|
|
95
|
+
"obs-003"
|
|
96
|
+
],
|
|
97
|
+
"relevantRecall": 1,
|
|
98
|
+
"fullContextTokenEstimate": 267,
|
|
99
|
+
"sessionStartTokenEstimate": 95,
|
|
100
|
+
"sessionStartTokenSavingsPercent": 64.42
|
|
101
|
+
}
|
|
102
|
+
],
|
|
103
|
+
"references": {
|
|
104
|
+
"memorySchemaPath": ".agent-context/state/memory-schema-v1.json",
|
|
105
|
+
"memoryAdapterContractPath": ".agent-context/state/memory-adapter-contract.json",
|
|
106
|
+
"benchmarkOutputPath": ".agent-context/state/memory-continuity-benchmark.json",
|
|
107
|
+
"schemaDeclaredVersion": "1.0.0",
|
|
108
|
+
"adapterContractVersion": "1.0.0"
|
|
109
|
+
},
|
|
110
|
+
"checks": [
|
|
111
|
+
{
|
|
112
|
+
"checkName": "adapter-coverage",
|
|
113
|
+
"passed": true,
|
|
114
|
+
"details": "required=3 missing=0"
|
|
115
|
+
},
|
|
116
|
+
{
|
|
117
|
+
"checkName": "continuity-recall-threshold",
|
|
118
|
+
"passed": true,
|
|
119
|
+
"details": "averageRelevantRecall=1 minimum=0.8"
|
|
120
|
+
},
|
|
121
|
+
{
|
|
122
|
+
"checkName": "session-start-token-savings-threshold",
|
|
123
|
+
"passed": true,
|
|
124
|
+
"details": "averageSessionStartTokenSavingsPercent=63.17 minimum=35"
|
|
125
|
+
},
|
|
126
|
+
{
|
|
127
|
+
"checkName": "privacy-redaction-safety",
|
|
128
|
+
"passed": true,
|
|
129
|
+
"details": "unsafeObservationCount=0 max=0"
|
|
130
|
+
}
|
|
131
|
+
]
|
|
132
|
+
}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schemaVersion": "1.0.0",
|
|
3
|
+
"schemaName": "cross-agent-memory-observation",
|
|
4
|
+
"description": "Provider-agnostic schema for persistent memory observations shared across coding agents and IDE hosts.",
|
|
5
|
+
"requiredFields": [
|
|
6
|
+
"id",
|
|
7
|
+
"projectId",
|
|
8
|
+
"sessionId",
|
|
9
|
+
"adapterId",
|
|
10
|
+
"eventType",
|
|
11
|
+
"timestamp",
|
|
12
|
+
"title",
|
|
13
|
+
"summary",
|
|
14
|
+
"detail",
|
|
15
|
+
"privacy"
|
|
16
|
+
],
|
|
17
|
+
"fieldDefinitions": {
|
|
18
|
+
"id": {
|
|
19
|
+
"type": "string",
|
|
20
|
+
"description": "Stable unique observation identifier."
|
|
21
|
+
},
|
|
22
|
+
"projectId": {
|
|
23
|
+
"type": "string",
|
|
24
|
+
"description": "Repository or workspace identifier."
|
|
25
|
+
},
|
|
26
|
+
"sessionId": {
|
|
27
|
+
"type": "string",
|
|
28
|
+
"description": "Source session identifier from host adapter."
|
|
29
|
+
},
|
|
30
|
+
"adapterId": {
|
|
31
|
+
"type": "string",
|
|
32
|
+
"allowedValues": ["claude-code", "gemini-cli", "vscode-chat", "custom"],
|
|
33
|
+
"description": "Host adapter that captured this observation."
|
|
34
|
+
},
|
|
35
|
+
"eventType": {
|
|
36
|
+
"type": "string",
|
|
37
|
+
"allowedValues": ["prompt", "tool-use", "decision", "summary", "issue", "context"],
|
|
38
|
+
"description": "Observation type for retrieval filtering."
|
|
39
|
+
},
|
|
40
|
+
"timestamp": {
|
|
41
|
+
"type": "string",
|
|
42
|
+
"format": "date-time",
|
|
43
|
+
"description": "ISO timestamp when observation was captured."
|
|
44
|
+
},
|
|
45
|
+
"title": {
|
|
46
|
+
"type": "string",
|
|
47
|
+
"description": "Compact human-readable headline."
|
|
48
|
+
},
|
|
49
|
+
"summary": {
|
|
50
|
+
"type": "string",
|
|
51
|
+
"description": "Compact session-start payload used for progressive disclosure."
|
|
52
|
+
},
|
|
53
|
+
"detail": {
|
|
54
|
+
"type": "string",
|
|
55
|
+
"description": "Expanded observation text fetched on demand."
|
|
56
|
+
},
|
|
57
|
+
"tags": {
|
|
58
|
+
"type": "array",
|
|
59
|
+
"items": "string",
|
|
60
|
+
"description": "Optional normalized tags for query refinement."
|
|
61
|
+
},
|
|
62
|
+
"privacy": {
|
|
63
|
+
"type": "object",
|
|
64
|
+
"requiredFields": ["level", "redactionApplied"],
|
|
65
|
+
"fieldDefinitions": {
|
|
66
|
+
"level": {
|
|
67
|
+
"type": "string",
|
|
68
|
+
"allowedValues": ["public", "internal", "restricted"],
|
|
69
|
+
"description": "Privacy classification level."
|
|
70
|
+
},
|
|
71
|
+
"redactionApplied": {
|
|
72
|
+
"type": "boolean",
|
|
73
|
+
"description": "Indicates whether privacy sanitization modified payload content."
|
|
74
|
+
},
|
|
75
|
+
"redactionReasons": {
|
|
76
|
+
"type": "array",
|
|
77
|
+
"items": "string",
|
|
78
|
+
"description": "Redaction reason tags such as private-tag or token-like-value."
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
},
|
|
83
|
+
"retrievalContract": {
|
|
84
|
+
"sessionStartPayload": ["id", "adapterId", "eventType", "timestamp", "title", "summary"],
|
|
85
|
+
"onDemandPayload": ["detail", "tags", "privacy"],
|
|
86
|
+
"progressiveDisclosure": true
|
|
87
|
+
}
|
|
88
|
+
}
|
package/.cursorrules
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# AGENTIC-SENIOR-CORE DYNAMIC GOVERNANCE RULESET
|
|
2
2
|
|
|
3
|
-
Generated by Agentic-Senior-Core CLI v2.0.
|
|
3
|
+
Generated by Agentic-Senior-Core CLI v2.0.27
|
|
4
4
|
Timestamp: 2026-04-15T00:14:51.184Z
|
|
5
5
|
Selected profile: beginner
|
|
6
6
|
Selected policy file: .agent-context/policies/llm-judge-threshold.json
|
package/.windsurfrules
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# AGENTIC-SENIOR-CORE DYNAMIC GOVERNANCE RULESET
|
|
2
2
|
|
|
3
|
-
Generated by Agentic-Senior-Core CLI v2.0.
|
|
3
|
+
Generated by Agentic-Senior-Core CLI v2.0.27
|
|
4
4
|
Timestamp: 2026-04-15T00:14:51.184Z
|
|
5
5
|
Selected profile: beginner
|
|
6
6
|
Selected policy file: .agent-context/policies/llm-judge-threshold.json
|
package/README.md
CHANGED
|
@@ -142,6 +142,13 @@ npx @ryuenn3123/agentic-senior-core init --newbie
|
|
|
142
142
|
|
|
143
143
|
- `init` creates governance files **in your project folder** (the folder where you run the command).
|
|
144
144
|
- `init` does not copy repository workflows from this project into your target repository.
|
|
145
|
+
- For fresh projects, `init` first asks what you are building (API, web, mobile, CLI, library) and filters stack choices to match that scope.
|
|
146
|
+
- For mobile scope, stack choices are limited to mobile-relevant options (`react-native`, `flutter`).
|
|
147
|
+
- For existing projects, `init` auto-applies detected stack signals (including additional stack signals for polyglot repositories) so you do not need to re-select language manually.
|
|
148
|
+
- For web projects, `init` can capture separate frontend and backend stacks plus separate frontend/backend blueprints, and keeps this dual architecture context in the onboarding report.
|
|
149
|
+
- `init` detects runtime environment (Linux/WSL, Windows, macOS) and supports explicit override with `--runtime-env`.
|
|
150
|
+
- Project discovery now captures Docker strategy (none, development-only, production-only, both) and keeps development and production container intent separated.
|
|
151
|
+
- Docker setup is expected to be generated dynamically by AI from real project context, not from fixed static templates.
|
|
145
152
|
- `init` project discovery accepts answers in any language; prompts stay in English, but non-English answers are supported.
|
|
146
153
|
- Generated docs default to English for consistency; use `--docs-lang` only when you explicitly need a different output language.
|
|
147
154
|
- After docs scaffolding, CLI prints prompt starter examples so users can iterate by prompt without rewriting full project context.
|
|
@@ -287,6 +294,27 @@ For CI pipelines that only need stdout JSON:
|
|
|
287
294
|
node ./scripts/benchmark-writer-judge-matrix.mjs --stdout-only
|
|
288
295
|
```
|
|
289
296
|
|
|
297
|
+
### Memory Continuity Benchmark (V2.5.3 Pilot)
|
|
298
|
+
|
|
299
|
+
Validate cross-agent memory hydration quality, privacy redaction safety, and session-start token savings:
|
|
300
|
+
|
|
301
|
+
```bash
|
|
302
|
+
npm run benchmark:continuity
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
This command writes:
|
|
306
|
+
- `.agent-context/state/memory-continuity-benchmark.json`
|
|
307
|
+
|
|
308
|
+
Reference artifacts:
|
|
309
|
+
- `.agent-context/state/memory-schema-v1.json`
|
|
310
|
+
- `.agent-context/state/memory-adapter-contract.json`
|
|
311
|
+
|
|
312
|
+
For CI pipelines that only need stdout JSON:
|
|
313
|
+
|
|
314
|
+
```bash
|
|
315
|
+
node ./scripts/memory-continuity-benchmark.mjs --stdout-only
|
|
316
|
+
```
|
|
317
|
+
|
|
290
318
|
### Benchmark Quickstart Path (V2.5)
|
|
291
319
|
|
|
292
320
|
For new users, run this minimal sequence first:
|
|
@@ -295,6 +323,7 @@ For new users, run this minimal sequence first:
|
|
|
295
323
|
npm run benchmark:detection
|
|
296
324
|
npm run benchmark:writer-judge
|
|
297
325
|
npm run benchmark:bundle
|
|
326
|
+
npm run benchmark:continuity
|
|
298
327
|
```
|
|
299
328
|
|
|
300
329
|
This gives a fast baseline of accuracy, writer-judge comparison, evidence packaging, and memory continuity in one pass.
|