@ryuenn3123/agentic-senior-core 2.0.26 → 2.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent-context/state/benchmark-evidence-bundle.json +672 -22
- package/.agent-context/state/benchmark-history.json +75 -0
- package/.agent-context/state/benchmark-trend-report.csv +5 -0
- package/.agent-context/state/benchmark-trend-report.json +140 -0
- package/.agent-context/state/benchmark-watchlist.json +3 -3
- package/.agent-context/state/memory-adapter-contract.json +52 -0
- package/.agent-context/state/memory-continuity-benchmark.json +132 -0
- package/.agent-context/state/memory-schema-v1.json +88 -0
- package/.cursorrules +1 -1
- package/.windsurfrules +1 -1
- package/README.md +43 -0
- package/lib/cli/commands/init.mjs +47 -1
- package/lib/cli/compiler.mjs +22 -0
- package/lib/cli/memory-continuity.mjs +395 -0
- package/lib/cli/utils.mjs +3 -1
- package/package.json +2 -1
- package/scripts/benchmark-evidence-bundle.mjs +493 -16
- package/scripts/memory-continuity-benchmark.mjs +322 -0
- package/scripts/validate.mjs +3 -0
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
{
|
|
2
|
+
"generatedAt": "2026-04-17T03:20:15.400Z",
|
|
3
|
+
"reportName": "benchmark-history",
|
|
4
|
+
"maxEntries": 90,
|
|
5
|
+
"history": [
|
|
6
|
+
{
|
|
7
|
+
"generatedAt": "2026-04-17T02:54:01.239Z",
|
|
8
|
+
"releaseVersion": "2.0.26",
|
|
9
|
+
"fixtureCount": 12,
|
|
10
|
+
"top1Accuracy": 0.9167,
|
|
11
|
+
"manualCorrectionRate": 0.0833,
|
|
12
|
+
"benchmarkGatePassed": true,
|
|
13
|
+
"intelligencePassed": true,
|
|
14
|
+
"staleWatchlistCount": 0,
|
|
15
|
+
"reliabilityPassed": true,
|
|
16
|
+
"reliabilityRiskLevel": "monitor",
|
|
17
|
+
"incorrectDetectionRate": 0.0833,
|
|
18
|
+
"lowConfidenceRate": 0.0833,
|
|
19
|
+
"vulnerabilityTotal": null,
|
|
20
|
+
"criticalVulnerabilityCount": null,
|
|
21
|
+
"forbiddenContentPassed": true
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"generatedAt": "2026-04-17T02:54:57.419Z",
|
|
25
|
+
"releaseVersion": "2.0.26",
|
|
26
|
+
"fixtureCount": 12,
|
|
27
|
+
"top1Accuracy": 0.9167,
|
|
28
|
+
"manualCorrectionRate": 0.0833,
|
|
29
|
+
"benchmarkGatePassed": true,
|
|
30
|
+
"intelligencePassed": true,
|
|
31
|
+
"staleWatchlistCount": 0,
|
|
32
|
+
"reliabilityPassed": true,
|
|
33
|
+
"reliabilityRiskLevel": "monitor",
|
|
34
|
+
"incorrectDetectionRate": 0.0833,
|
|
35
|
+
"lowConfidenceRate": 0.0833,
|
|
36
|
+
"vulnerabilityTotal": null,
|
|
37
|
+
"criticalVulnerabilityCount": null,
|
|
38
|
+
"forbiddenContentPassed": true
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"generatedAt": "2026-04-17T03:19:31.047Z",
|
|
42
|
+
"releaseVersion": "2.0.26",
|
|
43
|
+
"fixtureCount": 12,
|
|
44
|
+
"top1Accuracy": 0.9167,
|
|
45
|
+
"manualCorrectionRate": 0.0833,
|
|
46
|
+
"benchmarkGatePassed": true,
|
|
47
|
+
"intelligencePassed": true,
|
|
48
|
+
"staleWatchlistCount": 0,
|
|
49
|
+
"reliabilityPassed": true,
|
|
50
|
+
"reliabilityRiskLevel": "monitor",
|
|
51
|
+
"incorrectDetectionRate": 0.0833,
|
|
52
|
+
"lowConfidenceRate": 0.0833,
|
|
53
|
+
"vulnerabilityTotal": null,
|
|
54
|
+
"criticalVulnerabilityCount": null,
|
|
55
|
+
"forbiddenContentPassed": true
|
|
56
|
+
},
|
|
57
|
+
{
|
|
58
|
+
"generatedAt": "2026-04-17T03:20:15.400Z",
|
|
59
|
+
"releaseVersion": "2.0.26",
|
|
60
|
+
"fixtureCount": 12,
|
|
61
|
+
"top1Accuracy": 0.9167,
|
|
62
|
+
"manualCorrectionRate": 0.0833,
|
|
63
|
+
"benchmarkGatePassed": true,
|
|
64
|
+
"intelligencePassed": true,
|
|
65
|
+
"staleWatchlistCount": 0,
|
|
66
|
+
"reliabilityPassed": true,
|
|
67
|
+
"reliabilityRiskLevel": "monitor",
|
|
68
|
+
"incorrectDetectionRate": 0.0833,
|
|
69
|
+
"lowConfidenceRate": 0.0833,
|
|
70
|
+
"vulnerabilityTotal": null,
|
|
71
|
+
"criticalVulnerabilityCount": null,
|
|
72
|
+
"forbiddenContentPassed": true
|
|
73
|
+
}
|
|
74
|
+
]
|
|
75
|
+
}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
snapshotIndex,generatedAt,releaseVersion,top1Accuracy,manualCorrectionRate,incorrectDetectionRate,lowConfidenceRate,staleWatchlistCount,vulnerabilityTotal,criticalVulnerabilityCount,benchmarkGatePassed,intelligencePassed,reliabilityPassed,reliabilityRiskLevel
|
|
2
|
+
"1","2026-04-17T02:54:01.239Z","2.0.26","0.9167","0.0833","0.0833","0.0833","0",,,"true","true","true","monitor"
|
|
3
|
+
"2","2026-04-17T02:54:57.419Z","2.0.26","0.9167","0.0833","0.0833","0.0833","0",,,"true","true","true","monitor"
|
|
4
|
+
"3","2026-04-17T03:19:31.047Z","2.0.26","0.9167","0.0833","0.0833","0.0833","0",,,"true","true","true","monitor"
|
|
5
|
+
"4","2026-04-17T03:20:15.400Z","2.0.26","0.9167","0.0833","0.0833","0.0833","0",,,"true","true","true","monitor"
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
{
|
|
2
|
+
"generatedAt": "2026-04-17T03:20:15.400Z",
|
|
3
|
+
"reportName": "benchmark-trend-report",
|
|
4
|
+
"releaseVersion": "2.0.26",
|
|
5
|
+
"historyCount": 4,
|
|
6
|
+
"releaseDelta": {
|
|
7
|
+
"currentReleaseVersion": "2.0.26",
|
|
8
|
+
"previousReleaseVersion": "2.0.26",
|
|
9
|
+
"comparedSnapshot": {
|
|
10
|
+
"currentGeneratedAt": "2026-04-17T03:20:15.400Z",
|
|
11
|
+
"previousGeneratedAt": "2026-04-17T03:19:31.047Z"
|
|
12
|
+
},
|
|
13
|
+
"top1AccuracyDelta": 0,
|
|
14
|
+
"manualCorrectionRateDelta": 0,
|
|
15
|
+
"staleWatchlistCountDelta": 0,
|
|
16
|
+
"vulnerabilityTotalDelta": 0,
|
|
17
|
+
"summary": [
|
|
18
|
+
"top1Accuracy: +0",
|
|
19
|
+
"manualCorrectionRate: +0",
|
|
20
|
+
"staleWatchlistCount: +0",
|
|
21
|
+
"vulnerabilityTotal: +0"
|
|
22
|
+
]
|
|
23
|
+
},
|
|
24
|
+
"trendTable": [
|
|
25
|
+
{
|
|
26
|
+
"snapshotIndex": 1,
|
|
27
|
+
"generatedAt": "2026-04-17T02:54:01.239Z",
|
|
28
|
+
"releaseVersion": "2.0.26",
|
|
29
|
+
"top1Accuracy": 0.9167,
|
|
30
|
+
"manualCorrectionRate": 0.0833,
|
|
31
|
+
"incorrectDetectionRate": 0.0833,
|
|
32
|
+
"lowConfidenceRate": 0.0833,
|
|
33
|
+
"staleWatchlistCount": 0,
|
|
34
|
+
"vulnerabilityTotal": null,
|
|
35
|
+
"criticalVulnerabilityCount": null,
|
|
36
|
+
"benchmarkGatePassed": true,
|
|
37
|
+
"intelligencePassed": true,
|
|
38
|
+
"reliabilityPassed": true,
|
|
39
|
+
"reliabilityRiskLevel": "monitor"
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
"snapshotIndex": 2,
|
|
43
|
+
"generatedAt": "2026-04-17T02:54:57.419Z",
|
|
44
|
+
"releaseVersion": "2.0.26",
|
|
45
|
+
"top1Accuracy": 0.9167,
|
|
46
|
+
"manualCorrectionRate": 0.0833,
|
|
47
|
+
"incorrectDetectionRate": 0.0833,
|
|
48
|
+
"lowConfidenceRate": 0.0833,
|
|
49
|
+
"staleWatchlistCount": 0,
|
|
50
|
+
"vulnerabilityTotal": null,
|
|
51
|
+
"criticalVulnerabilityCount": null,
|
|
52
|
+
"benchmarkGatePassed": true,
|
|
53
|
+
"intelligencePassed": true,
|
|
54
|
+
"reliabilityPassed": true,
|
|
55
|
+
"reliabilityRiskLevel": "monitor"
|
|
56
|
+
},
|
|
57
|
+
{
|
|
58
|
+
"snapshotIndex": 3,
|
|
59
|
+
"generatedAt": "2026-04-17T03:19:31.047Z",
|
|
60
|
+
"releaseVersion": "2.0.26",
|
|
61
|
+
"top1Accuracy": 0.9167,
|
|
62
|
+
"manualCorrectionRate": 0.0833,
|
|
63
|
+
"incorrectDetectionRate": 0.0833,
|
|
64
|
+
"lowConfidenceRate": 0.0833,
|
|
65
|
+
"staleWatchlistCount": 0,
|
|
66
|
+
"vulnerabilityTotal": null,
|
|
67
|
+
"criticalVulnerabilityCount": null,
|
|
68
|
+
"benchmarkGatePassed": true,
|
|
69
|
+
"intelligencePassed": true,
|
|
70
|
+
"reliabilityPassed": true,
|
|
71
|
+
"reliabilityRiskLevel": "monitor"
|
|
72
|
+
},
|
|
73
|
+
{
|
|
74
|
+
"snapshotIndex": 4,
|
|
75
|
+
"generatedAt": "2026-04-17T03:20:15.400Z",
|
|
76
|
+
"releaseVersion": "2.0.26",
|
|
77
|
+
"top1Accuracy": 0.9167,
|
|
78
|
+
"manualCorrectionRate": 0.0833,
|
|
79
|
+
"incorrectDetectionRate": 0.0833,
|
|
80
|
+
"lowConfidenceRate": 0.0833,
|
|
81
|
+
"staleWatchlistCount": 0,
|
|
82
|
+
"vulnerabilityTotal": null,
|
|
83
|
+
"criticalVulnerabilityCount": null,
|
|
84
|
+
"benchmarkGatePassed": true,
|
|
85
|
+
"intelligencePassed": true,
|
|
86
|
+
"reliabilityPassed": true,
|
|
87
|
+
"reliabilityRiskLevel": "monitor"
|
|
88
|
+
}
|
|
89
|
+
],
|
|
90
|
+
"chartSeries": {
|
|
91
|
+
"generatedAt": [
|
|
92
|
+
"2026-04-17T02:54:01.239Z",
|
|
93
|
+
"2026-04-17T02:54:57.419Z",
|
|
94
|
+
"2026-04-17T03:19:31.047Z",
|
|
95
|
+
"2026-04-17T03:20:15.400Z"
|
|
96
|
+
],
|
|
97
|
+
"top1Accuracy": [
|
|
98
|
+
0.9167,
|
|
99
|
+
0.9167,
|
|
100
|
+
0.9167,
|
|
101
|
+
0.9167
|
|
102
|
+
],
|
|
103
|
+
"manualCorrectionRate": [
|
|
104
|
+
0.0833,
|
|
105
|
+
0.0833,
|
|
106
|
+
0.0833,
|
|
107
|
+
0.0833
|
|
108
|
+
],
|
|
109
|
+
"incorrectDetectionRate": [
|
|
110
|
+
0.0833,
|
|
111
|
+
0.0833,
|
|
112
|
+
0.0833,
|
|
113
|
+
0.0833
|
|
114
|
+
],
|
|
115
|
+
"lowConfidenceRate": [
|
|
116
|
+
0.0833,
|
|
117
|
+
0.0833,
|
|
118
|
+
0.0833,
|
|
119
|
+
0.0833
|
|
120
|
+
],
|
|
121
|
+
"staleWatchlistCount": [
|
|
122
|
+
0,
|
|
123
|
+
0,
|
|
124
|
+
0,
|
|
125
|
+
0
|
|
126
|
+
],
|
|
127
|
+
"vulnerabilityTotal": [
|
|
128
|
+
null,
|
|
129
|
+
null,
|
|
130
|
+
null,
|
|
131
|
+
null
|
|
132
|
+
]
|
|
133
|
+
},
|
|
134
|
+
"artifacts": {
|
|
135
|
+
"historyPath": ".agent-context/state/benchmark-history.json",
|
|
136
|
+
"jsonPath": ".agent-context/state/benchmark-trend-report.json",
|
|
137
|
+
"csvPath": ".agent-context/state/benchmark-trend-report.csv",
|
|
138
|
+
"writeMode": "stdout-and-file"
|
|
139
|
+
}
|
|
140
|
+
}
|
|
@@ -3,17 +3,17 @@
|
|
|
3
3
|
{
|
|
4
4
|
"repository": "sickn33/antigravity-awesome-skills",
|
|
5
5
|
"owner": "core-architecture",
|
|
6
|
-
"lastReviewedAt": "2026-04-
|
|
6
|
+
"lastReviewedAt": "2026-04-17"
|
|
7
7
|
},
|
|
8
8
|
{
|
|
9
9
|
"repository": "github/awesome-copilot",
|
|
10
10
|
"owner": "core-architecture",
|
|
11
|
-
"lastReviewedAt": "2026-04-
|
|
11
|
+
"lastReviewedAt": "2026-04-17"
|
|
12
12
|
},
|
|
13
13
|
{
|
|
14
14
|
"repository": "MiniMax-AI/skills",
|
|
15
15
|
"owner": "frontend-governance",
|
|
16
|
-
"lastReviewedAt": "2026-04-
|
|
16
|
+
"lastReviewedAt": "2026-04-17"
|
|
17
17
|
}
|
|
18
18
|
]
|
|
19
19
|
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schemaVersion": "1.0.0",
|
|
3
|
+
"contractName": "cross-agent-memory-adapter",
|
|
4
|
+
"description": "Adapter contract for ingesting and retrieving shared memory observations across IDE hosts.",
|
|
5
|
+
"requiredAdapters": [
|
|
6
|
+
"claude-code",
|
|
7
|
+
"gemini-cli",
|
|
8
|
+
"vscode-chat"
|
|
9
|
+
],
|
|
10
|
+
"requiredOperations": {
|
|
11
|
+
"ingestion": [
|
|
12
|
+
"captureObservation",
|
|
13
|
+
"captureSessionSummary"
|
|
14
|
+
],
|
|
15
|
+
"retrieval": [
|
|
16
|
+
"searchIndex",
|
|
17
|
+
"getTimeline",
|
|
18
|
+
"getObservations"
|
|
19
|
+
],
|
|
20
|
+
"privacy": [
|
|
21
|
+
"applyPrivateTagRedaction",
|
|
22
|
+
"applyInlineSecretRedaction"
|
|
23
|
+
]
|
|
24
|
+
},
|
|
25
|
+
"adapters": [
|
|
26
|
+
{
|
|
27
|
+
"adapterId": "claude-code",
|
|
28
|
+
"hostType": "plugin-hooks",
|
|
29
|
+
"status": "pilot-ready",
|
|
30
|
+
"ingestionEvents": ["SessionStart", "UserPromptSubmit", "PostToolUse", "Stop", "SessionEnd"],
|
|
31
|
+
"retrievalMode": "mcp-tools"
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
"adapterId": "gemini-cli",
|
|
35
|
+
"hostType": "plugin-hooks",
|
|
36
|
+
"status": "pilot-ready",
|
|
37
|
+
"ingestionEvents": ["session_start", "prompt_submit", "post_tool", "session_end"],
|
|
38
|
+
"retrievalMode": "mcp-tools"
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"adapterId": "vscode-chat",
|
|
42
|
+
"hostType": "chat-customization-plugin",
|
|
43
|
+
"status": "pilot-ready",
|
|
44
|
+
"ingestionEvents": ["chatStart", "promptSubmit", "postToolUse", "chatEnd"],
|
|
45
|
+
"retrievalMode": "mcp-tools"
|
|
46
|
+
}
|
|
47
|
+
],
|
|
48
|
+
"notes": [
|
|
49
|
+
"Web chat hosts are explicitly out of scope for this pilot because local runtime hooks are unavailable.",
|
|
50
|
+
"Adapters should emit provider-agnostic payloads matching .agent-context/state/memory-schema-v1.json."
|
|
51
|
+
]
|
|
52
|
+
}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
{
|
|
2
|
+
"generatedAt": "2026-04-17T03:41:09.595Z",
|
|
3
|
+
"reportName": "memory-continuity-benchmark",
|
|
4
|
+
"schemaVersion": "1.0.0",
|
|
5
|
+
"passed": true,
|
|
6
|
+
"failureCount": 0,
|
|
7
|
+
"thresholds": {
|
|
8
|
+
"minimumRelevantRecall": 0.8,
|
|
9
|
+
"minimumSessionStartTokenSavingsPercent": 35,
|
|
10
|
+
"maximumUnsafeObservationCount": 0
|
|
11
|
+
},
|
|
12
|
+
"adapterCoverage": {
|
|
13
|
+
"requiredAdapterIds": [
|
|
14
|
+
"claude-code",
|
|
15
|
+
"gemini-cli",
|
|
16
|
+
"vscode-chat"
|
|
17
|
+
],
|
|
18
|
+
"availableAdapterIds": [
|
|
19
|
+
"claude-code",
|
|
20
|
+
"gemini-cli",
|
|
21
|
+
"vscode-chat"
|
|
22
|
+
],
|
|
23
|
+
"missingAdapterIds": [],
|
|
24
|
+
"passed": true
|
|
25
|
+
},
|
|
26
|
+
"privacyControls": {
|
|
27
|
+
"redactedObservationCount": 2,
|
|
28
|
+
"privateTagRedactionCount": 1,
|
|
29
|
+
"inlineRedactionCount": 1,
|
|
30
|
+
"unsafeObservationCount": 0
|
|
31
|
+
},
|
|
32
|
+
"continuitySummary": {
|
|
33
|
+
"totalObservationCount": 5,
|
|
34
|
+
"scenarioCount": 3,
|
|
35
|
+
"averageRelevantRecall": 1,
|
|
36
|
+
"averageSessionStartTokenSavingsPercent": 63.17
|
|
37
|
+
},
|
|
38
|
+
"scenarios": [
|
|
39
|
+
{
|
|
40
|
+
"scenarioId": "docker-lane-hydration",
|
|
41
|
+
"query": "what is docker strategy for development and production",
|
|
42
|
+
"expectedObservationIds": [
|
|
43
|
+
"obs-001"
|
|
44
|
+
],
|
|
45
|
+
"indexObservationIds": [
|
|
46
|
+
"obs-001",
|
|
47
|
+
"obs-005",
|
|
48
|
+
"obs-003",
|
|
49
|
+
"obs-002",
|
|
50
|
+
"obs-004"
|
|
51
|
+
],
|
|
52
|
+
"hydratedObservationIds": [
|
|
53
|
+
"obs-001"
|
|
54
|
+
],
|
|
55
|
+
"relevantRecall": 1,
|
|
56
|
+
"fullContextTokenEstimate": 267,
|
|
57
|
+
"sessionStartTokenEstimate": 103,
|
|
58
|
+
"sessionStartTokenSavingsPercent": 61.42
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
"scenarioId": "runtime-hydration",
|
|
62
|
+
"query": "which runtime target should we prefer on windows with wsl",
|
|
63
|
+
"expectedObservationIds": [
|
|
64
|
+
"obs-002"
|
|
65
|
+
],
|
|
66
|
+
"indexObservationIds": [
|
|
67
|
+
"obs-002",
|
|
68
|
+
"obs-001",
|
|
69
|
+
"obs-005",
|
|
70
|
+
"obs-004",
|
|
71
|
+
"obs-003"
|
|
72
|
+
],
|
|
73
|
+
"hydratedObservationIds": [
|
|
74
|
+
"obs-002"
|
|
75
|
+
],
|
|
76
|
+
"relevantRecall": 1,
|
|
77
|
+
"fullContextTokenEstimate": 267,
|
|
78
|
+
"sessionStartTokenEstimate": 97,
|
|
79
|
+
"sessionStartTokenSavingsPercent": 63.67
|
|
80
|
+
},
|
|
81
|
+
{
|
|
82
|
+
"scenarioId": "frontend-quality-hydration",
|
|
83
|
+
"query": "show frontend rubric quality decisions",
|
|
84
|
+
"expectedObservationIds": [
|
|
85
|
+
"obs-003"
|
|
86
|
+
],
|
|
87
|
+
"indexObservationIds": [
|
|
88
|
+
"obs-003",
|
|
89
|
+
"obs-005",
|
|
90
|
+
"obs-004",
|
|
91
|
+
"obs-002",
|
|
92
|
+
"obs-001"
|
|
93
|
+
],
|
|
94
|
+
"hydratedObservationIds": [
|
|
95
|
+
"obs-003"
|
|
96
|
+
],
|
|
97
|
+
"relevantRecall": 1,
|
|
98
|
+
"fullContextTokenEstimate": 267,
|
|
99
|
+
"sessionStartTokenEstimate": 95,
|
|
100
|
+
"sessionStartTokenSavingsPercent": 64.42
|
|
101
|
+
}
|
|
102
|
+
],
|
|
103
|
+
"references": {
|
|
104
|
+
"memorySchemaPath": ".agent-context/state/memory-schema-v1.json",
|
|
105
|
+
"memoryAdapterContractPath": ".agent-context/state/memory-adapter-contract.json",
|
|
106
|
+
"benchmarkOutputPath": ".agent-context/state/memory-continuity-benchmark.json",
|
|
107
|
+
"schemaDeclaredVersion": "1.0.0",
|
|
108
|
+
"adapterContractVersion": "1.0.0"
|
|
109
|
+
},
|
|
110
|
+
"checks": [
|
|
111
|
+
{
|
|
112
|
+
"checkName": "adapter-coverage",
|
|
113
|
+
"passed": true,
|
|
114
|
+
"details": "required=3 missing=0"
|
|
115
|
+
},
|
|
116
|
+
{
|
|
117
|
+
"checkName": "continuity-recall-threshold",
|
|
118
|
+
"passed": true,
|
|
119
|
+
"details": "averageRelevantRecall=1 minimum=0.8"
|
|
120
|
+
},
|
|
121
|
+
{
|
|
122
|
+
"checkName": "session-start-token-savings-threshold",
|
|
123
|
+
"passed": true,
|
|
124
|
+
"details": "averageSessionStartTokenSavingsPercent=63.17 minimum=35"
|
|
125
|
+
},
|
|
126
|
+
{
|
|
127
|
+
"checkName": "privacy-redaction-safety",
|
|
128
|
+
"passed": true,
|
|
129
|
+
"details": "unsafeObservationCount=0 max=0"
|
|
130
|
+
}
|
|
131
|
+
]
|
|
132
|
+
}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schemaVersion": "1.0.0",
|
|
3
|
+
"schemaName": "cross-agent-memory-observation",
|
|
4
|
+
"description": "Provider-agnostic schema for persistent memory observations shared across coding agents and IDE hosts.",
|
|
5
|
+
"requiredFields": [
|
|
6
|
+
"id",
|
|
7
|
+
"projectId",
|
|
8
|
+
"sessionId",
|
|
9
|
+
"adapterId",
|
|
10
|
+
"eventType",
|
|
11
|
+
"timestamp",
|
|
12
|
+
"title",
|
|
13
|
+
"summary",
|
|
14
|
+
"detail",
|
|
15
|
+
"privacy"
|
|
16
|
+
],
|
|
17
|
+
"fieldDefinitions": {
|
|
18
|
+
"id": {
|
|
19
|
+
"type": "string",
|
|
20
|
+
"description": "Stable unique observation identifier."
|
|
21
|
+
},
|
|
22
|
+
"projectId": {
|
|
23
|
+
"type": "string",
|
|
24
|
+
"description": "Repository or workspace identifier."
|
|
25
|
+
},
|
|
26
|
+
"sessionId": {
|
|
27
|
+
"type": "string",
|
|
28
|
+
"description": "Source session identifier from host adapter."
|
|
29
|
+
},
|
|
30
|
+
"adapterId": {
|
|
31
|
+
"type": "string",
|
|
32
|
+
"allowedValues": ["claude-code", "gemini-cli", "vscode-chat", "custom"],
|
|
33
|
+
"description": "Host adapter that captured this observation."
|
|
34
|
+
},
|
|
35
|
+
"eventType": {
|
|
36
|
+
"type": "string",
|
|
37
|
+
"allowedValues": ["prompt", "tool-use", "decision", "summary", "issue", "context"],
|
|
38
|
+
"description": "Observation type for retrieval filtering."
|
|
39
|
+
},
|
|
40
|
+
"timestamp": {
|
|
41
|
+
"type": "string",
|
|
42
|
+
"format": "date-time",
|
|
43
|
+
"description": "ISO timestamp when observation was captured."
|
|
44
|
+
},
|
|
45
|
+
"title": {
|
|
46
|
+
"type": "string",
|
|
47
|
+
"description": "Compact human-readable headline."
|
|
48
|
+
},
|
|
49
|
+
"summary": {
|
|
50
|
+
"type": "string",
|
|
51
|
+
"description": "Compact session-start payload used for progressive disclosure."
|
|
52
|
+
},
|
|
53
|
+
"detail": {
|
|
54
|
+
"type": "string",
|
|
55
|
+
"description": "Expanded observation text fetched on demand."
|
|
56
|
+
},
|
|
57
|
+
"tags": {
|
|
58
|
+
"type": "array",
|
|
59
|
+
"items": "string",
|
|
60
|
+
"description": "Optional normalized tags for query refinement."
|
|
61
|
+
},
|
|
62
|
+
"privacy": {
|
|
63
|
+
"type": "object",
|
|
64
|
+
"requiredFields": ["level", "redactionApplied"],
|
|
65
|
+
"fieldDefinitions": {
|
|
66
|
+
"level": {
|
|
67
|
+
"type": "string",
|
|
68
|
+
"allowedValues": ["public", "internal", "restricted"],
|
|
69
|
+
"description": "Privacy classification level."
|
|
70
|
+
},
|
|
71
|
+
"redactionApplied": {
|
|
72
|
+
"type": "boolean",
|
|
73
|
+
"description": "Indicates whether privacy sanitization modified payload content."
|
|
74
|
+
},
|
|
75
|
+
"redactionReasons": {
|
|
76
|
+
"type": "array",
|
|
77
|
+
"items": "string",
|
|
78
|
+
"description": "Redaction reason tags such as private-tag or token-like-value."
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
},
|
|
83
|
+
"retrievalContract": {
|
|
84
|
+
"sessionStartPayload": ["id", "adapterId", "eventType", "timestamp", "title", "summary"],
|
|
85
|
+
"onDemandPayload": ["detail", "tags", "privacy"],
|
|
86
|
+
"progressiveDisclosure": true
|
|
87
|
+
}
|
|
88
|
+
}
|
package/.cursorrules
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# AGENTIC-SENIOR-CORE DYNAMIC GOVERNANCE RULESET
|
|
2
2
|
|
|
3
|
-
Generated by Agentic-Senior-Core CLI v2.0.26
|
|
3
|
+
Generated by Agentic-Senior-Core CLI v2.5.3
|
|
4
4
|
Timestamp: 2026-04-15T00:14:51.184Z
|
|
5
5
|
Selected profile: beginner
|
|
6
6
|
Selected policy file: .agent-context/policies/llm-judge-threshold.json
|
package/.windsurfrules
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# AGENTIC-SENIOR-CORE DYNAMIC GOVERNANCE RULESET
|
|
2
2
|
|
|
3
|
-
Generated by Agentic-Senior-Core CLI v2.0.26
|
|
3
|
+
Generated by Agentic-Senior-Core CLI v2.5.3
|
|
4
4
|
Timestamp: 2026-04-15T00:14:51.184Z
|
|
5
5
|
Selected profile: beginner
|
|
6
6
|
Selected policy file: .agent-context/policies/llm-judge-threshold.json
|
package/README.md
CHANGED
|
@@ -235,6 +235,27 @@ npx @ryuenn3123/agentic-senior-core init --no-token-optimize
|
|
|
235
235
|
When enabled, the CLI writes `.agent-context/state/token-optimization.json`, regenerates compiled rules, and adds compact command guidance to `.cursorrules` and `.windsurfrules`.
|
|
236
236
|
If an external token proxy is available, the CLI prints setup hints. If not, native fallback guidance stays active, so outside users are not forced to install extra tooling.
|
|
237
237
|
|
|
238
|
+
### Memory Continuity Mode (Enabled by Default on Init)
|
|
239
|
+
|
|
240
|
+
Every `init` flow also enables memory continuity by default.
|
|
241
|
+
This allows cross-session context carryover through compact index-first retrieval plus selective hydration.
|
|
242
|
+
|
|
243
|
+
Quick start:
|
|
244
|
+
|
|
245
|
+
```bash
|
|
246
|
+
# Default behavior (explicit flag optional)
|
|
247
|
+
npx @ryuenn3123/agentic-senior-core init --memory-continuity
|
|
248
|
+
|
|
249
|
+
# Opt out when needed
|
|
250
|
+
npx @ryuenn3123/agentic-senior-core init --no-memory-continuity
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
When enabled, the CLI writes `.agent-context/state/memory-continuity.json`, regenerates compiled rules, and injects `MEMORY CONTINUITY PROFILE` guidance into `.cursorrules` and `.windsurfrules`.
|
|
254
|
+
|
|
255
|
+
Compatibility note:
|
|
256
|
+
- Works with local IDE, CLI, and cloud IDE chat runtimes that support the adapter contract or MCP retrieval path.
|
|
257
|
+
- Generic web chat runtimes without repository tool hooks cannot auto-hydrate memory at runtime and should use manual summary handoff.
|
|
258
|
+
|
|
238
259
|
### Token Efficiency Benchmark Snapshot
|
|
239
260
|
|
|
240
261
|
Latest local benchmark (2026-04-11) from `.agent-context/state/token-optimization-benchmark.json`:
|
|
@@ -294,6 +315,27 @@ For CI pipelines that only need stdout JSON:
|
|
|
294
315
|
node ./scripts/benchmark-writer-judge-matrix.mjs --stdout-only
|
|
295
316
|
```
|
|
296
317
|
|
|
318
|
+
### Memory Continuity Benchmark (V2.5.3 Pilot)
|
|
319
|
+
|
|
320
|
+
Validate cross-agent memory hydration quality, privacy redaction safety, and session-start token savings:
|
|
321
|
+
|
|
322
|
+
```bash
|
|
323
|
+
npm run benchmark:continuity
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
This command writes:
|
|
327
|
+
- `.agent-context/state/memory-continuity-benchmark.json`
|
|
328
|
+
|
|
329
|
+
Reference artifacts:
|
|
330
|
+
- `.agent-context/state/memory-schema-v1.json`
|
|
331
|
+
- `.agent-context/state/memory-adapter-contract.json`
|
|
332
|
+
|
|
333
|
+
For CI pipelines that only need stdout JSON:
|
|
334
|
+
|
|
335
|
+
```bash
|
|
336
|
+
node ./scripts/memory-continuity-benchmark.mjs --stdout-only
|
|
337
|
+
```
|
|
338
|
+
|
|
297
339
|
### Benchmark Quickstart Path (V2.5)
|
|
298
340
|
|
|
299
341
|
For new users, run this minimal sequence first:
|
|
@@ -302,6 +344,7 @@ For new users, run this minimal sequence first:
|
|
|
302
344
|
npm run benchmark:detection
|
|
303
345
|
npm run benchmark:writer-judge
|
|
304
346
|
npm run benchmark:bundle
|
|
347
|
+
npm run benchmark:continuity
|
|
305
348
|
```
|
|
306
349
|
|
|
307
350
|
This gives a fast baseline of accuracy, writer-judge comparison, evidence packaging, and memory continuity in one pass.
|