@ryuenn3123/agentic-senior-core 2.0.26 → 2.0.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,75 @@
1
+ {
2
+ "generatedAt": "2026-04-17T03:20:15.400Z",
3
+ "reportName": "benchmark-history",
4
+ "maxEntries": 90,
5
+ "history": [
6
+ {
7
+ "generatedAt": "2026-04-17T02:54:01.239Z",
8
+ "releaseVersion": "2.0.26",
9
+ "fixtureCount": 12,
10
+ "top1Accuracy": 0.9167,
11
+ "manualCorrectionRate": 0.0833,
12
+ "benchmarkGatePassed": true,
13
+ "intelligencePassed": true,
14
+ "staleWatchlistCount": 0,
15
+ "reliabilityPassed": true,
16
+ "reliabilityRiskLevel": "monitor",
17
+ "incorrectDetectionRate": 0.0833,
18
+ "lowConfidenceRate": 0.0833,
19
+ "vulnerabilityTotal": null,
20
+ "criticalVulnerabilityCount": null,
21
+ "forbiddenContentPassed": true
22
+ },
23
+ {
24
+ "generatedAt": "2026-04-17T02:54:57.419Z",
25
+ "releaseVersion": "2.0.26",
26
+ "fixtureCount": 12,
27
+ "top1Accuracy": 0.9167,
28
+ "manualCorrectionRate": 0.0833,
29
+ "benchmarkGatePassed": true,
30
+ "intelligencePassed": true,
31
+ "staleWatchlistCount": 0,
32
+ "reliabilityPassed": true,
33
+ "reliabilityRiskLevel": "monitor",
34
+ "incorrectDetectionRate": 0.0833,
35
+ "lowConfidenceRate": 0.0833,
36
+ "vulnerabilityTotal": null,
37
+ "criticalVulnerabilityCount": null,
38
+ "forbiddenContentPassed": true
39
+ },
40
+ {
41
+ "generatedAt": "2026-04-17T03:19:31.047Z",
42
+ "releaseVersion": "2.0.26",
43
+ "fixtureCount": 12,
44
+ "top1Accuracy": 0.9167,
45
+ "manualCorrectionRate": 0.0833,
46
+ "benchmarkGatePassed": true,
47
+ "intelligencePassed": true,
48
+ "staleWatchlistCount": 0,
49
+ "reliabilityPassed": true,
50
+ "reliabilityRiskLevel": "monitor",
51
+ "incorrectDetectionRate": 0.0833,
52
+ "lowConfidenceRate": 0.0833,
53
+ "vulnerabilityTotal": null,
54
+ "criticalVulnerabilityCount": null,
55
+ "forbiddenContentPassed": true
56
+ },
57
+ {
58
+ "generatedAt": "2026-04-17T03:20:15.400Z",
59
+ "releaseVersion": "2.0.26",
60
+ "fixtureCount": 12,
61
+ "top1Accuracy": 0.9167,
62
+ "manualCorrectionRate": 0.0833,
63
+ "benchmarkGatePassed": true,
64
+ "intelligencePassed": true,
65
+ "staleWatchlistCount": 0,
66
+ "reliabilityPassed": true,
67
+ "reliabilityRiskLevel": "monitor",
68
+ "incorrectDetectionRate": 0.0833,
69
+ "lowConfidenceRate": 0.0833,
70
+ "vulnerabilityTotal": null,
71
+ "criticalVulnerabilityCount": null,
72
+ "forbiddenContentPassed": true
73
+ }
74
+ ]
75
+ }
@@ -0,0 +1,5 @@
1
+ snapshotIndex,generatedAt,releaseVersion,top1Accuracy,manualCorrectionRate,incorrectDetectionRate,lowConfidenceRate,staleWatchlistCount,vulnerabilityTotal,criticalVulnerabilityCount,benchmarkGatePassed,intelligencePassed,reliabilityPassed,reliabilityRiskLevel
2
+ "1","2026-04-17T02:54:01.239Z","2.0.26","0.9167","0.0833","0.0833","0.0833","0",,,"true","true","true","monitor"
3
+ "2","2026-04-17T02:54:57.419Z","2.0.26","0.9167","0.0833","0.0833","0.0833","0",,,"true","true","true","monitor"
4
+ "3","2026-04-17T03:19:31.047Z","2.0.26","0.9167","0.0833","0.0833","0.0833","0",,,"true","true","true","monitor"
5
+ "4","2026-04-17T03:20:15.400Z","2.0.26","0.9167","0.0833","0.0833","0.0833","0",,,"true","true","true","monitor"
@@ -0,0 +1,140 @@
1
+ {
2
+ "generatedAt": "2026-04-17T03:20:15.400Z",
3
+ "reportName": "benchmark-trend-report",
4
+ "releaseVersion": "2.0.26",
5
+ "historyCount": 4,
6
+ "releaseDelta": {
7
+ "currentReleaseVersion": "2.0.26",
8
+ "previousReleaseVersion": "2.0.26",
9
+ "comparedSnapshot": {
10
+ "currentGeneratedAt": "2026-04-17T03:20:15.400Z",
11
+ "previousGeneratedAt": "2026-04-17T03:19:31.047Z"
12
+ },
13
+ "top1AccuracyDelta": 0,
14
+ "manualCorrectionRateDelta": 0,
15
+ "staleWatchlistCountDelta": 0,
16
+ "vulnerabilityTotalDelta": 0,
17
+ "summary": [
18
+ "top1Accuracy: +0",
19
+ "manualCorrectionRate: +0",
20
+ "staleWatchlistCount: +0",
21
+ "vulnerabilityTotal: +0"
22
+ ]
23
+ },
24
+ "trendTable": [
25
+ {
26
+ "snapshotIndex": 1,
27
+ "generatedAt": "2026-04-17T02:54:01.239Z",
28
+ "releaseVersion": "2.0.26",
29
+ "top1Accuracy": 0.9167,
30
+ "manualCorrectionRate": 0.0833,
31
+ "incorrectDetectionRate": 0.0833,
32
+ "lowConfidenceRate": 0.0833,
33
+ "staleWatchlistCount": 0,
34
+ "vulnerabilityTotal": null,
35
+ "criticalVulnerabilityCount": null,
36
+ "benchmarkGatePassed": true,
37
+ "intelligencePassed": true,
38
+ "reliabilityPassed": true,
39
+ "reliabilityRiskLevel": "monitor"
40
+ },
41
+ {
42
+ "snapshotIndex": 2,
43
+ "generatedAt": "2026-04-17T02:54:57.419Z",
44
+ "releaseVersion": "2.0.26",
45
+ "top1Accuracy": 0.9167,
46
+ "manualCorrectionRate": 0.0833,
47
+ "incorrectDetectionRate": 0.0833,
48
+ "lowConfidenceRate": 0.0833,
49
+ "staleWatchlistCount": 0,
50
+ "vulnerabilityTotal": null,
51
+ "criticalVulnerabilityCount": null,
52
+ "benchmarkGatePassed": true,
53
+ "intelligencePassed": true,
54
+ "reliabilityPassed": true,
55
+ "reliabilityRiskLevel": "monitor"
56
+ },
57
+ {
58
+ "snapshotIndex": 3,
59
+ "generatedAt": "2026-04-17T03:19:31.047Z",
60
+ "releaseVersion": "2.0.26",
61
+ "top1Accuracy": 0.9167,
62
+ "manualCorrectionRate": 0.0833,
63
+ "incorrectDetectionRate": 0.0833,
64
+ "lowConfidenceRate": 0.0833,
65
+ "staleWatchlistCount": 0,
66
+ "vulnerabilityTotal": null,
67
+ "criticalVulnerabilityCount": null,
68
+ "benchmarkGatePassed": true,
69
+ "intelligencePassed": true,
70
+ "reliabilityPassed": true,
71
+ "reliabilityRiskLevel": "monitor"
72
+ },
73
+ {
74
+ "snapshotIndex": 4,
75
+ "generatedAt": "2026-04-17T03:20:15.400Z",
76
+ "releaseVersion": "2.0.26",
77
+ "top1Accuracy": 0.9167,
78
+ "manualCorrectionRate": 0.0833,
79
+ "incorrectDetectionRate": 0.0833,
80
+ "lowConfidenceRate": 0.0833,
81
+ "staleWatchlistCount": 0,
82
+ "vulnerabilityTotal": null,
83
+ "criticalVulnerabilityCount": null,
84
+ "benchmarkGatePassed": true,
85
+ "intelligencePassed": true,
86
+ "reliabilityPassed": true,
87
+ "reliabilityRiskLevel": "monitor"
88
+ }
89
+ ],
90
+ "chartSeries": {
91
+ "generatedAt": [
92
+ "2026-04-17T02:54:01.239Z",
93
+ "2026-04-17T02:54:57.419Z",
94
+ "2026-04-17T03:19:31.047Z",
95
+ "2026-04-17T03:20:15.400Z"
96
+ ],
97
+ "top1Accuracy": [
98
+ 0.9167,
99
+ 0.9167,
100
+ 0.9167,
101
+ 0.9167
102
+ ],
103
+ "manualCorrectionRate": [
104
+ 0.0833,
105
+ 0.0833,
106
+ 0.0833,
107
+ 0.0833
108
+ ],
109
+ "incorrectDetectionRate": [
110
+ 0.0833,
111
+ 0.0833,
112
+ 0.0833,
113
+ 0.0833
114
+ ],
115
+ "lowConfidenceRate": [
116
+ 0.0833,
117
+ 0.0833,
118
+ 0.0833,
119
+ 0.0833
120
+ ],
121
+ "staleWatchlistCount": [
122
+ 0,
123
+ 0,
124
+ 0,
125
+ 0
126
+ ],
127
+ "vulnerabilityTotal": [
128
+ null,
129
+ null,
130
+ null,
131
+ null
132
+ ]
133
+ },
134
+ "artifacts": {
135
+ "historyPath": ".agent-context/state/benchmark-history.json",
136
+ "jsonPath": ".agent-context/state/benchmark-trend-report.json",
137
+ "csvPath": ".agent-context/state/benchmark-trend-report.csv",
138
+ "writeMode": "stdout-and-file"
139
+ }
140
+ }
@@ -3,17 +3,17 @@
3
3
  {
4
4
  "repository": "sickn33/antigravity-awesome-skills",
5
5
  "owner": "core-architecture",
6
- "lastReviewedAt": "2026-04-02"
6
+ "lastReviewedAt": "2026-04-17"
7
7
  },
8
8
  {
9
9
  "repository": "github/awesome-copilot",
10
10
  "owner": "core-architecture",
11
- "lastReviewedAt": "2026-04-02"
11
+ "lastReviewedAt": "2026-04-17"
12
12
  },
13
13
  {
14
14
  "repository": "MiniMax-AI/skills",
15
15
  "owner": "frontend-governance",
16
- "lastReviewedAt": "2026-04-02"
16
+ "lastReviewedAt": "2026-04-17"
17
17
  }
18
18
  ]
19
19
  }
@@ -0,0 +1,52 @@
1
+ {
2
+ "schemaVersion": "1.0.0",
3
+ "contractName": "cross-agent-memory-adapter",
4
+ "description": "Adapter contract for ingesting and retrieving shared memory observations across IDE hosts.",
5
+ "requiredAdapters": [
6
+ "claude-code",
7
+ "gemini-cli",
8
+ "vscode-chat"
9
+ ],
10
+ "requiredOperations": {
11
+ "ingestion": [
12
+ "captureObservation",
13
+ "captureSessionSummary"
14
+ ],
15
+ "retrieval": [
16
+ "searchIndex",
17
+ "getTimeline",
18
+ "getObservations"
19
+ ],
20
+ "privacy": [
21
+ "applyPrivateTagRedaction",
22
+ "applyInlineSecretRedaction"
23
+ ]
24
+ },
25
+ "adapters": [
26
+ {
27
+ "adapterId": "claude-code",
28
+ "hostType": "plugin-hooks",
29
+ "status": "pilot-ready",
30
+ "ingestionEvents": ["SessionStart", "UserPromptSubmit", "PostToolUse", "Stop", "SessionEnd"],
31
+ "retrievalMode": "mcp-tools"
32
+ },
33
+ {
34
+ "adapterId": "gemini-cli",
35
+ "hostType": "plugin-hooks",
36
+ "status": "pilot-ready",
37
+ "ingestionEvents": ["session_start", "prompt_submit", "post_tool", "session_end"],
38
+ "retrievalMode": "mcp-tools"
39
+ },
40
+ {
41
+ "adapterId": "vscode-chat",
42
+ "hostType": "chat-customization-plugin",
43
+ "status": "pilot-ready",
44
+ "ingestionEvents": ["chatStart", "promptSubmit", "postToolUse", "chatEnd"],
45
+ "retrievalMode": "mcp-tools"
46
+ }
47
+ ],
48
+ "notes": [
49
+ "Web chat hosts are explicitly out of scope for this pilot because local runtime hooks are unavailable.",
50
+ "Adapters should emit provider-agnostic payloads matching .agent-context/state/memory-schema-v1.json."
51
+ ]
52
+ }
@@ -0,0 +1,132 @@
1
+ {
2
+ "generatedAt": "2026-04-17T03:27:46.024Z",
3
+ "reportName": "memory-continuity-benchmark",
4
+ "schemaVersion": "1.0.0",
5
+ "passed": true,
6
+ "failureCount": 0,
7
+ "thresholds": {
8
+ "minimumRelevantRecall": 0.8,
9
+ "minimumSessionStartTokenSavingsPercent": 35,
10
+ "maximumUnsafeObservationCount": 0
11
+ },
12
+ "adapterCoverage": {
13
+ "requiredAdapterIds": [
14
+ "claude-code",
15
+ "gemini-cli",
16
+ "vscode-chat"
17
+ ],
18
+ "availableAdapterIds": [
19
+ "claude-code",
20
+ "gemini-cli",
21
+ "vscode-chat"
22
+ ],
23
+ "missingAdapterIds": [],
24
+ "passed": true
25
+ },
26
+ "privacyControls": {
27
+ "redactedObservationCount": 2,
28
+ "privateTagRedactionCount": 1,
29
+ "inlineRedactionCount": 1,
30
+ "unsafeObservationCount": 0
31
+ },
32
+ "continuitySummary": {
33
+ "totalObservationCount": 5,
34
+ "scenarioCount": 3,
35
+ "averageRelevantRecall": 1,
36
+ "averageSessionStartTokenSavingsPercent": 63.17
37
+ },
38
+ "scenarios": [
39
+ {
40
+ "scenarioId": "docker-lane-hydration",
41
+ "query": "what is docker strategy for development and production",
42
+ "expectedObservationIds": [
43
+ "obs-001"
44
+ ],
45
+ "indexObservationIds": [
46
+ "obs-001",
47
+ "obs-005",
48
+ "obs-003",
49
+ "obs-002",
50
+ "obs-004"
51
+ ],
52
+ "hydratedObservationIds": [
53
+ "obs-001"
54
+ ],
55
+ "relevantRecall": 1,
56
+ "fullContextTokenEstimate": 267,
57
+ "sessionStartTokenEstimate": 103,
58
+ "sessionStartTokenSavingsPercent": 61.42
59
+ },
60
+ {
61
+ "scenarioId": "runtime-hydration",
62
+ "query": "which runtime target should we prefer on windows with wsl",
63
+ "expectedObservationIds": [
64
+ "obs-002"
65
+ ],
66
+ "indexObservationIds": [
67
+ "obs-002",
68
+ "obs-001",
69
+ "obs-005",
70
+ "obs-004",
71
+ "obs-003"
72
+ ],
73
+ "hydratedObservationIds": [
74
+ "obs-002"
75
+ ],
76
+ "relevantRecall": 1,
77
+ "fullContextTokenEstimate": 267,
78
+ "sessionStartTokenEstimate": 97,
79
+ "sessionStartTokenSavingsPercent": 63.67
80
+ },
81
+ {
82
+ "scenarioId": "frontend-quality-hydration",
83
+ "query": "show frontend rubric quality decisions",
84
+ "expectedObservationIds": [
85
+ "obs-003"
86
+ ],
87
+ "indexObservationIds": [
88
+ "obs-003",
89
+ "obs-005",
90
+ "obs-004",
91
+ "obs-002",
92
+ "obs-001"
93
+ ],
94
+ "hydratedObservationIds": [
95
+ "obs-003"
96
+ ],
97
+ "relevantRecall": 1,
98
+ "fullContextTokenEstimate": 267,
99
+ "sessionStartTokenEstimate": 95,
100
+ "sessionStartTokenSavingsPercent": 64.42
101
+ }
102
+ ],
103
+ "references": {
104
+ "memorySchemaPath": ".agent-context/state/memory-schema-v1.json",
105
+ "memoryAdapterContractPath": ".agent-context/state/memory-adapter-contract.json",
106
+ "benchmarkOutputPath": ".agent-context/state/memory-continuity-benchmark.json",
107
+ "schemaDeclaredVersion": "1.0.0",
108
+ "adapterContractVersion": "1.0.0"
109
+ },
110
+ "checks": [
111
+ {
112
+ "checkName": "adapter-coverage",
113
+ "passed": true,
114
+ "details": "required=3 missing=0"
115
+ },
116
+ {
117
+ "checkName": "continuity-recall-threshold",
118
+ "passed": true,
119
+ "details": "averageRelevantRecall=1 minimum=0.8"
120
+ },
121
+ {
122
+ "checkName": "session-start-token-savings-threshold",
123
+ "passed": true,
124
+ "details": "averageSessionStartTokenSavingsPercent=63.17 minimum=35"
125
+ },
126
+ {
127
+ "checkName": "privacy-redaction-safety",
128
+ "passed": true,
129
+ "details": "unsafeObservationCount=0 max=0"
130
+ }
131
+ ]
132
+ }
@@ -0,0 +1,88 @@
1
+ {
2
+ "schemaVersion": "1.0.0",
3
+ "schemaName": "cross-agent-memory-observation",
4
+ "description": "Provider-agnostic schema for persistent memory observations shared across coding agents and IDE hosts.",
5
+ "requiredFields": [
6
+ "id",
7
+ "projectId",
8
+ "sessionId",
9
+ "adapterId",
10
+ "eventType",
11
+ "timestamp",
12
+ "title",
13
+ "summary",
14
+ "detail",
15
+ "privacy"
16
+ ],
17
+ "fieldDefinitions": {
18
+ "id": {
19
+ "type": "string",
20
+ "description": "Stable unique observation identifier."
21
+ },
22
+ "projectId": {
23
+ "type": "string",
24
+ "description": "Repository or workspace identifier."
25
+ },
26
+ "sessionId": {
27
+ "type": "string",
28
+ "description": "Source session identifier from host adapter."
29
+ },
30
+ "adapterId": {
31
+ "type": "string",
32
+ "allowedValues": ["claude-code", "gemini-cli", "vscode-chat", "custom"],
33
+ "description": "Host adapter that captured this observation."
34
+ },
35
+ "eventType": {
36
+ "type": "string",
37
+ "allowedValues": ["prompt", "tool-use", "decision", "summary", "issue", "context"],
38
+ "description": "Observation type for retrieval filtering."
39
+ },
40
+ "timestamp": {
41
+ "type": "string",
42
+ "format": "date-time",
43
+ "description": "ISO 8601 (RFC 3339) timestamp of when the observation was captured."
44
+ },
45
+ "title": {
46
+ "type": "string",
47
+ "description": "Compact human-readable headline."
48
+ },
49
+ "summary": {
50
+ "type": "string",
51
+ "description": "Compact session-start payload used for progressive disclosure."
52
+ },
53
+ "detail": {
54
+ "type": "string",
55
+ "description": "Expanded observation text fetched on demand."
56
+ },
57
+ "tags": {
58
+ "type": "array",
59
+ "items": "string",
60
+ "description": "Optional normalized tags for query refinement."
61
+ },
62
+ "privacy": {
63
+ "type": "object",
64
+ "requiredFields": ["level", "redactionApplied"],
65
+ "fieldDefinitions": {
66
+ "level": {
67
+ "type": "string",
68
+ "allowedValues": ["public", "internal", "restricted"],
69
+ "description": "Privacy classification level."
70
+ },
71
+ "redactionApplied": {
72
+ "type": "boolean",
73
+ "description": "Indicates whether privacy sanitization modified payload content."
74
+ },
75
+ "redactionReasons": {
76
+ "type": "array",
77
+ "items": "string",
78
+ "description": "Redaction reason tags such as private-tag or token-like-value."
79
+ }
80
+ }
81
+ }
82
+ },
83
+ "retrievalContract": {
84
+ "sessionStartPayload": ["id", "adapterId", "eventType", "timestamp", "title", "summary"],
85
+ "onDemandPayload": ["detail", "tags", "privacy"],
86
+ "progressiveDisclosure": true
87
+ }
88
+ }
package/.cursorrules CHANGED
@@ -1,6 +1,6 @@
1
1
  # AGENTIC-SENIOR-CORE DYNAMIC GOVERNANCE RULESET
2
2
 
3
- Generated by Agentic-Senior-Core CLI v2.0.26
3
+ Generated by Agentic-Senior-Core CLI v2.0.27
4
4
  Timestamp: 2026-04-15T00:14:51.184Z
5
5
  Selected profile: beginner
6
6
  Selected policy file: .agent-context/policies/llm-judge-threshold.json
package/.windsurfrules CHANGED
@@ -1,6 +1,6 @@
1
1
  # AGENTIC-SENIOR-CORE DYNAMIC GOVERNANCE RULESET
2
2
 
3
- Generated by Agentic-Senior-Core CLI v2.0.26
3
+ Generated by Agentic-Senior-Core CLI v2.0.27
4
4
  Timestamp: 2026-04-15T00:14:51.184Z
5
5
  Selected profile: beginner
6
6
  Selected policy file: .agent-context/policies/llm-judge-threshold.json
package/README.md CHANGED
@@ -294,6 +294,27 @@ For CI pipelines that only need stdout JSON:
294
294
  node ./scripts/benchmark-writer-judge-matrix.mjs --stdout-only
295
295
  ```
296
296
 
297
+ ### Memory Continuity Benchmark (V2.5.3 Pilot)
298
+
299
+ Validate cross-agent memory hydration quality, privacy redaction safety, and session-start token savings:
300
+
301
+ ```bash
302
+ npm run benchmark:continuity
303
+ ```
304
+
305
+ This command writes:
306
+ - `.agent-context/state/memory-continuity-benchmark.json`
307
+
308
+ Reference artifacts:
309
+ - `.agent-context/state/memory-schema-v1.json`
310
+ - `.agent-context/state/memory-adapter-contract.json`
311
+
312
+ For CI pipelines that only need stdout JSON:
313
+
314
+ ```bash
315
+ node ./scripts/memory-continuity-benchmark.mjs --stdout-only
316
+ ```
317
+
297
318
  ### Benchmark Quickstart Path (V2.5)
298
319
 
299
320
  For new users, run this minimal sequence first:
@@ -302,6 +323,7 @@ For new users, run this minimal sequence first:
302
323
  npm run benchmark:detection
303
324
  npm run benchmark:writer-judge
304
325
  npm run benchmark:bundle
326
+ npm run benchmark:continuity
305
327
  ```
306
328
 
307
329
  This gives a fast baseline of accuracy, writer-judge comparison, evidence packaging, and memory continuity in one pass.