@ryuenn3123/agentic-senior-core 3.0.49 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. package/.agent-context/prompts/bootstrap-design.md +2 -1
  2. package/.agent-context/review-checklists/pr-checklist.md +1 -0
  3. package/.agent-context/rules/api-docs.md +63 -45
  4. package/.agent-context/rules/architecture.md +133 -118
  5. package/.agent-context/rules/database-design.md +36 -16
  6. package/.agent-context/rules/docker-runtime.md +66 -43
  7. package/.agent-context/rules/efficiency-vs-hype.md +38 -17
  8. package/.agent-context/rules/error-handling.md +35 -14
  9. package/.agent-context/rules/event-driven.md +35 -18
  10. package/.agent-context/rules/frontend-architecture.md +103 -74
  11. package/.agent-context/rules/git-workflow.md +81 -197
  12. package/.agent-context/rules/microservices.md +42 -41
  13. package/.agent-context/rules/naming-conv.md +27 -6
  14. package/.agent-context/rules/performance.md +32 -10
  15. package/.agent-context/rules/realtime.md +26 -9
  16. package/.agent-context/rules/security.md +39 -19
  17. package/.agent-context/rules/testing.md +36 -15
  18. package/AGENTS.md +9 -9
  19. package/README.md +10 -1
  20. package/lib/cli/commands/init.mjs +1 -0
  21. package/lib/cli/compiler.mjs +1 -0
  22. package/lib/cli/detector/constants.mjs +135 -0
  23. package/lib/cli/detector/design-evidence/collector.mjs +256 -0
  24. package/lib/cli/detector/design-evidence/constants.mjs +39 -0
  25. package/lib/cli/detector/design-evidence/file-traversal.mjs +83 -0
  26. package/lib/cli/detector/design-evidence/structured-attribute-evidence.mjs +117 -0
  27. package/lib/cli/detector/design-evidence/summary.mjs +109 -0
  28. package/lib/cli/detector/design-evidence/utility-helpers.mjs +122 -0
  29. package/lib/cli/detector/design-evidence.mjs +25 -610
  30. package/lib/cli/detector/stack-detection.mjs +243 -0
  31. package/lib/cli/detector/ui-signals.mjs +150 -0
  32. package/lib/cli/detector/workspace-scan.mjs +177 -0
  33. package/lib/cli/detector.mjs +20 -688
  34. package/lib/cli/memory-continuity.mjs +1 -0
  35. package/lib/cli/project-scaffolder/design-contract/sections/audits.mjs +96 -0
  36. package/lib/cli/project-scaffolder/design-contract/sections/conceptual-anchor.mjs +116 -0
  37. package/lib/cli/project-scaffolder/design-contract/sections/execution-handoff.mjs +211 -0
  38. package/lib/cli/project-scaffolder/design-contract/seed-signals.mjs +79 -0
  39. package/lib/cli/project-scaffolder/design-contract/signal-vocab.mjs +64 -0
  40. package/lib/cli/project-scaffolder/design-contract/validation/anchor-validators.mjs +222 -0
  41. package/lib/cli/project-scaffolder/design-contract/validation/audit-validators.mjs +117 -0
  42. package/lib/cli/project-scaffolder/design-contract/validation/completeness.mjs +83 -0
  43. package/lib/cli/project-scaffolder/design-contract/validation/execution-validators.mjs +328 -0
  44. package/lib/cli/project-scaffolder/design-contract/validation/helpers.mjs +8 -0
  45. package/lib/cli/project-scaffolder/design-contract/validation/structural-validators.mjs +79 -0
  46. package/lib/cli/project-scaffolder/design-contract/validation/system-validators.mjs +256 -0
  47. package/lib/cli/project-scaffolder/design-contract/validation.mjs +59 -896
  48. package/lib/cli/project-scaffolder/design-contract.mjs +147 -557
  49. package/mcp.json +30 -9
  50. package/package.json +17 -2
  51. package/scripts/audit-cache-layer-contract.mjs +258 -0
  52. package/scripts/audit-caching-scope-hygiene.mjs +263 -0
  53. package/scripts/audit-file-size.mjs +219 -0
  54. package/scripts/audit-reflection-citations.mjs +163 -0
  55. package/scripts/audit-release-bundle.mjs +170 -0
  56. package/scripts/audit-rule-id-uniqueness.mjs +313 -0
  57. package/scripts/benchmark-evidence-bundle.mjs +1 -0
  58. package/scripts/build-release-benchmark-bundle.mjs +204 -0
  59. package/scripts/context-triggered-audit.mjs +1 -0
  60. package/scripts/documentation-boundary-audit.mjs +1 -0
  61. package/scripts/explain-on-demand-audit.mjs +2 -1
  62. package/scripts/frontend-usability-audit.mjs +10 -10
  63. package/scripts/llm-judge/checklist-loader.mjs +45 -0
  64. package/scripts/llm-judge/constants.mjs +66 -0
  65. package/scripts/llm-judge/diff-collection.mjs +74 -0
  66. package/scripts/llm-judge/prompting.mjs +78 -0
  67. package/scripts/llm-judge/providers.mjs +111 -0
  68. package/scripts/llm-judge/verdict.mjs +134 -0
  69. package/scripts/llm-judge.mjs +21 -482
  70. package/scripts/mcp-server/tool-registry.mjs +55 -0
  71. package/scripts/mcp-server/tools.mjs +137 -1
  72. package/scripts/migrate-rule-format/id-prefix-table.mjs +37 -0
  73. package/scripts/migrate-rule-format/parse-legacy.mjs +180 -0
  74. package/scripts/migrate-rule-format/render-new.mjs +169 -0
  75. package/scripts/migrate-rule-format/roundtrip-validate.mjs +89 -0
  76. package/scripts/migrate-rule-format.mjs +192 -0
  77. package/scripts/release-gate/constants.mjs +1 -1
  78. package/scripts/release-gate/static-checks.mjs +1 -1
  79. package/scripts/rules-guardian-audit.mjs +5 -2
  80. package/scripts/single-source-lazy-loading-audit.mjs +2 -1
  81. package/scripts/ui-design-judge/git-input.mjs +3 -0
  82. package/scripts/validate/config.mjs +3 -2
  83. package/scripts/validate/coverage-checks.mjs +1 -1
  84. package/scripts/validate.mjs +93 -1
package/mcp.json CHANGED
@@ -1,25 +1,36 @@
1
1
  {
2
2
  "version": "1.1",
3
3
  "name": "agentic-senior-core",
4
- "description": "MCP configuration for governance-aware diagnostics, scoped rule loading, and self-healing workflows with dynamic knowledge injection.",
4
+ "description": "MCP configuration for governance-aware diagnostics and scoped rule loading. Five knowledge layers ship as concrete file-backed surfaces today; four additional layers are declared as planned dynamic surfaces. See docs/architecture-vision.md for the roadmap.",
5
5
  "knowledgeLayers": {
6
6
  "enabled": true,
7
- "description": "9-layer dynamic knowledge injection for AI agents",
7
+ "description": "Knowledge layer registry. 5 layers are file-backed and active today (rules, prompts, state, policies, project-context). 4 layers are reserved as planned dynamic surfaces (stack-strategies, architecture-playbooks, execution-contracts, governance-modes) and resolve to no-op until implemented in a later phase.",
8
+ "implementedLayerCount": 5,
9
+ "plannedLayerCount": 4,
10
+ "totalDeclaredLayers": 9,
11
+ "roadmapReference": "docs/architecture-vision.md",
8
12
  "layers": {
9
13
  "rules": {
10
14
  "path": ".agent-context/rules",
11
15
  "count": 15,
12
- "autoLoad": true
16
+ "autoLoad": true,
17
+ "status": "implemented"
13
18
  },
14
19
  "stack-strategies": {
15
20
  "path": "dynamic",
16
21
  "autoLoad": true,
17
- "type": "virtual"
22
+ "type": "virtual",
23
+ "status": "planned",
24
+ "plannedPhase": "phase-2-or-later",
25
+ "rationale": "Reserved for runtime-decision-signal cache derived from detected project stack."
18
26
  },
19
27
  "architecture-playbooks": {
20
28
  "path": "dynamic",
21
29
  "autoLoad": true,
22
- "type": "virtual"
30
+ "type": "virtual",
31
+ "status": "planned",
32
+ "plannedPhase": "phase-2-or-later",
33
+ "rationale": "Reserved for structural-planning playbooks derived from repo evidence."
23
34
  },
24
35
  "execution-contracts": {
25
36
  "path": "dynamic",
@@ -29,12 +40,16 @@
29
40
  "review-checklists",
30
41
  "policies"
31
42
  ],
32
- "type": "virtual"
43
+ "type": "virtual",
44
+ "status": "planned",
45
+ "plannedPhase": "phase-3-anti-halu",
46
+ "rationale": "Reserved for active-contract resolution. Today the agent reads prompts, review-checklists, and policies directly from their file-backed layers; the dedicated execution-contract resolver is planned for the validation MCP tools in Phase 3."
33
47
  },
34
48
  "prompts": {
35
49
  "path": ".agent-context/prompts",
36
50
  "count": 4,
37
51
  "autoLoad": true,
52
+ "status": "implemented",
38
53
  "templates": [
39
54
  "init-project",
40
55
  "bootstrap-design",
@@ -50,22 +65,28 @@
50
65
  "regulated",
51
66
  "startup"
52
67
  ],
53
- "type": "virtual"
68
+ "type": "virtual",
69
+ "status": "planned",
70
+ "plannedPhase": "phase-2-or-later",
71
+ "rationale": "Reserved for governance-mode selection metadata. Profile selection lives in policy files today and the mode-resolver is planned."
54
72
  },
55
73
  "state": {
56
74
  "path": ".agent-context/state",
57
75
  "count": 22,
58
- "autoLoad": true
76
+ "autoLoad": true,
77
+ "status": "implemented"
59
78
  },
60
79
  "policies": {
61
80
  "path": ".agent-context/policies",
62
81
  "count": 1,
63
- "autoLoad": true
82
+ "autoLoad": true,
83
+ "status": "implemented"
64
84
  },
65
85
  "project-context": {
66
86
  "path": "docs",
67
87
  "count": 0,
68
88
  "autoLoad": false,
89
+ "status": "implemented",
69
90
  "sources": [
70
91
  "project-brief",
71
92
  "architecture-decision-record",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ryuenn3123/agentic-senior-core",
3
- "version": "3.0.49",
3
+ "version": "4.0.0",
4
4
  "type": "module",
5
5
  "description": "Force your AI Agent to code like a Staff Engineer, not a Junior.",
6
6
  "bin": {
@@ -56,7 +56,14 @@
56
56
  "audit:rules-guardian": "node ./scripts/rules-guardian-audit.mjs",
57
57
  "audit:explain-on-demand": "node ./scripts/explain-on-demand-audit.mjs",
58
58
  "audit:single-source-lazy-loading": "node ./scripts/single-source-lazy-loading-audit.mjs",
59
+ "audit:cache-layer-contract": "node ./scripts/audit-cache-layer-contract.mjs",
60
+ "audit:reflection-citations": "node ./scripts/audit-reflection-citations.mjs",
61
+ "audit:caching-scope-hygiene": "node ./scripts/audit-caching-scope-hygiene.mjs",
62
+ "audit:release-bundle": "node ./scripts/audit-release-bundle.mjs",
63
+ "audit:file-size": "node ./scripts/audit-file-size.mjs",
64
+ "audit:rule-id-uniqueness": "node ./scripts/audit-rule-id-uniqueness.mjs",
59
65
  "audit:v3-purge": "node ./scripts/v3-purge-audit.mjs",
66
+ "build:release-bundle": "node ./scripts/build-release-benchmark-bundle.mjs",
60
67
  "sync:adapters": "node ./scripts/sync-thin-adapters.mjs",
61
68
  "check:adapters": "node ./scripts/sync-thin-adapters.mjs --check",
62
69
  "gate:release": "node ./scripts/release-gate.mjs && node ./scripts/forbidden-content-check.mjs",
@@ -69,11 +76,19 @@
69
76
  "benchmark:gate": "node ./scripts/benchmark-gate.mjs",
70
77
  "benchmark:intelligence": "node ./scripts/benchmark-intelligence.mjs",
71
78
  "benchmark:continuity": "node ./scripts/memory-continuity-benchmark.mjs",
79
+ "benchmark:cache-phase-2": "node ./benchmarks/token-usage/run-cache-simulation.mjs",
80
+ "benchmark:anti-halu": "node ./benchmarks/anti-halu/run-benchmark.mjs",
72
81
  "report:quality-trend": "node ./scripts/quality-trend-report.mjs",
73
82
  "report:docs-quality-drift": "node ./scripts/docs-quality-drift-report.mjs",
74
83
  "report:governance-weekly": "node ./scripts/governance-weekly-report.mjs",
75
84
  "clean:local": "node ./scripts/clean-local-artifacts.mjs",
76
85
  "validate": "node ./scripts/validate.mjs",
77
- "test": "node --test ./tests/cli-smoke.test.mjs ./tests/mcp-server.test.mjs ./tests/llm-judge.test.mjs ./tests/ui-rubric-calibration.test.mjs ./tests/operations.test.mjs ./tests/knowledge-injection.test.mjs"
86
+ "test": "node --test ./tests/cli-smoke.test.mjs ./tests/mcp-server.test.mjs ./tests/llm-judge.test.mjs ./tests/ui-rubric-calibration.test.mjs ./tests/operations.test.mjs ./tests/knowledge-injection.test.mjs ./tests/migrate-rule-format.test.mjs ./tests/audit-caching-scope-hygiene.test.mjs ./benchmarks/token-usage/lib/token-counter.test.mjs ./benchmarks/token-usage/lib/provider-cache-matrix.test.mjs ./benchmarks/token-usage/lib/cache-layer-contract.test.mjs ./benchmarks/token-usage/lib/cache-economics.test.mjs"
87
+ },
88
+ "devDependencies": {
89
+ "@anthropic-ai/sdk": "^0.96.0",
90
+ "@google/genai": "^2.3.0",
91
+ "tiktoken": "^1.0.22",
92
+ "yaml": "^2.9.0"
78
93
  }
79
94
  }
@@ -0,0 +1,258 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * audit-cache-layer-contract.mjs
4
+ *
5
+ * Phase 2 cache-layer integrity gate. Validates provider cache metadata,
6
+ * fixture segmentation, and the emitted cache simulation JSON without calling
7
+ * provider APIs.
8
+ */
9
+
10
+ import { existsSync, readFileSync } from 'node:fs';
11
+ import { dirname, join, resolve } from 'node:path';
12
+ import { fileURLToPath } from 'node:url';
13
+
14
+ import {
15
+ CACHE_LAYER_DEFINITIONS,
16
+ CACHE_LAYER_IDS,
17
+ validateCacheLayerContract,
18
+ } from '../benchmarks/token-usage/lib/cache-layer-contract.mjs';
19
+ import {
20
+ CACHE_MATRIX_VERIFIED_AT,
21
+ PROVIDER_CACHE_MATRIX,
22
+ listProviderCacheEntries,
23
+ } from '../benchmarks/token-usage/lib/provider-cache-matrix.mjs';
24
+ import {
25
+ buildCacheLayeredScenarioPrompts,
26
+ loadFixtures,
27
+ } from '../benchmarks/token-usage/runners/_shared.mjs';
28
+
29
// Absolute filesystem path of this script, derived from its module URL.
const SCRIPT_FILE_PATH = fileURLToPath(import.meta.url);
// The repository root is the parent of the directory that holds this script.
const REPOSITORY_ROOT = resolve(dirname(SCRIPT_FILE_PATH), '..');
// Cache simulation result that is audited when the caller supplies no path.
const DEFAULT_RESULT_PATH = join(
  REPOSITORY_ROOT,
  'benchmarks',
  'results',
  'cache-phase-2-2026-05-16.json',
);
// Command-line flags: everything after the node binary and the script path.
const ARGS = new Set(process.argv.slice(2));
// `--json` makes main() print only the JSON report on stdout.
const JSON_ONLY = ARGS.has('--json');

// Keys that every provider cache matrix entry is required to define.
const REQUIRED_PROVIDER_KEYS = [
  'provider',
  'sourceUrl',
  'sourceType',
  'verifiedAt',
  'cacheMode',
  'minimumCacheableTokens',
  'costModel',
];
44
+
45
/**
 * Append one violation record to the shared violations list.
 *
 * @param {object[]} violations - List that is mutated in place.
 * @param {string} kind - Machine-readable violation identifier.
 * @param {string} detail - Human-readable explanation.
 * @param {object} [context] - Extra fields merged into the record; because
 *   they are merged last, a `kind` or `detail` key here overrides the
 *   positional arguments.
 */
function addViolation(violations, kind, detail, context = {}) {
  const record = Object.assign({ kind, detail }, context);
  violations.push(record);
}
48
+
49
/**
 * Check every provider cache matrix entry and record a violation for each
 * problem found:
 *   - a key from REQUIRED_PROVIDER_KEYS is absent;
 *   - an `official-docs` entry has a non-https sourceUrl or a verifiedAt that
 *     differs from CACHE_MATRIX_VERIFIED_AT;
 *   - the Anthropic 5m / 1h TTL multipliers differ from the pinned values;
 *   - OpenAI or Gemini do not explicitly mark their cost model as
 *     `accurate: false`.
 *
 * @param {object[]} violations - List that receives the violation records.
 */
function validateProviderMatrix(violations) {
  // Pinned Anthropic multipliers; any other value is reported as drift.
  const anthropicTtlExpectations = [
    {
      ttl: '5m',
      write: 1.25,
      read: 0.1,
      kind: 'provider-metadata.anthropic-5m-multiplier',
      detail: 'Anthropic 5m cache multipliers drifted',
    },
    {
      ttl: '1h',
      write: 2.0,
      read: 0.1,
      kind: 'provider-metadata.anthropic-1h-multiplier',
      detail: 'Anthropic 1h cache multipliers drifted',
    },
  ];

  listProviderCacheEntries().forEach((entry) => {
    const { provider } = entry;

    REQUIRED_PROVIDER_KEYS
      .filter((key) => !(key in entry))
      .forEach((key) => {
        addViolation(violations, 'provider-metadata.missing-key', `${provider} missing ${key}`, { provider });
      });

    if (entry.sourceType === 'official-docs') {
      const hasHttpsSource = typeof entry.sourceUrl === 'string' && entry.sourceUrl.startsWith('https://');
      if (!hasHttpsSource) {
        addViolation(violations, 'provider-metadata.invalid-source-url', `${provider} official sourceUrl must be https`, { provider });
      }
      if (entry.verifiedAt !== CACHE_MATRIX_VERIFIED_AT) {
        addViolation(violations, 'provider-metadata.invalid-verified-at', `${provider} verifiedAt must be ${CACHE_MATRIX_VERIFIED_AT}`, { provider });
      }
    }

    if (provider === 'anthropic') {
      const ttlOptions = entry.costModel?.ttlOptions;
      for (const expectation of anthropicTtlExpectations) {
        const option = ttlOptions?.[expectation.ttl];
        const writeMatches = option?.writeMultiplier === expectation.write;
        const readMatches = option?.readMultiplier === expectation.read;
        if (!writeMatches || !readMatches) {
          addViolation(violations, expectation.kind, expectation.detail);
        }
      }
    }

    // These two providers must never claim a universally accurate price model.
    const mustDisclaimAccuracy = provider === 'openai' || provider === 'gemini';
    if (mustDisclaimAccuracy && entry.costModel?.accurate !== false) {
      addViolation(violations, 'provider-metadata.fake-universal-pricing', `${provider} must not claim universal accurate pricing`, { provider });
    }
  });
}
81
+
82
/**
 * Verify that CACHE_LAYER_DEFINITIONS declares exactly the three cache layers
 * named by CACHE_LAYER_IDS, in the order static prefix, semi-static context,
 * dynamic suffix.
 *
 * @param {object[]} violations - List that receives the violation records.
 */
function validateLayerDefinitions(violations) {
  const definitionIds = Object.keys(CACHE_LAYER_DEFINITIONS);

  // NOTE(review): Object.keys() never yields duplicates, so this check cannot
  // fire as written; it is kept so the audit's behaviour stays unchanged.
  if (new Set(definitionIds).size !== definitionIds.length) {
    addViolation(violations, 'layer-definition.duplicate-id', 'Cache layer definition IDs must be unique');
  }

  const { STATIC_PREFIX, SEMI_STATIC_CONTEXT, DYNAMIC_SUFFIX } = CACHE_LAYER_IDS;
  const expectedIds = [STATIC_PREFIX, SEMI_STATIC_CONTEXT, DYNAMIC_SUFFIX];

  // Comparing the serialised arrays checks both membership and order.
  const matchesExpectedOrder = JSON.stringify(definitionIds) === JSON.stringify(expectedIds);
  if (!matchesExpectedOrder) {
    addViolation(violations, 'layer-definition.unexpected-order', `Expected ${expectedIds.join(', ')}, got ${definitionIds.join(', ')}`);
  }
}
98
+
99
/**
 * Audit the cache-layer segmentation of every benchmark fixture.
 *
 * For each fixture both layered scenarios (`always_included` and
 * `with_loaded_rules`) are checked:
 *   - the contract passes validateCacheLayerContract();
 *   - layer IDs inside `contract.layers` are unique;
 *   - the fixture's user message does not appear in layer 1 or layer 2;
 *   - layer 3 (the dynamic suffix) has non-blank content.
 *
 * A contract that fails validation may be missing any of its parts, so every
 * follow-up check tolerates absent fields. Structural problems are therefore
 * reported as violations instead of aborting the audit with a TypeError.
 *
 * @param {object[]} violations - List that receives the violation records.
 * @returns {{fixtureCount: number, auditedScenarioCount: number}} Counters
 *   for the audit report.
 */
function validateFixtureSegmentation(violations) {
  const fixtures = loadFixtures();
  let auditedScenarioCount = 0;

  for (const fixture of fixtures) {
    const layered = buildCacheLayeredScenarioPrompts(fixture);
    const scenarios = [
      { name: 'always_included', contract: layered.alwaysIncluded },
      { name: 'with_loaded_rules', contract: layered.withLoadedRules },
    ];

    for (const scenario of scenarios) {
      auditedScenarioCount += 1;
      const context = { fixture_id: fixture.id, scenario: scenario.name };

      try {
        validateCacheLayerContract(scenario.contract);
      } catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        addViolation(violations, 'fixture-segmentation.invalid-contract', reason, context);
      }

      // Keep auditing after a validation failure, but never assume the
      // contract has the expected shape.
      const contract = scenario.contract ?? {};

      const layerIds = Array.isArray(contract.layers)
        ? contract.layers.map((layer) => layer?.id)
        : [];
      if (new Set(layerIds).size !== layerIds.length) {
        addViolation(violations, 'fixture-segmentation.duplicate-layer-id', 'Fixture contract contains duplicate layer IDs', context);
      }

      // The per-request user message must only live in the dynamic layer.
      for (const layer of [
        contract.layer_1_static_prefix,
        contract.layer_2_semi_static_context,
      ]) {
        if (typeof layer?.content === 'string' && layer.content.includes(fixture.user_message)) {
          addViolation(violations, 'fixture-segmentation.dynamic-leak', `Fixture user message leaked into ${layer.id}`, context);
        }
      }

      // A missing layer 3 is treated the same as an empty one.
      const dynamicContent = contract.layer_3_dynamic_suffix?.content;
      if (typeof dynamicContent !== 'string' || dynamicContent.trim().length === 0) {
        addViolation(violations, 'fixture-segmentation.missing-dynamic-layer', 'Layer 3 dynamic suffix is empty', context);
      }
    }
  }

  return { fixtureCount: fixtures.length, auditedScenarioCount };
}
150
+
151
/**
 * Validate the cache simulation result JSON produced by the benchmark.
 *
 * Checks, in order: the file exists, is readable and parses to a JSON object,
 * `report_version` is '2.0.0', `results` is a non-empty array whose length
 * equals fixture_count * provider_count * scenario_count, and every row
 *   - keeps `token_counts` and `economic_projection` separate,
 *   - has a positive numeric layer-3 token count,
 *   - carries source metadata whenever its projection claims to be accurate,
 *   - emits no exact first-request figure for OpenAI or Gemini.
 *
 * An unreadable or malformed file is reported as a `result-json.invalid-json`
 * violation rather than thrown, so the audit can still print a full report.
 *
 * @param {object[]} violations - List that receives the violation records.
 * @param {string} [resultPath] - Path of the result file to audit.
 * @returns {{resultPath: string, resultCount: number}} The audited path and
 *   the number of result rows found (0 when the file could not be audited).
 */
function validateSimulationResultJson(violations, resultPath = DEFAULT_RESULT_PATH) {
  if (!existsSync(resultPath)) {
    addViolation(violations, 'result-json.missing', `Missing cache simulation result: ${resultPath}`);
    return { resultPath, resultCount: 0 };
  }

  let parsed;
  try {
    parsed = JSON.parse(readFileSync(resultPath, 'utf8'));
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    addViolation(violations, 'result-json.invalid-json', `Cannot read or parse cache simulation result ${resultPath}: ${reason}`);
    return { resultPath, resultCount: 0 };
  }
  // Valid JSON can still be `null`, a number or a string.
  if (parsed === null || typeof parsed !== 'object') {
    addViolation(violations, 'result-json.invalid-json', `Cache simulation result must be a JSON object: ${resultPath}`);
    return { resultPath, resultCount: 0 };
  }

  if (parsed.report_version !== '2.0.0') {
    addViolation(violations, 'result-json.schema-version', `Unexpected report_version ${parsed.report_version}`);
  }
  if (!Array.isArray(parsed.results) || parsed.results.length === 0) {
    addViolation(violations, 'result-json.results-empty', 'results must be a non-empty array');
    return { resultPath, resultCount: 0 };
  }

  // One row is expected per fixture x provider x scenario combination.
  const expectedResultCount = parsed.fixture_count * parsed.provider_count * parsed.scenario_count;
  if (parsed.results.length !== expectedResultCount) {
    addViolation(violations, 'result-json.result-count', `Expected ${expectedResultCount} rows, got ${parsed.results.length}`);
  }

  for (const result of parsed.results) {
    // `result` may be null or a primitive in a damaged file.
    const rowContext = {
      fixture_id: result?.fixture_id,
      provider: result?.provider,
      scenario: result?.scenario,
    };

    if (!result?.token_counts || !result?.economic_projection) {
      addViolation(violations, 'result-json.missing-separation', 'Result must separate token_counts and economic_projection', rowContext);
      continue;
    }
    if (typeof result.token_counts.layer_3_dynamic_suffix !== 'number' || result.token_counts.layer_3_dynamic_suffix <= 0) {
      addViolation(violations, 'result-json.missing-layer-3-tokens', 'Layer 3 token count must be present and positive', rowContext);
    }
    if (result.economic_projection.accurate === true && (!result.source?.sourceUrl || !result.source?.verifiedAt)) {
      addViolation(violations, 'result-json.accurate-without-source', 'Accurate projection requires official source metadata', rowContext);
    }
    if (['openai', 'gemini'].includes(result.provider) && result.economic_projection.first_request_effective_tokens !== null) {
      addViolation(violations, 'result-json.fake-openai-gemini-savings', `${result.provider} must not emit exact savings without pricing metadata`, {
        fixture_id: result.fixture_id,
        scenario: result.scenario,
      });
    }
  }

  return { resultPath, resultCount: parsed.results.length };
}
204
+
205
/**
 * Run every cache-layer check and assemble the audit report.
 *
 * The checks run in a fixed order (provider matrix, layer definitions,
 * fixture segmentation, simulation result JSON) and all append to one shared
 * violations list.
 *
 * @param {{resultPath?: string}} [options] - `resultPath` overrides the
 *   simulation result file that is audited.
 * @returns {object} Report containing the audit name and version, a
 *   generation timestamp, the counters returned by the individual checks,
 *   the violations, and a `passed` flag that is true when none were found.
 */
export function runCacheLayerContractAudit({ resultPath = DEFAULT_RESULT_PATH } = {}) {
  const violations = [];

  validateProviderMatrix(violations);
  validateLayerDefinitions(violations);
  const segmentationStats = validateFixtureSegmentation(violations);
  const resultStats = validateSimulationResultJson(violations, resultPath);

  const report = {
    auditName: 'audit-cache-layer-contract',
    reportVersion: '1.0.0',
    generatedAt: new Date().toISOString(),
    providerCount: Object.keys(PROVIDER_CACHE_MATRIX).length,
  };
  // Counters from the checks are merged in before the verdict fields, so the
  // verdict fields win on any key collision.
  Object.assign(report, segmentationStats, resultStats);
  report.violationCount = violations.length;
  report.violations = violations;
  report.passed = violations.length === 0;
  return report;
}
224
+
225
/**
 * CLI entry point: run the audit, print the outcome and exit.
 *
 * With `--json` only the full report is written to stdout. Otherwise a
 * human-readable summary goes to stdout and a one-line
 * `AUDIT_CACHE_LAYER_REPORT: {...}` marker goes to stderr. The process exits
 * with 0 when the audit passed and 1 otherwise.
 */
function main() {
  const report = runCacheLayerContractAudit();

  if (JSON_ONLY) {
    const exitCode = report.passed ? 0 : 1;
    process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
    process.exit(exitCode);
  }

  const summaryLines = [
    '===============================================',
    ' audit:cache-layer-contract',
    '===============================================',
    ` Providers: ${report.providerCount}`,
    ` Fixtures: ${report.fixtureCount}`,
    ` Layered scenarios: ${report.auditedScenarioCount}`,
    ` Result rows: ${report.resultCount}`,
    '',
  ];
  summaryLines.forEach((line) => {
    console.log(line);
  });

  if (report.passed) {
    const okMarker = { passed: true, providerCount: report.providerCount, resultCount: report.resultCount };
    console.log(' Cache layer contract audit clean.');
    process.stderr.write(`AUDIT_CACHE_LAYER_REPORT: ${JSON.stringify(okMarker)}\n`);
    process.exit(0);
  }

  console.log(' Violations:');
  report.violations.forEach((violation) => {
    console.log(` [${violation.kind}] ${violation.detail}`);
  });
  const failureMarker = { passed: false, violationCount: report.violationCount };
  process.stderr.write(`AUDIT_CACHE_LAYER_REPORT: ${JSON.stringify(failureMarker)}\n`);
  process.exit(1);
}
255
+
256
// Run the CLI only when this file is the entry script. The URL comparison
// covers the usual case and the file-name suffix check covers platforms where
// the path and the URL are formatted differently.
// process.argv[1] is undefined when the module is loaded without a script
// path (for example from `node -e` or the REPL), so it has to be checked
// before any string method is called on it.
if (
  process.argv[1]
  && (
    import.meta.url === `file://${process.argv[1].replace(/\\/g, '/')}`
    || process.argv[1].endsWith('audit-cache-layer-contract.mjs')
  )
) {
  main();
}
@@ -0,0 +1,263 @@
1
+ #!/usr/bin/env node
2
+ // @ts-check
3
+
4
+ /**
5
+ * audit-caching-scope-hygiene.mjs
6
+ *
7
+ * Phase 5 drift catcher. Scans user-facing surfaces (README, AGENTS.md, FAQ,
8
+ * integration playbook, doc index, CHANGELOG) for caching numerical claims and verifies
9
+ * that each claim is integration-scoped per `docs/plan/research-foundation.md`
10
+ * D4 "Per-Tool Caching Scope Matrix".
11
+ *
12
+ * The rule: never publish a single universal "X% caching saving" figure that
13
+ * mixes integration modes. Every numerical caching saving claim on a public
14
+ * surface must either:
15
+ * 1. be in a clearly-scoped paragraph that names the integration mode
16
+ * (direct API, Claude Code SDK programmatic, Cursor, Windsurf, Codex CLI,
17
+ * Kiro, IDE wrapper) within +/- 600 characters of the figure, OR
18
+ * 2. live in a documented exempt context (Phase 1 aggregate-cap CHANGELOG
19
+ * rationale, plan files under docs/plan/, benchmark JSON under
20
+ * benchmarks/results/, the canonical D4 matrix itself).
21
+ */
22
+
23
+ import { existsSync, readFileSync } from 'node:fs';
24
+ import { dirname, join, resolve } from 'node:path';
25
+ import { fileURLToPath } from 'node:url';
26
+
27
// Absolute filesystem path of this script, derived from its module URL.
const SCRIPT_FILE_PATH = fileURLToPath(import.meta.url);
// The repository root is the parent of the directory that holds this script.
const REPOSITORY_ROOT = resolve(dirname(SCRIPT_FILE_PATH), '..');
// Command-line flags: everything after the node binary and the script path.
const ARGS = new Set(process.argv.slice(2));
// `--json` makes main() print only the JSON report on stdout.
const JSON_ONLY = ARGS.has('--json');

// Repository-relative files that are scanned by default.
const PUBLIC_SURFACES = [
  'README.md',
  'AGENTS.md',
  'docs/faq.md',
  'docs/integration-playbook.md',
  'docs/doc-index.md',
  'CHANGELOG.md',
];

// Regexes that recognise a numerical caching-saving claim. Capture group 1 is
// always the percentage figure. The array order is significant because
// CONTEXT_GATED_PATTERN_INDEXES refers to entries by position.
const SAVING_CLAIM_PATTERNS = [
  // [0] "X% reduction|saving|savings|off|cheaper"
  /\b(\d{1,3}(?:\.\d+)?)\s*%\s*(?:effective[- ]token\s+)?(?:reduction|saving|savings|off|cheaper)\b/gi,
  // [1] "saves|cuts|reduces|delivers (up to) X%" - context-gated
  /\b(?:save[sd]?|cut[s]?|reduce[sd]?|deliver[sd]?)\s+(?:up\s+to\s+)?(\d{1,3}(?:\.\d+)?)\s*%/gi,
  // [2] "up to|approximately|about|~ X% ... cache"
  // NOTE(review): the `~` alternative sits behind `\b`, so it only matches
  // when the tilde directly follows a word character - confirm that is
  // intended.
  /\b(?:up to|approximately|about|~)\s*(\d{1,3}(?:\.\d+)?)\s*%[^.\n]{0,80}(?:cach|warm|prompt[- ]cach)/gi,
  // [3] "cache ... X% reduction|saving|savings|off"
  /\bcach[a-z]*[^.\n]{0,80}\b(\d{1,3}(?:\.\d+)?)\s*%\s*(?:reduction|saving|savings|off)/gi,
  // [4] bare two-decimal figures such as 89.31% - context-gated
  /\b(\d{2,3}\.\d{2})\s*%/g,
];

// Positions in SAVING_CLAIM_PATTERNS whose matches only count when a cache
// keyword occurs inside the surrounding context window.
const CONTEXT_GATED_PATTERN_INDEXES = new Set([1, 4]);

// Lower-case substrings that mark a passage as scoped to an integration mode.
const INTEGRATION_MODE_KEYWORDS = [
  'direct provider api',
  'direct api',
  'direct anthropic',
  'direct openai',
  'direct gemini',
  'claude code sdk',
  'claude code cli',
  'cursor',
  'windsurf',
  'codex cli',
  'codex / openai',
  'kiro',
  'ide wrapper',
  'ide wrappers',
  'integration mode',
  'integration_mode',
  'per-tool caching',
  'per-integration',
  'per integration',
  'cache_control',
];

// Lower-case substrings that indicate a passage is about caching.
const CACHE_CONTEXT_KEYWORDS = [
  'cach',
  'warm',
  'prompt-cach',
  'prompt cach',
  'cache_control',
];

// Number of characters inspected on each side of a match.
const CONTEXT_WINDOW_RADIUS = 600;
92
+
93
/**
 * Load the text of one public surface.
 *
 * @param {string} rootDir - Directory that `relativePath` is resolved against.
 * @param {string} relativePath - Surface path relative to `rootDir`.
 * @param {Record<string, unknown> | null} sourceOverrides - Optional in-memory
 *   contents keyed by relative path; an own entry takes precedence over the
 *   file system and is converted to a string.
 * @returns {string | null} The surface text, or null when there is no
 *   override and the file does not exist.
 */
function readSurface(rootDir, relativePath, sourceOverrides) {
  const hasOverride = Boolean(sourceOverrides)
    && Object.prototype.hasOwnProperty.call(sourceOverrides, relativePath);
  if (hasOverride) {
    return String(sourceOverrides[relativePath]);
  }

  const absolutePath = join(rootDir, relativePath);
  return existsSync(absolutePath) ? readFileSync(absolutePath, 'utf8') : null;
}
103
+
104
/**
 * Find every numerical caching-saving claim in a piece of text.
 *
 * Each regex in SAVING_CLAIM_PATTERNS is run over the whole text. Matches of
 * the patterns listed in CONTEXT_GATED_PATTERN_INDEXES only count when a
 * cache keyword occurs within CONTEXT_WINDOW_RADIUS characters of the match.
 *
 * Several patterns can hit the same figure while starting at different
 * offsets (at the verb, at "cach...", or at the digits). Duplicates are
 * therefore recognised by the position of the captured percentage, not by
 * the position of the whole match, so one figure yields exactly one claim.
 *
 * @param {string} sourceText - Text to scan.
 * @returns {{index: number, matchedText: string, percent: string}[]} Claims
 *   ordered by match start; when several patterns hit one figure, the match
 *   that starts earliest is kept.
 */
function findCachingClaimMatches(sourceText) {
  /** @type {{index: number, matchedText: string, percent: string, percentIndex: number}[]} */
  const candidates = [];

  SAVING_CLAIM_PATTERNS.forEach((pattern, patternIndex) => {
    // Scan with a private copy: the shared regex keeps its lastIndex, and the
    // `d` flag exposes where capture group 1 (the figure) starts.
    const scannerFlags = [...new Set(`${pattern.flags}dg`)].join('');
    const scanner = new RegExp(pattern.source, scannerFlags);

    for (const result of sourceText.matchAll(scanner)) {
      const matchedText = result[0];
      const percent = result[1] || '';
      const index = result.index ?? 0;
      const percentIndex = result.indices?.[1]?.[0] ?? index;

      // For context-gated patterns, confirm there is a cache keyword within
      // the context window before counting this as a caching claim.
      if (CONTEXT_GATED_PATTERN_INDEXES.has(patternIndex)) {
        const start = Math.max(0, index - CONTEXT_WINDOW_RADIUS);
        const end = Math.min(sourceText.length, index + matchedText.length + CONTEXT_WINDOW_RADIUS);
        const nearbyText = sourceText.slice(start, end).toLowerCase();
        const hasCacheContext = CACHE_CONTEXT_KEYWORDS.some((keyword) => nearbyText.includes(keyword));
        if (!hasCacheContext) {
          continue;
        }
      }

      candidates.push({ index, matchedText, percent, percentIndex });
    }
  });

  // The sort is stable, so for equal start offsets the earlier pattern wins.
  candidates.sort((a, b) => a.index - b.index);

  /** @type {{index: number, matchedText: string, percent: string}[]} */
  const deduped = [];
  const seenPercentPositions = new Set();
  for (const { percentIndex, ...claim } of candidates) {
    if (seenPercentPositions.has(percentIndex)) {
      continue;
    }
    seenPercentPositions.add(percentIndex);
    deduped.push(claim);
  }
  return deduped;
}
146
+
147
/**
 * Cut out the text surrounding a match: CONTEXT_WINDOW_RADIUS characters
 * before it, the match itself, and CONTEXT_WINDOW_RADIUS characters after it,
 * clamped to the bounds of the text.
 *
 * @param {string} sourceText - Full text that contains the match.
 * @param {number} index - Offset at which the match starts.
 * @param {number} matchLength - Length of the match in characters.
 * @returns {string} The surrounding slice of `sourceText`.
 */
function extractContextWindow(sourceText, index, matchLength) {
  const matchEnd = index + matchLength;
  const windowStart = Math.max(0, index - CONTEXT_WINDOW_RADIUS);
  const windowEnd = Math.min(sourceText.length, matchEnd + CONTEXT_WINDOW_RADIUS);
  return sourceText.slice(windowStart, windowEnd);
}
152
+
153
/**
 * Tell whether a context window names an integration mode.
 *
 * @param {string} contextWindow - Text surrounding a caching claim.
 * @returns {boolean} True when the lower-cased text contains at least one
 *   entry of INTEGRATION_MODE_KEYWORDS.
 */
function hasIntegrationModeMarker(contextWindow) {
  const haystack = contextWindow.toLowerCase();
  for (const keyword of INTEGRATION_MODE_KEYWORDS) {
    if (haystack.includes(keyword)) {
      return true;
    }
  }
  return false;
}
157
+
158
/**
 * Convert a character offset into a 1-based line number.
 *
 * @param {string} sourceText - Text the offset refers to.
 * @param {number} charIndex - Character offset into `sourceText`.
 * @returns {number} One plus the number of newline characters that occur
 *   before `charIndex`; offsets past the end yield the last line.
 */
function lineNumberFromIndex(sourceText, charIndex) {
  let line = 1;
  // Jump from newline to newline instead of visiting every character.
  let newlineAt = sourceText.indexOf('\n');
  while (newlineAt !== -1 && newlineAt < charIndex) {
    line += 1;
    newlineAt = sourceText.indexOf('\n', newlineAt + 1);
  }
  return line;
}
167
+
168
/**
 * Scan the public surfaces for caching-saving claims and report every claim
 * that has no integration-mode marker in its context window.
 *
 * @param {object} [options]
 * @param {string} [options.rootDir] - Directory the surfaces are resolved
 *   against; defaults to the repository root.
 * @param {Record<string, unknown>} [options.sourceOverrides] - In-memory
 *   surface contents keyed by relative path.
 * @param {string[]} [options.surfaceList] - Surfaces to scan; defaults to
 *   PUBLIC_SURFACES.
 * @returns {object} Report with one entry per surface that could be read
 *   (unreadable surfaces are skipped), the violations, the totals, and a
 *   `passed` flag that is true when no violation was found.
 */
export function runCachingScopeHygieneAudit(options = {}) {
  const rootDir = options.rootDir ? resolve(String(options.rootDir)) : REPOSITORY_ROOT;
  const sourceOverrides = options.sourceOverrides || null;
  const surfaceList = options.surfaceList || PUBLIC_SURFACES;
  const violations = [];
  const surfaceReports = [];

  for (const surfacePath of surfaceList) {
    const sourceText = readSurface(rootDir, surfacePath, sourceOverrides);
    if (sourceText === null) {
      continue;
    }

    const claims = findCachingClaimMatches(sourceText);

    /** @type {{percent: string, line: number, scoped: boolean}[]} */
    const claimReports = claims.map((claim) => {
      const contextWindow = extractContextWindow(sourceText, claim.index, claim.matchedText.length);
      const scoped = hasIntegrationModeMarker(contextWindow);
      const line = lineNumberFromIndex(sourceText, claim.index);

      if (!scoped) {
        violations.push({
          file: surfacePath,
          line,
          kind: 'caching-claim.missing-integration-scope',
          detail: `Caching saving claim "${claim.matchedText.trim()}" lacks an integration-mode marker within +/- ${CONTEXT_WINDOW_RADIUS} chars. Add a per-tool / direct-API / IDE-wrapper label, or move the figure under a clearly-scoped paragraph. Source of truth: docs/plan/research-foundation.md D4.`,
        });
      }

      return { percent: claim.percent, line, scoped };
    });

    const scopedCount = claimReports.filter((claim) => claim.scoped).length;
    surfaceReports.push({
      path: surfacePath,
      claimCount: claims.length,
      scopedCount,
      unscopedCount: claimReports.length - scopedCount,
      claims: claimReports,
    });
  }

  const totalClaimCount = surfaceReports.reduce((sum, surface) => sum + surface.claimCount, 0);

  return {
    auditName: 'audit-caching-scope-hygiene',
    reportVersion: '1.0.0',
    generatedAt: new Date().toISOString(),
    surfaceCount: surfaceReports.length,
    totalClaimCount,
    violationCount: violations.length,
    passed: violations.length === 0,
    surfaces: surfaceReports,
    violations,
  };
}
229
+
230
/**
 * CLI entry point: run the audit, print the outcome and exit.
 *
 * With `--json` only the full report is written to stdout. Otherwise a
 * human-readable summary goes to stdout and a one-line
 * `AUDIT_CACHING_SCOPE_HYGIENE_REPORT: {...}` marker goes to stderr. The
 * process exits with 0 when the audit passed and 1 otherwise.
 */
function main() {
  const report = runCachingScopeHygieneAudit();

  if (JSON_ONLY) {
    const exitCode = report.passed ? 0 : 1;
    process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
    process.exit(exitCode);
  }

  const summaryLines = [
    '===============================================',
    ' audit:caching-scope-hygiene',
    '===============================================',
    ` Public surfaces scanned: ${report.surfaceCount}`,
    ` Caching saving claims: ${report.totalClaimCount}`,
    '',
  ];
  summaryLines.forEach((line) => {
    console.log(line);
  });

  if (report.passed) {
    const okMarker = { passed: true, surfaceCount: report.surfaceCount, totalClaimCount: report.totalClaimCount };
    console.log(' All caching saving claims on public surfaces are integration-scoped.');
    process.stderr.write(`AUDIT_CACHING_SCOPE_HYGIENE_REPORT: ${JSON.stringify(okMarker)}\n`);
    process.exit(0);
  }

  console.log(' Violations:');
  report.violations.forEach((violation) => {
    console.log(` [${violation.kind}] ${violation.file}:${violation.line} ${violation.detail}`);
  });
  console.log('');
  console.log(` ${report.violationCount} violation(s) found.`);
  const failureMarker = { passed: false, violationCount: report.violationCount };
  process.stderr.write(`AUDIT_CACHING_SCOPE_HYGIENE_REPORT: ${JSON.stringify(failureMarker)}\n`);
  process.exit(1);
}
260
+
261
// Run the CLI only when this file is the entry script. process.argv[1] can be
// absent (module loaded without a script path), so it is checked first. The
// URL comparison covers the usual case and the file-name suffix check covers
// platforms where the path and the URL are formatted differently.
const entryScriptPath = process.argv[1];
const isInvokedDirectly = Boolean(entryScriptPath) && (
  import.meta.url === `file://${entryScriptPath.replace(/\\/g, '/')}`
  || entryScriptPath.endsWith('audit-caching-scope-hygiene.mjs')
);
if (isInvokedDirectly) {
  main();
}