@ryuenn3123/agentic-senior-core 3.0.50 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89):
  1. package/.agent-context/prompts/bootstrap-design.md +3 -1
  2. package/.agent-context/prompts/research-design.md +165 -0
  3. package/.agent-context/review-checklists/pr-checklist.md +1 -0
  4. package/.agent-context/rules/api-docs.md +63 -47
  5. package/.agent-context/rules/architecture.md +133 -120
  6. package/.agent-context/rules/database-design.md +36 -18
  7. package/.agent-context/rules/docker-runtime.md +66 -43
  8. package/.agent-context/rules/efficiency-vs-hype.md +38 -17
  9. package/.agent-context/rules/error-handling.md +35 -16
  10. package/.agent-context/rules/event-driven.md +35 -18
  11. package/.agent-context/rules/frontend-architecture.md +103 -76
  12. package/.agent-context/rules/git-workflow.md +81 -197
  13. package/.agent-context/rules/microservices.md +42 -41
  14. package/.agent-context/rules/naming-conv.md +27 -8
  15. package/.agent-context/rules/performance.md +32 -12
  16. package/.agent-context/rules/realtime.md +26 -9
  17. package/.agent-context/rules/security.md +39 -20
  18. package/.agent-context/rules/testing.md +36 -16
  19. package/AGENTS.md +21 -20
  20. package/README.md +10 -1
  21. package/lib/cli/commands/init.mjs +12 -0
  22. package/lib/cli/commands/upgrade.mjs +11 -0
  23. package/lib/cli/compiler.mjs +1 -0
  24. package/lib/cli/detector/constants.mjs +135 -0
  25. package/lib/cli/detector/design-evidence/collector.mjs +256 -0
  26. package/lib/cli/detector/design-evidence/constants.mjs +39 -0
  27. package/lib/cli/detector/design-evidence/file-traversal.mjs +83 -0
  28. package/lib/cli/detector/design-evidence/structured-attribute-evidence.mjs +117 -0
  29. package/lib/cli/detector/design-evidence/summary.mjs +109 -0
  30. package/lib/cli/detector/design-evidence/utility-helpers.mjs +122 -0
  31. package/lib/cli/detector/design-evidence.mjs +25 -610
  32. package/lib/cli/detector/stack-detection.mjs +243 -0
  33. package/lib/cli/detector/ui-signals.mjs +150 -0
  34. package/lib/cli/detector/workspace-scan.mjs +177 -0
  35. package/lib/cli/detector.mjs +20 -688
  36. package/lib/cli/memory-continuity.mjs +1 -0
  37. package/lib/cli/project-scaffolder/design-contract/research-dossier-migration.mjs +165 -0
  38. package/lib/cli/project-scaffolder/design-contract/sections/audits.mjs +96 -0
  39. package/lib/cli/project-scaffolder/design-contract/sections/conceptual-anchor.mjs +233 -0
  40. package/lib/cli/project-scaffolder/design-contract/sections/execution-handoff.mjs +211 -0
  41. package/lib/cli/project-scaffolder/design-contract/seed-signals.mjs +79 -0
  42. package/lib/cli/project-scaffolder/design-contract/signal-vocab.mjs +64 -0
  43. package/lib/cli/project-scaffolder/design-contract/validation/anchor-validators.mjs +456 -0
  44. package/lib/cli/project-scaffolder/design-contract/validation/audit-validators.mjs +117 -0
  45. package/lib/cli/project-scaffolder/design-contract/validation/completeness.mjs +83 -0
  46. package/lib/cli/project-scaffolder/design-contract/validation/execution-validators.mjs +328 -0
  47. package/lib/cli/project-scaffolder/design-contract/validation/helpers.mjs +8 -0
  48. package/lib/cli/project-scaffolder/design-contract/validation/research-dossier-validators.mjs +104 -0
  49. package/lib/cli/project-scaffolder/design-contract/validation/structural-validators.mjs +79 -0
  50. package/lib/cli/project-scaffolder/design-contract/validation/system-validators.mjs +256 -0
  51. package/lib/cli/project-scaffolder/design-contract/validation.mjs +61 -896
  52. package/lib/cli/project-scaffolder/design-contract.mjs +151 -556
  53. package/lib/cli/project-scaffolder/prompt-builders.mjs +9 -0
  54. package/mcp.json +30 -9
  55. package/package.json +17 -2
  56. package/scripts/audit-cache-layer-contract.mjs +258 -0
  57. package/scripts/audit-caching-scope-hygiene.mjs +263 -0
  58. package/scripts/audit-file-size.mjs +219 -0
  59. package/scripts/audit-reflection-citations.mjs +163 -0
  60. package/scripts/audit-release-bundle.mjs +170 -0
  61. package/scripts/audit-rule-id-uniqueness.mjs +313 -0
  62. package/scripts/benchmark-evidence-bundle.mjs +1 -0
  63. package/scripts/build-release-benchmark-bundle.mjs +204 -0
  64. package/scripts/context-triggered-audit.mjs +1 -0
  65. package/scripts/documentation-boundary-audit.mjs +1 -0
  66. package/scripts/explain-on-demand-audit.mjs +2 -1
  67. package/scripts/frontend-usability-audit.mjs +10 -10
  68. package/scripts/llm-judge/checklist-loader.mjs +45 -0
  69. package/scripts/llm-judge/constants.mjs +66 -0
  70. package/scripts/llm-judge/diff-collection.mjs +74 -0
  71. package/scripts/llm-judge/prompting.mjs +78 -0
  72. package/scripts/llm-judge/providers.mjs +111 -0
  73. package/scripts/llm-judge/verdict.mjs +134 -0
  74. package/scripts/llm-judge.mjs +21 -482
  75. package/scripts/mcp-server/tool-registry.mjs +55 -0
  76. package/scripts/mcp-server/tools.mjs +137 -1
  77. package/scripts/migrate-rule-format/id-prefix-table.mjs +37 -0
  78. package/scripts/migrate-rule-format/parse-legacy.mjs +180 -0
  79. package/scripts/migrate-rule-format/render-new.mjs +169 -0
  80. package/scripts/migrate-rule-format/roundtrip-validate.mjs +89 -0
  81. package/scripts/migrate-rule-format.mjs +192 -0
  82. package/scripts/release-gate/constants.mjs +1 -1
  83. package/scripts/release-gate/static-checks.mjs +1 -1
  84. package/scripts/rules-guardian-audit.mjs +5 -2
  85. package/scripts/single-source-lazy-loading-audit.mjs +2 -1
  86. package/scripts/ui-design-judge/git-input.mjs +3 -0
  87. package/scripts/validate/config.mjs +27 -2
  88. package/scripts/validate/coverage-checks.mjs +1 -1
  89. package/scripts/validate.mjs +94 -1
@@ -190,6 +190,7 @@ export function buildDesignBootstrapPrompt({
190
190
  '29. repoEvidence when onboarding or detector evidence exists',
191
191
  '',
192
192
  '## Mechanical Gates',
193
+ '0. Read `.agent-context/prompts/research-design.md` first. Sections 3 (Category Code Identification), 4 (Morphological Exploration), and 5 (Anchor Candidates) are gates: each must produce an auditable artifact before UI implementation.',
193
194
  '1. Do not copy external style guides.',
194
195
  '2. Do not anchor the final design language to famous products, benchmark visuals, or external reference surfaces.',
195
196
  '3. Do not choose final style, library, palette, typography, motion, or layout from this offline scaffold.',
@@ -238,6 +239,13 @@ export function buildDesignBootstrapPrompt({
238
239
  '40. Translate conceptual anchors non-literally first. Do not turn anchor artifacts into required chrome, decorative props, wallpaper, or theme objects unless they serve a named product function.',
239
240
  '41. Use external websites and benchmark examples as candidate evidence for constraints, mechanics, and quality bars only; do not copy layout rhythm, palette, component skin, visual metaphor, or brand posture.',
240
241
  '',
242
+ '## Research-Design Brief Gates (research-design.md)',
243
+ '42. Section 3 — Category Code Identification: list at least three category codes per product surface in `conceptualAnchor.categoryCodes.candidateEntries`. Each entry must record `description`, `specificityEvidence`, `categoryDefaultReason`, and `rejectionNote`. The specificity floor is: a reader unfamiliar with the project must be able to visualize a specific aesthetic direction from the text alone, without seeing the UI and without knowing the product name. Reject placeholder phrases like "clean typography", "modern color palette", or "smooth animations". Anti-leakage rule: listing a cliche identifies a trap, not a target; the example cliches in the brief are not aesthetic candidates for any project. If the project pattern-matches a common AI-safe default (dev-tool dark slate, AI-startup purple-pink gradient, health-app mint, SaaS admin three-card KPI, marketing-site three-tile hero), name that cliche in `candidateEntries` with an explicit rejection note.',
244
+ '43. Section 4 — Morphological Exploration: build a 5x5 or 6x5 matrix in `conceptualAnchor.morphologicalExploration`. Choose 5 or 6 dimensions and 5 values per dimension. Do not seed the matrix with the Section 3 category codes as values. Highlight the selected combination, and ALSO highlight at least one uncomfortable combination that feels instinctively wrong but can be argued with product logic. The uncomfortable combination must record `combinationLabel`, `discomfortReason`, and `productLogicJustification`. If you cannot produce an uncomfortable-but-arguable combination, the dimensions are too narrow; widen at least one and regenerate.',
245
+ '44. Section 5 — Anchor Candidates: produce exactly five entries in `conceptualAnchor.anchorCandidates.candidates`. Each candidate must record `anchorReference`, `conceptualFamily`, `jobFit`, `hierarchyImplication`, `densityImplication`, `typeImplication`, `stateLanguage`, `motionImplication`, `whatItRulesOut`, `renameTest`, and `categoryCodeOverlap`.',
246
+ '45. Strengthened rename test: rename the product to three genuinely different categories that are remote from each other and from the actual product. Pick fresh test categories per anchor; do not reuse the same triple across every anchor. Record the three test categories explicitly in the dossier so a human reviewer can audit them. Score the result: 0/3 coherent equals STRONG PASS, 1/3 equals PASS, 2/3 equals REVISE, 3/3 equals DISCARD. Never select an anchor with verdict DISCARD; revise REVISE candidates until they fail in at least 2 of 3 categories.',
247
+ '46. Make `conceptualAnchor.anchorReference` and `derivedTokenLogic.anchorReference` match the selected anchor exactly. The selected anchor must have verdict STRONG PASS or PASS.',
248
+ '',
241
249
  '## Creative Ambition Floor',
242
250
  'Before implementation, the design contract must name one authored visual bet, one product-derived palette move, one signature motion/spatial/interaction behavior, and one morphology or composition choice that would not appear in a generic AI template.',
243
251
  'The ambition floor is not a fixed aesthetic. Quiet, dense, utilitarian, or text-heavy interfaces are allowed when the product requires them, but they still need a project-specific visual decision and a real reason for omitting richer motion, 3D, canvas, WebGL, scroll choreography, or animation libraries.',
@@ -290,6 +298,7 @@ export function buildDesignBootstrapPrompt({
290
298
  '13. Preserve externalResearchIntake so user-provided research becomes reviewed evidence without turning into an offline style or dependency preset.',
291
299
  '14. Preserve conceptualAnchor so prompt-only UI work has one cohesive non-template concept instead of a mixed collection of bold but unrelated visual decisions.',
292
300
  '15. Record conceptualAnchor.agentResearchMode, specificReferencePoint, signatureMotion, typographicDecision, visualRiskBudget, motionRiskBudget, and cohesionChecks so the final UI cannot quietly fall back to a timid dashboard/admin mental model.',
301
+ '15a. Record conceptualAnchor.categoryCodes.candidateEntries (Section 3 of research-design.md), conceptualAnchor.morphologicalExploration with selectedCombination and uncomfortableCombination (Section 4), and conceptualAnchor.anchorCandidates.candidates with full renameTest results (Section 5) before UI implementation.',
293
302
  '16. Preserve derivedTokenLogic, libraryResearchStatus, and libraryDecisions so token choices and dependency uncertainty stay visible before implementation.',
294
303
  '16a. Preserve designFlexibilityPolicy so the machine contract guides consistency without freezing literal anchor artifacts, exact token primitives, or component-kit visual language.',
295
304
  '17. Preserve productionContentPolicy so UI output is ship-ready and not a testing-looking scaffold.',
package/mcp.json CHANGED
@@ -1,25 +1,36 @@
1
1
  {
2
2
  "version": "1.1",
3
3
  "name": "agentic-senior-core",
4
- "description": "MCP configuration for governance-aware diagnostics, scoped rule loading, and self-healing workflows with dynamic knowledge injection.",
4
+ "description": "MCP configuration for governance-aware diagnostics and scoped rule loading. Five knowledge layers ship as concrete file-backed surfaces today; four additional layers are declared as planned dynamic surfaces. See docs/architecture-vision.md for the roadmap.",
5
5
  "knowledgeLayers": {
6
6
  "enabled": true,
7
- "description": "9-layer dynamic knowledge injection for AI agents",
7
+ "description": "Knowledge layer registry. 5 layers are file-backed and active today (rules, prompts, state, policies, project-context). 4 layers are reserved as planned dynamic surfaces (stack-strategies, architecture-playbooks, execution-contracts, governance-modes) and resolve to no-op until implemented in a later phase.",
8
+ "implementedLayerCount": 5,
9
+ "plannedLayerCount": 4,
10
+ "totalDeclaredLayers": 9,
11
+ "roadmapReference": "docs/architecture-vision.md",
8
12
  "layers": {
9
13
  "rules": {
10
14
  "path": ".agent-context/rules",
11
15
  "count": 15,
12
- "autoLoad": true
16
+ "autoLoad": true,
17
+ "status": "implemented"
13
18
  },
14
19
  "stack-strategies": {
15
20
  "path": "dynamic",
16
21
  "autoLoad": true,
17
- "type": "virtual"
22
+ "type": "virtual",
23
+ "status": "planned",
24
+ "plannedPhase": "phase-2-or-later",
25
+ "rationale": "Reserved for runtime-decision-signal cache derived from detected project stack."
18
26
  },
19
27
  "architecture-playbooks": {
20
28
  "path": "dynamic",
21
29
  "autoLoad": true,
22
- "type": "virtual"
30
+ "type": "virtual",
31
+ "status": "planned",
32
+ "plannedPhase": "phase-2-or-later",
33
+ "rationale": "Reserved for structural-planning playbooks derived from repo evidence."
23
34
  },
24
35
  "execution-contracts": {
25
36
  "path": "dynamic",
@@ -29,12 +40,16 @@
29
40
  "review-checklists",
30
41
  "policies"
31
42
  ],
32
- "type": "virtual"
43
+ "type": "virtual",
44
+ "status": "planned",
45
+ "plannedPhase": "phase-3-anti-halu",
46
+ "rationale": "Reserved for active-contract resolution. Today the agent reads prompts, review-checklists, and policies directly from their file-backed layers; the dedicated execution-contract resolver is planned for the validation MCP tools in Phase 3."
33
47
  },
34
48
  "prompts": {
35
49
  "path": ".agent-context/prompts",
36
50
  "count": 4,
37
51
  "autoLoad": true,
52
+ "status": "implemented",
38
53
  "templates": [
39
54
  "init-project",
40
55
  "bootstrap-design",
@@ -50,22 +65,28 @@
50
65
  "regulated",
51
66
  "startup"
52
67
  ],
53
- "type": "virtual"
68
+ "type": "virtual",
69
+ "status": "planned",
70
+ "plannedPhase": "phase-2-or-later",
71
+ "rationale": "Reserved for governance-mode selection metadata. Profile selection lives in policy files today and the mode-resolver is planned."
54
72
  },
55
73
  "state": {
56
74
  "path": ".agent-context/state",
57
75
  "count": 22,
58
- "autoLoad": true
76
+ "autoLoad": true,
77
+ "status": "implemented"
59
78
  },
60
79
  "policies": {
61
80
  "path": ".agent-context/policies",
62
81
  "count": 1,
63
- "autoLoad": true
82
+ "autoLoad": true,
83
+ "status": "implemented"
64
84
  },
65
85
  "project-context": {
66
86
  "path": "docs",
67
87
  "count": 0,
68
88
  "autoLoad": false,
89
+ "status": "implemented",
69
90
  "sources": [
70
91
  "project-brief",
71
92
  "architecture-decision-record",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ryuenn3123/agentic-senior-core",
3
- "version": "3.0.50",
3
+ "version": "4.0.1",
4
4
  "type": "module",
5
5
  "description": "Force your AI Agent to code like a Staff Engineer, not a Junior.",
6
6
  "bin": {
@@ -56,7 +56,14 @@
56
56
  "audit:rules-guardian": "node ./scripts/rules-guardian-audit.mjs",
57
57
  "audit:explain-on-demand": "node ./scripts/explain-on-demand-audit.mjs",
58
58
  "audit:single-source-lazy-loading": "node ./scripts/single-source-lazy-loading-audit.mjs",
59
+ "audit:cache-layer-contract": "node ./scripts/audit-cache-layer-contract.mjs",
60
+ "audit:reflection-citations": "node ./scripts/audit-reflection-citations.mjs",
61
+ "audit:caching-scope-hygiene": "node ./scripts/audit-caching-scope-hygiene.mjs",
62
+ "audit:release-bundle": "node ./scripts/audit-release-bundle.mjs",
63
+ "audit:file-size": "node ./scripts/audit-file-size.mjs",
64
+ "audit:rule-id-uniqueness": "node ./scripts/audit-rule-id-uniqueness.mjs",
59
65
  "audit:v3-purge": "node ./scripts/v3-purge-audit.mjs",
66
+ "build:release-bundle": "node ./scripts/build-release-benchmark-bundle.mjs",
60
67
  "sync:adapters": "node ./scripts/sync-thin-adapters.mjs",
61
68
  "check:adapters": "node ./scripts/sync-thin-adapters.mjs --check",
62
69
  "gate:release": "node ./scripts/release-gate.mjs && node ./scripts/forbidden-content-check.mjs",
@@ -69,11 +76,19 @@
69
76
  "benchmark:gate": "node ./scripts/benchmark-gate.mjs",
70
77
  "benchmark:intelligence": "node ./scripts/benchmark-intelligence.mjs",
71
78
  "benchmark:continuity": "node ./scripts/memory-continuity-benchmark.mjs",
79
+ "benchmark:cache-phase-2": "node ./benchmarks/token-usage/run-cache-simulation.mjs",
80
+ "benchmark:anti-halu": "node ./benchmarks/anti-halu/run-benchmark.mjs",
72
81
  "report:quality-trend": "node ./scripts/quality-trend-report.mjs",
73
82
  "report:docs-quality-drift": "node ./scripts/docs-quality-drift-report.mjs",
74
83
  "report:governance-weekly": "node ./scripts/governance-weekly-report.mjs",
75
84
  "clean:local": "node ./scripts/clean-local-artifacts.mjs",
76
85
  "validate": "node ./scripts/validate.mjs",
77
- "test": "node --test ./tests/cli-smoke.test.mjs ./tests/mcp-server.test.mjs ./tests/llm-judge.test.mjs ./tests/ui-rubric-calibration.test.mjs ./tests/operations.test.mjs ./tests/knowledge-injection.test.mjs"
86
+ "test": "node --test ./tests/cli-smoke.test.mjs ./tests/mcp-server.test.mjs ./tests/llm-judge.test.mjs ./tests/ui-rubric-calibration.test.mjs ./tests/operations.test.mjs ./tests/knowledge-injection.test.mjs ./tests/migrate-rule-format.test.mjs ./tests/audit-caching-scope-hygiene.test.mjs ./tests/research-dossier-migration.test.mjs ./benchmarks/token-usage/lib/token-counter.test.mjs ./benchmarks/token-usage/lib/provider-cache-matrix.test.mjs ./benchmarks/token-usage/lib/cache-layer-contract.test.mjs ./benchmarks/token-usage/lib/cache-economics.test.mjs"
87
+ },
88
+ "devDependencies": {
89
+ "@anthropic-ai/sdk": "^0.96.0",
90
+ "@google/genai": "^2.3.0",
91
+ "tiktoken": "^1.0.22",
92
+ "yaml": "^2.9.0"
78
93
  }
79
94
  }
@@ -0,0 +1,258 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * audit-cache-layer-contract.mjs
4
+ *
5
+ * Phase 2 cache-layer integrity gate. Validates provider cache metadata,
6
+ * fixture segmentation, and the emitted cache simulation JSON without calling
7
+ * provider APIs.
8
+ */
9
+
10
+ import { existsSync, readFileSync } from 'node:fs';
11
+ import { dirname, join, resolve } from 'node:path';
12
+ import { fileURLToPath } from 'node:url';
13
+
14
+ import {
15
+ CACHE_LAYER_DEFINITIONS,
16
+ CACHE_LAYER_IDS,
17
+ validateCacheLayerContract,
18
+ } from '../benchmarks/token-usage/lib/cache-layer-contract.mjs';
19
+ import {
20
+ CACHE_MATRIX_VERIFIED_AT,
21
+ PROVIDER_CACHE_MATRIX,
22
+ listProviderCacheEntries,
23
+ } from '../benchmarks/token-usage/lib/provider-cache-matrix.mjs';
24
+ import {
25
+ buildCacheLayeredScenarioPrompts,
26
+ loadFixtures,
27
+ } from '../benchmarks/token-usage/runners/_shared.mjs';
28
+
29
// Absolute filesystem path of this script, derived from the module URL.
const SCRIPT_FILE_PATH = fileURLToPath(import.meta.url);

// Repository root: the parent of the directory that contains this script.
const REPOSITORY_ROOT = resolve(SCRIPT_FILE_PATH, '..', '..');

// Cache simulation result file that is audited when the caller gives no path.
const DEFAULT_RESULT_PATH = join(
  REPOSITORY_ROOT,
  'benchmarks',
  'results',
  'cache-phase-2-2026-05-16.json',
);

// Command-line flags: everything after the node binary and the script path.
const ARGS = new Set(process.argv.filter((_, position) => position >= 2));

// When `--json` is passed, main() prints only the JSON report.
const JSON_ONLY = ARGS.has('--json');

// Keys that every provider cache matrix entry is required to carry.
const REQUIRED_PROVIDER_KEYS = [
  'provider', 'sourceUrl', 'sourceType', 'verifiedAt',
  'cacheMode', 'minimumCacheableTokens', 'costModel',
];
44
+
45
/**
 * Append one violation record to the shared `violations` array.
 *
 * @param {object[]} violations - Accumulator that is mutated in place.
 * @param {string} kind - Machine-readable violation identifier.
 * @param {string} detail - Human-readable explanation.
 * @param {object} [context] - Extra metadata merged into the record
 *   (for example provider, fixture_id, scenario).
 */
function addViolation(violations, kind, detail, context = {}) {
  // Drop `kind`/`detail` from the context so caller-supplied metadata can never
  // overwrite the identity of the violation being recorded. `?? {}` keeps an
  // explicit `null` context working, as it did with a plain object spread.
  const { kind: _shadowedKind, detail: _shadowedDetail, ...metadata } = context ?? {};
  violations.push({ kind, detail, ...metadata });
}
48
+
49
/**
 * Check every provider cache matrix entry and record a violation for each
 * problem found: missing required keys, non-https or stale official-docs
 * metadata, drifted Anthropic TTL multipliers, and OpenAI/Gemini entries whose
 * cost model is not explicitly marked `accurate: false`.
 *
 * @param {object[]} violations - Accumulator that is mutated in place.
 */
function validateProviderMatrix(violations) {
  const providersWithoutExactPricing = ['openai', 'gemini'];

  listProviderCacheEntries().forEach((entry) => {
    const providerName = entry.provider;

    // 1. Required metadata keys, reported in declaration order.
    REQUIRED_PROVIDER_KEYS
      .filter((requiredKey) => !(requiredKey in entry))
      .forEach((missingKey) => {
        addViolation(violations, 'provider-metadata.missing-key', `${entry.provider} missing ${missingKey}`, { provider: providerName });
      });

    // 2. Entries sourced from official docs need an https URL and the current verification stamp.
    if (entry.sourceType === 'official-docs') {
      const hasHttpsSource = typeof entry.sourceUrl === 'string' && entry.sourceUrl.startsWith('https://');
      if (!hasHttpsSource) {
        addViolation(violations, 'provider-metadata.invalid-source-url', `${entry.provider} official sourceUrl must be https`, { provider: providerName });
      }
      const isFreshlyVerified = entry.verifiedAt === CACHE_MATRIX_VERIFIED_AT;
      if (!isFreshlyVerified) {
        addViolation(violations, 'provider-metadata.invalid-verified-at', `${entry.provider} verifiedAt must be ${CACHE_MATRIX_VERIFIED_AT}`, { provider: providerName });
      }
    }

    // 3. Anthropic TTL multipliers are pinned to the exact expected values.
    if (providerName === 'anthropic') {
      const fiveMinuteTtl = entry.costModel?.ttlOptions?.['5m'];
      const oneHourTtl = entry.costModel?.ttlOptions?.['1h'];

      const fiveMinuteIntact = fiveMinuteTtl?.writeMultiplier === 1.25 && fiveMinuteTtl?.readMultiplier === 0.1;
      if (!fiveMinuteIntact) {
        addViolation(violations, 'provider-metadata.anthropic-5m-multiplier', 'Anthropic 5m cache multipliers drifted');
      }

      const oneHourIntact = oneHourTtl?.writeMultiplier === 2.0 && oneHourTtl?.readMultiplier === 0.1;
      if (!oneHourIntact) {
        addViolation(violations, 'provider-metadata.anthropic-1h-multiplier', 'Anthropic 1h cache multipliers drifted');
      }
    }

    // 4. OpenAI and Gemini must explicitly declare `accurate: false`.
    const claimsAccuratePricing = entry.costModel?.accurate !== false;
    if (providersWithoutExactPricing.includes(providerName) && claimsAccuratePricing) {
      addViolation(violations, 'provider-metadata.fake-universal-pricing', `${entry.provider} must not claim universal accurate pricing`, { provider: providerName });
    }
  });
}
81
+
82
/**
 * Verify that the cache layer definitions are keyed by exactly the three known
 * layer IDs, in the order static prefix, semi-static context, dynamic suffix.
 *
 * @param {object[]} violations - Accumulator that is mutated in place.
 */
function validateLayerDefinitions(violations) {
  const declaredIds = Object.keys(CACHE_LAYER_DEFINITIONS);

  // NOTE(review): Object.keys() never yields the same key twice, so this
  // uniqueness check looks unable to fire for an object-keyed registry.
  // Confirm whether it was meant to inspect an `id` field on each definition.
  const distinctIdCount = new Set(declaredIds).size;
  if (distinctIdCount !== declaredIds.length) {
    addViolation(violations, 'layer-definition.duplicate-id', 'Cache layer definition IDs must be unique');
  }

  const { STATIC_PREFIX, SEMI_STATIC_CONTEXT, DYNAMIC_SUFFIX } = CACHE_LAYER_IDS;
  const requiredOrder = [STATIC_PREFIX, SEMI_STATIC_CONTEXT, DYNAMIC_SUFFIX];

  // Serialised comparison checks both membership and ordering in one step.
  const orderMatches = JSON.stringify(declaredIds) === JSON.stringify(requiredOrder);
  if (!orderMatches) {
    addViolation(violations, 'layer-definition.unexpected-order', `Expected ${requiredOrder.join(', ')}, got ${declaredIds.join(', ')}`);
  }
}
98
+
99
/**
 * Build the layered prompt contracts for every fixture and audit each scenario
 * (`always_included`, `with_loaded_rules`) for: contract validity, duplicate
 * layer IDs, the fixture's user message leaking into layer 1 or layer 2, and
 * an empty layer 3 dynamic suffix.
 *
 * A contract that fails validation may be structurally incomplete, so every
 * later check reads it defensively. A malformed contract is reported as a
 * violation rather than aborting the whole audit with a TypeError.
 *
 * @param {object[]} violations - Accumulator that is mutated in place.
 * @returns {{ fixtureCount: number, auditedScenarioCount: number }} Counts of
 *   fixtures loaded and scenarios audited.
 */
function validateFixtureSegmentation(violations) {
  const fixtures = loadFixtures();
  let auditedScenarioCount = 0;

  for (const fixture of fixtures) {
    const layered = buildCacheLayeredScenarioPrompts(fixture);
    const scenarios = [
      { name: 'always_included', contract: layered.alwaysIncluded },
      { name: 'with_loaded_rules', contract: layered.withLoadedRules },
    ];

    // An empty needle makes String.prototype.includes() return true for every
    // haystack, so the leak check only runs for a non-empty string message.
    const userMessage = fixture.user_message;
    const hasUserMessage = typeof userMessage === 'string' && userMessage.length > 0;

    for (const { name, contract } of scenarios) {
      auditedScenarioCount += 1;
      const scenarioContext = { fixture_id: fixture.id, scenario: name };

      try {
        validateCacheLayerContract(contract);
      } catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        addViolation(violations, 'fixture-segmentation.invalid-contract', reason, scenarioContext);
      }

      // From here on the contract may be malformed: read it defensively.
      const layerIds = Array.isArray(contract?.layers)
        ? contract.layers.map((layer) => layer?.id)
        : [];
      if (new Set(layerIds).size !== layerIds.length) {
        addViolation(violations, 'fixture-segmentation.duplicate-layer-id', 'Fixture contract contains duplicate layer IDs', scenarioContext);
      }

      const cacheableLayers = [
        contract?.layer_1_static_prefix,
        contract?.layer_2_semi_static_context,
      ];
      for (const layer of cacheableLayers) {
        const layerContent = layer?.content;
        if (hasUserMessage && typeof layerContent === 'string' && layerContent.includes(userMessage)) {
          addViolation(violations, 'fixture-segmentation.dynamic-leak', `Fixture user message leaked into ${layer.id}`, scenarioContext);
        }
      }

      // A missing or non-string dynamic suffix is treated the same as an empty one.
      const dynamicContent = contract?.layer_3_dynamic_suffix?.content;
      if (typeof dynamicContent !== 'string' || dynamicContent.trim().length === 0) {
        addViolation(violations, 'fixture-segmentation.missing-dynamic-layer', 'Layer 3 dynamic suffix is empty', scenarioContext);
      }
    }
  }

  return { fixtureCount: fixtures.length, auditedScenarioCount };
}
150
+
151
/**
 * Audit the emitted cache simulation result file.
 *
 * Records a violation when the file is missing, unreadable, or not valid JSON,
 * when `report_version` is not `2.0.0`, when `results` is empty, when the row
 * count differs from fixture_count * provider_count * scenario_count, and for
 * each row that lacks separated token/economic data, lacks a positive layer 3
 * token count, claims an accurate projection without source metadata, or emits
 * a first-request figure for OpenAI/Gemini.
 *
 * @param {object[]} violations - Accumulator that is mutated in place.
 * @param {string} [resultPath] - Path of the result JSON to audit.
 * @returns {{ resultPath: string, resultCount: number }} The audited path and
 *   the number of result rows found (0 when the file could not be used).
 */
function validateSimulationResultJson(violations, resultPath = DEFAULT_RESULT_PATH) {
  if (!existsSync(resultPath)) {
    addViolation(violations, 'result-json.missing', `Missing cache simulation result: ${resultPath}`);
    return { resultPath, resultCount: 0 };
  }

  // A corrupt or unreadable result file is an audit finding, not a crash.
  let parsed;
  try {
    parsed = JSON.parse(readFileSync(resultPath, 'utf8'));
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    addViolation(violations, 'result-json.invalid-json', `Cannot read or parse ${resultPath}: ${reason}`);
    return { resultPath, resultCount: 0 };
  }
  if (parsed === null || typeof parsed !== 'object') {
    addViolation(violations, 'result-json.invalid-json', `Top-level value in ${resultPath} must be a JSON object`);
    return { resultPath, resultCount: 0 };
  }

  if (parsed.report_version !== '2.0.0') {
    addViolation(violations, 'result-json.schema-version', `Unexpected report_version ${parsed.report_version}`);
  }
  if (!Array.isArray(parsed.results) || parsed.results.length === 0) {
    addViolation(violations, 'result-json.results-empty', 'results must be a non-empty array');
    return { resultPath, resultCount: 0 };
  }

  const expectedResultCount = parsed.fixture_count * parsed.provider_count * parsed.scenario_count;
  if (parsed.results.length !== expectedResultCount) {
    addViolation(violations, 'result-json.result-count', `Expected ${expectedResultCount} rows, got ${parsed.results.length}`);
  }

  for (const result of parsed.results) {
    // Optional chaining lets a null or primitive row be reported as a
    // malformed row instead of throwing.
    if (!result?.token_counts || !result?.economic_projection) {
      addViolation(violations, 'result-json.missing-separation', 'Result must separate token_counts and economic_projection', {
        fixture_id: result?.fixture_id,
        provider: result?.provider,
        scenario: result?.scenario,
      });
      continue;
    }
    if (typeof result.token_counts.layer_3_dynamic_suffix !== 'number' || result.token_counts.layer_3_dynamic_suffix <= 0) {
      addViolation(violations, 'result-json.missing-layer-3-tokens', 'Layer 3 token count must be present and positive', {
        fixture_id: result.fixture_id,
        provider: result.provider,
        scenario: result.scenario,
      });
    }
    if (result.economic_projection.accurate === true && (!result.source?.sourceUrl || !result.source?.verifiedAt)) {
      addViolation(violations, 'result-json.accurate-without-source', 'Accurate projection requires official source metadata', {
        fixture_id: result.fixture_id,
        provider: result.provider,
        scenario: result.scenario,
      });
    }
    if (['openai', 'gemini'].includes(result.provider) && result.economic_projection.first_request_effective_tokens !== null) {
      addViolation(violations, 'result-json.fake-openai-gemini-savings', `${result.provider} must not emit exact savings without pricing metadata`, {
        fixture_id: result.fixture_id,
        scenario: result.scenario,
      });
    }
  }

  return { resultPath, resultCount: parsed.results.length };
}
204
+
205
/**
 * Run every cache-layer contract check and assemble the audit report.
 *
 * @param {object} [options]
 * @param {string} [options.resultPath] - Cache simulation result JSON to audit.
 * @returns {object} Report holding audit metadata, the counts returned by the
 *   fixture and result checks, the collected violations, and a `passed` flag
 *   that is true only when no violation was recorded.
 */
export function runCacheLayerContractAudit({ resultPath = DEFAULT_RESULT_PATH } = {}) {
  const collected = [];

  // Checks that only append to the violation list.
  [validateProviderMatrix, validateLayerDefinitions].forEach((check) => check(collected));

  // Checks that also return summary counts for the report.
  const fixtureSummary = validateFixtureSegmentation(collected);
  const resultSummary = validateSimulationResultJson(collected, resultPath);

  const totalViolations = collected.length;
  const report = {
    auditName: 'audit-cache-layer-contract',
    reportVersion: '1.0.0',
    generatedAt: new Date().toISOString(),
    providerCount: Object.keys(PROVIDER_CACHE_MATRIX).length,
  };

  return Object.assign(report, fixtureSummary, resultSummary, {
    violationCount: totalViolations,
    violations: collected,
    passed: totalViolations === 0,
  });
}
224
+
225
/**
 * CLI entry point: run the audit, print the report, and set the exit status
 * (0 when the audit passed, 1 otherwise).
 *
 * With `--json` only the JSON report is written to stdout. Otherwise a
 * human-readable summary goes to stdout and a one-line machine-readable
 * `AUDIT_CACHE_LAYER_REPORT:` record goes to stderr.
 */
function main() {
  const report = runCacheLayerContractAudit();

  // Set process.exitCode and return instead of calling process.exit().
  // process.exit() can end the process before pending stdout/stderr writes
  // are flushed when the stream is a pipe, which would truncate the report.
  process.exitCode = report.passed ? 0 : 1;

  if (JSON_ONLY) {
    process.stdout.write(`${JSON.stringify(report, null, 2)}\n`);
    return;
  }

  console.log('===============================================');
  console.log(' audit:cache-layer-contract');
  console.log('===============================================');
  console.log(` Providers: ${report.providerCount}`);
  console.log(` Fixtures: ${report.fixtureCount}`);
  console.log(` Layered scenarios: ${report.auditedScenarioCount}`);
  console.log(` Result rows: ${report.resultCount}`);
  console.log('');

  if (report.passed) {
    console.log(' Cache layer contract audit clean.');
    process.stderr.write(`AUDIT_CACHE_LAYER_REPORT: ${JSON.stringify({ passed: true, providerCount: report.providerCount, resultCount: report.resultCount })}\n`);
    return;
  }

  console.log(' Violations:');
  for (const violation of report.violations) {
    console.log(` [${violation.kind}] ${violation.detail}`);
  }
  process.stderr.write(`AUDIT_CACHE_LAYER_REPORT: ${JSON.stringify({ passed: false, violationCount: report.violationCount })}\n`);
}
255
+
256
// Run main() only when this file is executed as a script, not when it is
// imported for runCacheLayerContractAudit(). process.argv[1] is undefined when
// no script path was given (for example `node -e "import(...)"`), so it
// defaults to an empty string instead of throwing at import time.
const invokedScriptPath = process.argv[1] ?? '';
if (
  import.meta.url === `file://${invokedScriptPath.replace(/\\/g, '/')}`
  // Fallback for paths where the simple file:// comparison does not match,
  // such as Windows drive-letter paths.
  || invokedScriptPath.endsWith('audit-cache-layer-contract.mjs')
) {
  main();
}