@ryuenn3123/agentic-senior-core 2.0.5 → 2.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/.agent-context/blueprints/mobile-app.md +91 -21
  2. package/.agent-context/profiles/platform.md +13 -13
  3. package/.agent-context/profiles/regulated.md +13 -13
  4. package/.agent-context/profiles/startup.md +13 -13
  5. package/.agent-context/prompts/review-code.md +3 -3
  6. package/.agent-context/review-checklists/frontend-skill-parity.md +28 -28
  7. package/.agent-context/review-checklists/frontend-usability.md +33 -33
  8. package/.agent-context/review-checklists/pr-checklist.md +11 -6
  9. package/.agent-context/review-checklists/release-operations.md +29 -29
  10. package/.agent-context/rules/api-docs.md +34 -0
  11. package/.agent-context/skills/README.md +62 -62
  12. package/.agent-context/skills/backend/README.md +67 -67
  13. package/.agent-context/skills/backend/architecture.md +360 -360
  14. package/.agent-context/skills/backend/compatibility-manifest.json +8 -8
  15. package/.agent-context/skills/backend/data-access.md +230 -230
  16. package/.agent-context/skills/backend/errors.md +137 -137
  17. package/.agent-context/skills/backend/validation.md +116 -116
  18. package/.agent-context/skills/backend.md +28 -28
  19. package/.agent-context/skills/cli/README.md +55 -49
  20. package/.agent-context/skills/cli/compatibility-manifest.json +8 -8
  21. package/.agent-context/skills/cli/init.md +37 -37
  22. package/.agent-context/skills/cli/output.md +35 -35
  23. package/.agent-context/skills/cli/safety-telemetry.md +39 -0
  24. package/.agent-context/skills/cli/upgrade.md +37 -37
  25. package/.agent-context/skills/cli.md +31 -28
  26. package/.agent-context/skills/distribution/.evidence/compatibility-manifest.json +9 -0
  27. package/.agent-context/skills/distribution/.evidence/sbom-excerpt.json +6 -0
  28. package/.agent-context/skills/distribution/.evidence/test-report.json +8 -0
  29. package/.agent-context/skills/distribution/CHANGELOG.md +7 -0
  30. package/.agent-context/skills/distribution/README.md +27 -19
  31. package/.agent-context/skills/distribution/compatibility-manifest.json +8 -8
  32. package/.agent-context/skills/distribution/compatibility.md +31 -31
  33. package/.agent-context/skills/distribution/package.json +5 -0
  34. package/.agent-context/skills/distribution/provenance-attestation.md +47 -0
  35. package/.agent-context/skills/distribution/publish.md +36 -36
  36. package/.agent-context/skills/distribution/rollback.md +31 -31
  37. package/.agent-context/skills/distribution/tests/.gitkeep +1 -0
  38. package/.agent-context/skills/distribution.md +31 -28
  39. package/.agent-context/skills/frontend/.evidence/compatibility-manifest.json +9 -0
  40. package/.agent-context/skills/frontend/.evidence/sbom-excerpt.json +6 -0
  41. package/.agent-context/skills/frontend/.evidence/test-report.json +8 -0
  42. package/.agent-context/skills/frontend/CHANGELOG.md +7 -0
  43. package/.agent-context/skills/frontend/README.md +49 -36
  44. package/.agent-context/skills/frontend/accessibility.md +107 -107
  45. package/.agent-context/skills/frontend/compatibility-manifest.json +8 -8
  46. package/.agent-context/skills/frontend/conversion-clarity.md +51 -0
  47. package/.agent-context/skills/frontend/motion.md +66 -66
  48. package/.agent-context/skills/frontend/package.json +5 -0
  49. package/.agent-context/skills/frontend/performance.md +62 -62
  50. package/.agent-context/skills/frontend/responsive-delivery.md +41 -0
  51. package/.agent-context/skills/frontend/tests/.gitkeep +1 -0
  52. package/.agent-context/skills/frontend/ui-architecture.md +128 -128
  53. package/.agent-context/skills/frontend.md +35 -29
  54. package/.agent-context/skills/fullstack/.evidence/compatibility-manifest.json +9 -0
  55. package/.agent-context/skills/fullstack/.evidence/sbom-excerpt.json +6 -0
  56. package/.agent-context/skills/fullstack/.evidence/test-report.json +8 -0
  57. package/.agent-context/skills/fullstack/CHANGELOG.md +7 -0
  58. package/.agent-context/skills/fullstack/README.md +27 -19
  59. package/.agent-context/skills/fullstack/compatibility-manifest.json +8 -8
  60. package/.agent-context/skills/fullstack/contracts.md +52 -52
  61. package/.agent-context/skills/fullstack/end-to-end.md +41 -41
  62. package/.agent-context/skills/fullstack/feature-slicing.md +64 -64
  63. package/.agent-context/skills/fullstack/package.json +5 -0
  64. package/.agent-context/skills/fullstack/release-coordination.md +51 -0
  65. package/.agent-context/skills/fullstack/tests/.gitkeep +1 -0
  66. package/.agent-context/skills/fullstack.md +29 -26
  67. package/.agent-context/skills/index.json +107 -107
  68. package/.agent-context/skills/review-quality/.evidence/compatibility-manifest.json +9 -0
  69. package/.agent-context/skills/review-quality/.evidence/sbom-excerpt.json +6 -0
  70. package/.agent-context/skills/review-quality/.evidence/test-report.json +8 -0
  71. package/.agent-context/skills/review-quality/CHANGELOG.md +7 -0
  72. package/.agent-context/skills/review-quality/README.md +27 -19
  73. package/.agent-context/skills/review-quality/benchmark.md +29 -29
  74. package/.agent-context/skills/review-quality/compatibility-manifest.json +8 -8
  75. package/.agent-context/skills/review-quality/package.json +5 -0
  76. package/.agent-context/skills/review-quality/planning.md +37 -37
  77. package/.agent-context/skills/review-quality/release-decision.md +49 -0
  78. package/.agent-context/skills/review-quality/security.md +33 -33
  79. package/.agent-context/skills/review-quality/tests/.gitkeep +1 -0
  80. package/.agent-context/skills/review-quality.md +33 -27
  81. package/.agent-context/stacks/flutter.md +16 -16
  82. package/.agent-context/stacks/react-native.md +16 -16
  83. package/.agent-context/state/architecture-map.md +25 -25
  84. package/.agent-context/state/benchmark-analysis.json +431 -431
  85. package/.agent-context/state/benchmark-thresholds.json +10 -10
  86. package/.agent-context/state/benchmark-watchlist.json +19 -19
  87. package/.agent-context/state/dependency-map.md +32 -32
  88. package/.agent-context/state/quality-trend-report.json +16 -6
  89. package/.agent-context/state/skill-platform.json +38 -38
  90. package/.agent-context/state/weekly-governance-report.json +126 -0
  91. package/.agent-override.md +36 -36
  92. package/.cursorrules +1 -1
  93. package/.gemini/instructions.md +20 -20
  94. package/.github/ISSUE_TEMPLATE/v1.7-frontend-work-item.yml +54 -54
  95. package/.github/copilot-instructions.md +21 -21
  96. package/.github/workflows/benchmark-detection.yml +38 -38
  97. package/.github/workflows/benchmark-intelligence.yml +50 -50
  98. package/.github/workflows/frontend-usability-gate.yml +36 -36
  99. package/.github/workflows/governance-weekly-report.yml +43 -0
  100. package/.github/workflows/release-gate.yml +32 -32
  101. package/.github/workflows/sbom-compliance.yml +32 -32
  102. package/.windsurfrules +1 -1
  103. package/AGENTS.md +27 -27
  104. package/README.md +389 -368
  105. package/lib/cli/commands/init.mjs +13 -1
  106. package/lib/cli/commands/optimize.mjs +171 -171
  107. package/lib/cli/commands/upgrade.mjs +9 -1
  108. package/lib/cli/compatibility.mjs +124 -124
  109. package/lib/cli/constants.mjs +37 -2
  110. package/lib/cli/token-optimization.mjs +275 -275
  111. package/lib/cli/utils.mjs +24 -3
  112. package/mcp.json +92 -92
  113. package/package.json +2 -1
  114. package/scripts/benchmark-gate.mjs +121 -121
  115. package/scripts/benchmark-intelligence.mjs +140 -140
  116. package/scripts/detection-benchmark.mjs +138 -138
  117. package/scripts/frontend-usability-audit.mjs +87 -87
  118. package/scripts/generate-sbom.mjs +61 -61
  119. package/scripts/governance-weekly-report.mjs +293 -0
  120. package/scripts/init-project.ps1 +104 -104
  121. package/scripts/llm-judge.mjs +664 -664
  122. package/scripts/quality-trend-report.mjs +288 -288
  123. package/scripts/release-gate.mjs +261 -259
  124. package/scripts/skill-tier-policy.mjs +75 -75
  125. package/scripts/token-optimization-benchmark.mjs +252 -252
  126. package/scripts/validate.mjs +942 -865
@@ -1,76 +1,76 @@
1
- export const SKILL_TIER_MINIMUMS = {
2
- standard: { minWords: 60, minHeadings: 1, minChecklistItems: 0, minCodeBlocks: 0 },
3
- advance: { minWords: 100, minHeadings: 2, minChecklistItems: 1, minCodeBlocks: 0 },
4
- expert: { minWords: 130, minHeadings: 3, minChecklistItems: 1, minCodeBlocks: 0 },
5
- above: { minWords: 240, minHeadings: 3, minChecklistItems: 1, minCodeBlocks: 1 },
6
- };
7
-
8
- export function countWords(markdownContent) {
9
- return markdownContent
10
- .replace(/```[\s\S]*?```/g, ' ')
11
- .replace(/[^A-Za-z0-9_\-\s]/g, ' ')
12
- .trim()
13
- .split(/\s+/)
14
- .filter(Boolean).length;
15
- }
16
-
17
- export function countMarkdownHeadings(markdownContent) {
18
- const headingMatches = markdownContent.match(/^#{2,6}\s+/gm);
19
- return headingMatches ? headingMatches.length : 0;
20
- }
21
-
22
- export function countChecklistItems(markdownContent) {
23
- const checklistMatches = markdownContent.match(/^\s*[-*]\s+\[[ xX]\]\s+/gm);
24
- return checklistMatches ? checklistMatches.length : 0;
25
- }
26
-
27
- export function countCodeBlocks(markdownContent) {
28
- const fenceMatches = markdownContent.match(/```/g);
29
- if (!fenceMatches) {
30
- return 0;
31
- }
32
-
33
- return Math.floor(fenceMatches.length / 2);
34
- }
35
-
36
- export function extractSkillTier(markdownContent) {
37
- const normalizedMarkdownContent = markdownContent.replace(/\*\*/g, '');
38
- const tierMatch = normalizedMarkdownContent.match(/\bTier\s*:\s*`?(standard|advance|expert|above)`?\b/i);
39
- return tierMatch ? tierMatch[1].toLowerCase() : null;
40
- }
41
-
42
- export function validateSkillTopicContent(markdownContent) {
43
- const detectedTier = extractSkillTier(markdownContent);
44
-
45
- if (!detectedTier) {
46
- return { isValid: false, reason: 'missing-tier' };
47
- }
48
-
49
- const minimumRules = SKILL_TIER_MINIMUMS[detectedTier];
50
- if (!minimumRules) {
51
- return { isValid: false, reason: 'unsupported-tier', detectedTier };
52
- }
53
-
54
- const wordCount = countWords(markdownContent);
55
- const headingCount = countMarkdownHeadings(markdownContent);
56
- const checklistCount = countChecklistItems(markdownContent);
57
- const codeBlockCount = countCodeBlocks(markdownContent);
58
-
59
- if (wordCount < minimumRules.minWords) {
60
- return { isValid: false, reason: 'word-count', detectedTier, wordCount, minimumRules };
61
- }
62
-
63
- if (headingCount < minimumRules.minHeadings) {
64
- return { isValid: false, reason: 'heading-count', detectedTier, headingCount, minimumRules };
65
- }
66
-
67
- if (checklistCount < minimumRules.minChecklistItems) {
68
- return { isValid: false, reason: 'checklist-count', detectedTier, checklistCount, minimumRules };
69
- }
70
-
71
- if (codeBlockCount < minimumRules.minCodeBlocks) {
72
- return { isValid: false, reason: 'code-block-count', detectedTier, codeBlockCount, minimumRules };
73
- }
74
-
75
- return { isValid: true, detectedTier, wordCount, headingCount, checklistCount, codeBlockCount, minimumRules };
1
/**
 * Minimum content thresholds for a skill topic, keyed by tier name.
 *
 * For each tier the entry gives the lowest acceptable number of words,
 * markdown headings, checklist items and fenced code blocks. The values are
 * looked up by `validateSkillTopicContent` using the tier found in the
 * document.
 */
export const SKILL_TIER_MINIMUMS = {
  standard: {
    minWords: 60,
    minHeadings: 1,
    minChecklistItems: 0,
    minCodeBlocks: 0,
  },
  advance: {
    minWords: 100,
    minHeadings: 2,
    minChecklistItems: 1,
    minCodeBlocks: 0,
  },
  expert: {
    minWords: 130,
    minHeadings: 3,
    minChecklistItems: 1,
    minCodeBlocks: 0,
  },
  above: {
    minWords: 240,
    minHeadings: 3,
    minChecklistItems: 1,
    minCodeBlocks: 1,
  },
};
7
+
8
/**
 * Counts the prose words in a markdown document.
 *
 * Fenced code blocks are removed first so sample code does not inflate the
 * count. Remaining punctuation is replaced by spaces; underscores and hyphens
 * are kept so `snake_case` and `well-known` stay single words.
 *
 * A token only counts as a word when it contains at least one letter or
 * digit, so list bullets (`-`), horizontal rules (`---`) and stray `__`
 * markers are not counted. Letters and digits are matched with Unicode
 * properties, so accented or non-Latin words are not split or dropped.
 *
 * @param {string} markdownContent - Raw markdown text.
 * @returns {number} Number of prose words outside fenced code blocks.
 */
export function countWords(markdownContent) {
  const proseOnly = markdownContent.replace(/```[\s\S]*?```/g, ' ');

  const candidateTokens = proseOnly
    .replace(/[^\p{L}\p{M}\p{N}_\-\s]/gu, ' ')
    .split(/\s+/);

  // Empty strings from leading/trailing whitespace fail this test as well,
  // so no separate trim/filter(Boolean) step is needed.
  return candidateTokens.filter((token) => /[\p{L}\p{N}]/u.test(token)).length;
}
16
+
17
/**
 * Counts level-2 through level-6 markdown headings (`##` … `######`).
 *
 * Level-1 headings are not counted. Fenced code blocks are removed before
 * matching, so a line such as `## comment` inside a shell snippet is not
 * mistaken for a heading; this matches how `countWords` ignores fenced code.
 *
 * @param {string} markdownContent - Raw markdown text.
 * @returns {number} Number of headings found outside fenced code blocks.
 */
export function countMarkdownHeadings(markdownContent) {
  // Replace each fence with a space (not a newline) so removing an inline
  // fence cannot push trailing text to the start of a new line.
  const contentOutsideFences = markdownContent.replace(/```[\s\S]*?```/g, ' ');
  const headingMatches = contentOutsideFences.match(/^#{2,6}\s+/gm);
  return headingMatches ? headingMatches.length : 0;
}
21
+
22
/**
 * Counts markdown task-list items such as `- [ ] todo` or `* [x] done`.
 *
 * An item is a `-` or `*` bullet (optionally indented) followed by a
 * `[ ]`, `[x]` or `[X]` box and trailing whitespace.
 *
 * @param {string} markdownContent - Raw markdown text.
 * @returns {number} Number of checklist items found.
 */
export function countChecklistItems(markdownContent) {
  const checklistPattern = /^\s*[-*]\s+\[[ xX]\]\s+/gm;
  let itemTotal = 0;

  for (const _match of markdownContent.matchAll(checklistPattern)) {
    itemTotal += 1;
  }

  return itemTotal;
}
26
+
27
/**
 * Counts fenced code blocks by pairing up triple-backtick markers.
 *
 * Every two markers form one block; an unpaired trailing marker is ignored.
 *
 * @param {string} markdownContent - Raw markdown text.
 * @returns {number} Number of complete fence pairs.
 */
export function countCodeBlocks(markdownContent) {
  // Splitting on the marker yields one more piece than there are markers.
  const fenceMarkerCount = markdownContent.split('```').length - 1;
  return fenceMarkerCount === 0 ? 0 : Math.floor(fenceMarkerCount / 2);
}
35
+
36
/**
 * Finds the tier declared in a skill document.
 *
 * Bold markers (`**`) are removed first, then the text is searched for
 * `Tier: <name>` where the name is one of standard, advance, expert or above
 * (case-insensitive, optionally wrapped in backticks).
 *
 * @param {string} markdownContent - Raw markdown text.
 * @returns {string|null} The lower-cased tier name, or null when none is declared.
 */
export function extractSkillTier(markdownContent) {
  const withoutBoldMarkers = markdownContent.split('**').join('');
  const tierPattern = /\bTier\s*:\s*`?(standard|advance|expert|above)`?\b/i;
  const found = tierPattern.exec(withoutBoldMarkers);

  if (found === null) {
    return null;
  }

  const [, tierName] = found;
  return tierName.toLowerCase();
}
41
+
42
/**
 * Checks a skill topic document against the minimums for its declared tier.
 *
 * The tier is read from the document, then word, heading, checklist and
 * code-block counts are compared with `SKILL_TIER_MINIMUMS` in that order.
 * The first failing check determines the result.
 *
 * @param {string} markdownContent - Raw markdown text of the skill topic.
 * @returns {object} `{ isValid: true, ... }` with every count on success, or
 *   `{ isValid: false, reason, ... }` where `reason` is one of
 *   'missing-tier', 'unsupported-tier', 'word-count', 'heading-count',
 *   'checklist-count' or 'code-block-count'.
 */
export function validateSkillTopicContent(markdownContent) {
  const detectedTier = extractSkillTier(markdownContent);
  if (!detectedTier) {
    return { isValid: false, reason: 'missing-tier' };
  }

  const minimumRules = SKILL_TIER_MINIMUMS[detectedTier];
  if (!minimumRules) {
    return { isValid: false, reason: 'unsupported-tier', detectedTier };
  }

  const wordCount = countWords(markdownContent);
  const headingCount = countMarkdownHeadings(markdownContent);
  const checklistCount = countChecklistItems(markdownContent);
  const codeBlockCount = countCodeBlocks(markdownContent);

  // Evaluated top to bottom; the failing result reports only the count that failed.
  const thresholdChecks = [
    ['word-count', 'wordCount', wordCount, minimumRules.minWords],
    ['heading-count', 'headingCount', headingCount, minimumRules.minHeadings],
    ['checklist-count', 'checklistCount', checklistCount, minimumRules.minChecklistItems],
    ['code-block-count', 'codeBlockCount', codeBlockCount, minimumRules.minCodeBlocks],
  ];

  for (const [reason, countKey, actualCount, requiredMinimum] of thresholdChecks) {
    if (actualCount < requiredMinimum) {
      return { isValid: false, reason, detectedTier, [countKey]: actualCount, minimumRules };
    }
  }

  return { isValid: true, detectedTier, wordCount, headingCount, checklistCount, codeBlockCount, minimumRules };
}
@@ -1,252 +1,252 @@
1
- #!/usr/bin/env node
2
-
3
- /**
4
- * token-optimization-benchmark.mjs
5
- *
6
- * Measures token-estimate reduction between baseline commands and
7
- * native optimized command variants. If RTK is installed, it also
8
- * compares RTK command output in the same scenarios.
9
- */
10
-
11
- import { existsSync } from 'node:fs';
12
- import fs from 'node:fs/promises';
13
- import { spawnSync } from 'node:child_process';
14
- import { dirname, join, resolve } from 'node:path';
15
- import { fileURLToPath } from 'node:url';
16
-
17
- const SCRIPT_FILE_PATH = fileURLToPath(import.meta.url);
18
- const SCRIPT_DIR = dirname(SCRIPT_FILE_PATH);
19
- const REPOSITORY_ROOT = resolve(SCRIPT_DIR, '..');
20
- const REPORT_PATH = join(REPOSITORY_ROOT, '.agent-context', 'state', 'token-optimization-benchmark.json');
21
- const LOCAL_RTK_BINARY_PATH = join(REPOSITORY_ROOT, '.benchmarks', 'tools', 'rtk', 'rtk.exe');
22
- const TOKEN_ESTIMATE_DIVISOR = 4;
23
- const ARGUMENT_FLAGS = new Set(process.argv.slice(2));
24
- const isStdoutOnlyMode = ARGUMENT_FLAGS.has('--stdout-only');
25
-
26
- const BENCHMARK_SCENARIOS = [
27
- {
28
- scenarioId: 'git-show',
29
- scenarioName: 'Latest commit detail review',
30
- baselineCommand: { command: 'git', args: ['show', 'HEAD', '--stat', '--patch', '--no-color'] },
31
- nativeOptimizedCommand: { command: 'git', args: ['show', 'HEAD', '--stat', '--no-color'] },
32
- rtkCommand: { command: 'rtk', args: ['git', 'show', 'HEAD', '--stat', '--patch', '--no-color'] },
33
- },
34
- {
35
- scenarioId: 'git-log',
36
- scenarioName: 'Commit history review',
37
- baselineCommand: { command: 'git', args: ['log', '-n', '50'] },
38
- nativeOptimizedCommand: { command: 'git', args: ['log', '--oneline', '-n', '50'] },
39
- rtkCommand: { command: 'rtk', args: ['git', 'log', '-n', '50'] },
40
- },
41
- {
42
- scenarioId: 'search-token',
43
- scenarioName: 'Search result scan',
44
- baselineCommand: { command: 'git', args: ['grep', '-n', 'token'] },
45
- nativeOptimizedCommand: { command: 'git', args: ['grep', '-n', 'token', 'README.md', 'docs'] },
46
- rtkCommand: { command: 'rtk', args: ['git', 'grep', '-n', 'token'] },
47
- },
48
- ];
49
-
50
- function estimateTokenCount(outputText) {
51
- if (!outputText || outputText.length === 0) {
52
- return 0;
53
- }
54
-
55
- return Math.ceil(outputText.length / TOKEN_ESTIMATE_DIVISOR);
56
- }
57
-
58
- function formatCommandLine(commandDefinition) {
59
- return [commandDefinition.command, ...commandDefinition.args].join(' ');
60
- }
61
-
62
- function executeCommand(commandDefinition) {
63
- const commandResult = spawnSync(commandDefinition.command, commandDefinition.args, {
64
- cwd: REPOSITORY_ROOT,
65
- encoding: 'utf8',
66
- maxBuffer: 1024 * 1024 * 20,
67
- });
68
-
69
- const standardOutput = commandResult.stdout || '';
70
- const standardError = commandResult.stderr || '';
71
- const combinedOutput = `${standardOutput}${standardError}`;
72
- const normalizedOutput = combinedOutput.trimEnd();
73
-
74
- const outputLineCount = normalizedOutput.length === 0
75
- ? 0
76
- : normalizedOutput.split(/\r?\n/u).length;
77
-
78
- return {
79
- command: formatCommandLine(commandDefinition),
80
- exitCode: typeof commandResult.status === 'number' ? commandResult.status : 1,
81
- outputChars: normalizedOutput.length,
82
- outputLines: outputLineCount,
83
- estimatedTokens: estimateTokenCount(normalizedOutput),
84
- };
85
- }
86
-
87
- function resolveRtkBinaryPath() {
88
- const configuredRtkBinaryPath = process.env.RTK_BINARY_PATH;
89
-
90
- if (configuredRtkBinaryPath && existsSync(configuredRtkBinaryPath)) {
91
- return {
92
- executablePath: configuredRtkBinaryPath,
93
- source: 'env',
94
- };
95
- }
96
-
97
- if (existsSync(LOCAL_RTK_BINARY_PATH)) {
98
- return {
99
- executablePath: LOCAL_RTK_BINARY_PATH,
100
- source: 'local-tools',
101
- };
102
- }
103
-
104
- return {
105
- executablePath: 'rtk',
106
- source: 'system-path',
107
- };
108
- }
109
-
110
- function detectRtkAvailability() {
111
- const rtkBinary = resolveRtkBinaryPath();
112
-
113
- const versionResult = spawnSync(rtkBinary.executablePath, ['--version'], {
114
- cwd: REPOSITORY_ROOT,
115
- encoding: 'utf8',
116
- maxBuffer: 1024 * 1024,
117
- });
118
-
119
- if (versionResult.error || versionResult.status !== 0) {
120
- return {
121
- isAvailable: false,
122
- version: null,
123
- source: rtkBinary.source,
124
- executablePath: rtkBinary.executablePath,
125
- reason: versionResult.error
126
- ? versionResult.error.message
127
- : (versionResult.stderr || versionResult.stdout || 'RTK command unavailable').trim(),
128
- };
129
- }
130
-
131
- const versionMatch = (versionResult.stdout || '').match(/\d+\.\d+\.\d+/u);
132
- return {
133
- isAvailable: true,
134
- version: versionMatch ? versionMatch[0] : null,
135
- source: rtkBinary.source,
136
- executablePath: rtkBinary.executablePath,
137
- reason: null,
138
- };
139
- }
140
-
141
- function buildRtkCommand(commandDefinition, rtkAvailability) {
142
- return {
143
- command: rtkAvailability.executablePath || commandDefinition.command,
144
- args: commandDefinition.args,
145
- };
146
- }
147
-
148
- function computeSavingsSummary(baselineTokens, optimizedTokens) {
149
- if (baselineTokens <= 0) {
150
- return {
151
- tokenDelta: 0,
152
- savingsPercent: 0,
153
- };
154
- }
155
-
156
- const tokenDelta = baselineTokens - optimizedTokens;
157
- const savingsPercent = Number(((tokenDelta / baselineTokens) * 100).toFixed(2));
158
-
159
- return {
160
- tokenDelta,
161
- savingsPercent,
162
- };
163
- }
164
-
165
- function calculateAverageSavings(benchmarkRows, savingsKeyName) {
166
- const availableSavings = benchmarkRows
167
- .map((benchmarkRow) => benchmarkRow[savingsKeyName]?.savingsPercent)
168
- .filter((savingsPercent) => typeof savingsPercent === 'number');
169
-
170
- if (availableSavings.length === 0) {
171
- return null;
172
- }
173
-
174
- const totalSavings = availableSavings.reduce((runningTotal, savingsPercent) => runningTotal + savingsPercent, 0);
175
- return Number((totalSavings / availableSavings.length).toFixed(2));
176
- }
177
-
178
- async function runTokenOptimizationBenchmark() {
179
- const rtkAvailability = detectRtkAvailability();
180
- const scenarioResults = [];
181
-
182
- for (const benchmarkScenario of BENCHMARK_SCENARIOS) {
183
- const baselineRun = executeCommand(benchmarkScenario.baselineCommand);
184
- const nativeOptimizedRun = executeCommand(benchmarkScenario.nativeOptimizedCommand);
185
- const nativeSavings = computeSavingsSummary(
186
- baselineRun.estimatedTokens,
187
- nativeOptimizedRun.estimatedTokens
188
- );
189
-
190
- let rtkRun = null;
191
- let rtkSavings = null;
192
-
193
- if (rtkAvailability.isAvailable) {
194
- rtkRun = executeCommand(buildRtkCommand(benchmarkScenario.rtkCommand, rtkAvailability));
195
- if (rtkRun.exitCode === 0) {
196
- rtkSavings = computeSavingsSummary(baselineRun.estimatedTokens, rtkRun.estimatedTokens);
197
- }
198
- }
199
-
200
- scenarioResults.push({
201
- scenarioId: benchmarkScenario.scenarioId,
202
- scenarioName: benchmarkScenario.scenarioName,
203
- baseline: baselineRun,
204
- nativeOptimized: nativeOptimizedRun,
205
- nativeSavings,
206
- rtk: rtkRun,
207
- rtkSavings,
208
- });
209
- }
210
-
211
- const benchmarkReport = {
212
- generatedAt: new Date().toISOString(),
213
- reportName: 'token-optimization-benchmark',
214
- methodology: {
215
- tokenEstimate: `estimated_tokens = ceil(output_chars / ${TOKEN_ESTIMATE_DIVISOR})`,
216
- scope: 'command-output estimate only; model-specific tokenization differs by provider',
217
- repositoryRoot: REPOSITORY_ROOT,
218
- },
219
- environment: {
220
- nodeVersion: process.version,
221
- platform: process.platform,
222
- rtk: rtkAvailability,
223
- },
224
- scenarios: scenarioResults,
225
- summary: {
226
- scenarioCount: scenarioResults.length,
227
- averageNativeSavingsPercent: calculateAverageSavings(scenarioResults, 'nativeSavings'),
228
- averageRtkSavingsPercent: calculateAverageSavings(scenarioResults, 'rtkSavings'),
229
- },
230
- artifact: {
231
- path: REPORT_PATH,
232
- writeMode: isStdoutOnlyMode ? 'stdout-only' : 'stdout-and-file',
233
- },
234
- };
235
-
236
- if (!isStdoutOnlyMode) {
237
- await fs.mkdir(dirname(REPORT_PATH), { recursive: true });
238
- await fs.writeFile(REPORT_PATH, JSON.stringify(benchmarkReport, null, 2) + '\n', 'utf8');
239
- }
240
-
241
- return benchmarkReport;
242
- }
243
-
244
- runTokenOptimizationBenchmark()
245
- .then((benchmarkReport) => {
246
- console.log(JSON.stringify(benchmarkReport, null, 2));
247
- })
248
- .catch((benchmarkError) => {
249
- const errorMessage = benchmarkError instanceof Error ? benchmarkError.message : String(benchmarkError);
250
- console.error(`Token benchmark failed: ${errorMessage}`);
251
- process.exit(1);
252
- });
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * token-optimization-benchmark.mjs
5
+ *
6
+ * Measures token-estimate reduction between baseline commands and
7
+ * native optimized command variants. If RTK is installed, it also
8
+ * compares RTK command output in the same scenarios.
9
+ */
10
+
11
+ import { existsSync } from 'node:fs';
12
+ import fs from 'node:fs/promises';
13
+ import { spawnSync } from 'node:child_process';
14
+ import { dirname, join, resolve } from 'node:path';
15
+ import { fileURLToPath } from 'node:url';
16
+
17
// All paths are derived from this script's own location; the repository root
// is its parent directory.
const SCRIPT_FILE_PATH = fileURLToPath(import.meta.url);
const SCRIPT_DIR = dirname(SCRIPT_FILE_PATH);
const REPOSITORY_ROOT = resolve(SCRIPT_DIR, '..');

// Where the JSON report is written unless --stdout-only is passed.
const REPORT_PATH = join(REPOSITORY_ROOT, '.agent-context', 'state', 'token-optimization-benchmark.json');

// Locally vendored RTK binary. Only Windows executables carry the ".exe"
// suffix, so the file name is chosen per platform; a hard-coded "rtk.exe"
// would never be found on Linux or macOS.
const LOCAL_RTK_BINARY_PATH = join(
  REPOSITORY_ROOT,
  '.benchmarks',
  'tools',
  'rtk',
  process.platform === 'win32' ? 'rtk.exe' : 'rtk'
);

// Token estimate used throughout: ceil(characters / TOKEN_ESTIMATE_DIVISOR).
const TOKEN_ESTIMATE_DIVISOR = 4;

// Command-line flags, compared verbatim (e.g. "--stdout-only").
const ARGUMENT_FLAGS = new Set(process.argv.slice(2));
const isStdoutOnlyMode = ARGUMENT_FLAGS.has('--stdout-only');
25
+
26
/**
 * Scenarios measured by the benchmark.
 *
 * Each scenario names three commands: the baseline, a natively optimized
 * variant of it, and the RTK-wrapped equivalent of the baseline.
 */
const BENCHMARK_SCENARIOS = [
  {
    scenarioId: 'git-show',
    scenarioName: 'Latest commit detail review',
    baselineCommand: {
      command: 'git',
      args: ['show', 'HEAD', '--stat', '--patch', '--no-color'],
    },
    nativeOptimizedCommand: {
      command: 'git',
      args: ['show', 'HEAD', '--stat', '--no-color'],
    },
    rtkCommand: {
      command: 'rtk',
      args: ['git', 'show', 'HEAD', '--stat', '--patch', '--no-color'],
    },
  },
  {
    scenarioId: 'git-log',
    scenarioName: 'Commit history review',
    baselineCommand: {
      command: 'git',
      args: ['log', '-n', '50'],
    },
    nativeOptimizedCommand: {
      command: 'git',
      args: ['log', '--oneline', '-n', '50'],
    },
    rtkCommand: {
      command: 'rtk',
      args: ['git', 'log', '-n', '50'],
    },
  },
  {
    scenarioId: 'search-token',
    scenarioName: 'Search result scan',
    baselineCommand: {
      command: 'git',
      args: ['grep', '-n', 'token'],
    },
    nativeOptimizedCommand: {
      command: 'git',
      args: ['grep', '-n', 'token', 'README.md', 'docs'],
    },
    rtkCommand: {
      command: 'rtk',
      args: ['git', 'grep', '-n', 'token'],
    },
  },
];
49
+
50
/**
 * Estimates the token count of a piece of command output.
 *
 * The estimate is the character count divided by TOKEN_ESTIMATE_DIVISOR,
 * rounded up. Missing or empty output yields 0.
 *
 * @param {string} outputText - Captured command output.
 * @returns {number} Estimated number of tokens.
 */
function estimateTokenCount(outputText) {
  return outputText
    ? Math.ceil(outputText.length / TOKEN_ESTIMATE_DIVISOR)
    : 0;
}
57
+
58
/**
 * Renders a command definition as one space-separated string for the report.
 *
 * Arguments are joined verbatim; no shell quoting is applied.
 *
 * @param {{ command: string, args: string[] }} commandDefinition
 * @returns {string} The executable followed by its arguments.
 */
function formatCommandLine(commandDefinition) {
  const { command, args } = commandDefinition;
  const lineParts = [command];
  lineParts.push(...args);
  return lineParts.join(' ');
}
61
+
62
/**
 * Runs a command synchronously from the repository root and measures its output.
 *
 * stdout and stderr are concatenated (stdout first) and trailing whitespace
 * is trimmed before measuring. When the process yields no numeric exit
 * status, the exit code is reported as 1.
 *
 * @param {{ command: string, args: string[] }} commandDefinition
 * @returns {{ command: string, exitCode: number, outputChars: number, outputLines: number, estimatedTokens: number }}
 */
function executeCommand(commandDefinition) {
  const { command, args } = commandDefinition;
  const { stdout, stderr, status } = spawnSync(command, args, {
    cwd: REPOSITORY_ROOT,
    encoding: 'utf8',
    maxBuffer: 1024 * 1024 * 20,
  });

  const normalizedOutput = `${stdout || ''}${stderr || ''}`.trimEnd();

  // Splitting an empty string would yield one element, so empty output is
  // handled explicitly as zero lines.
  let outputLines = 0;
  if (normalizedOutput.length !== 0) {
    outputLines = normalizedOutput.split(/\r?\n/u).length;
  }

  return {
    command: formatCommandLine(commandDefinition),
    exitCode: typeof status === 'number' ? status : 1,
    outputChars: normalizedOutput.length,
    outputLines,
    estimatedTokens: estimateTokenCount(normalizedOutput),
  };
}
86
+
87
/**
 * Chooses which RTK executable to use.
 *
 * Preference order:
 *   1. the path in the RTK_BINARY_PATH environment variable, if that file exists;
 *   2. the locally vendored binary at LOCAL_RTK_BINARY_PATH, if it exists;
 *   3. the bare command name `rtk`.
 *
 * @returns {{ executablePath: string, source: 'env' | 'local-tools' | 'system-path' }}
 */
function resolveRtkBinaryPath() {
  const pathFromEnvironment = process.env.RTK_BINARY_PATH;
  const environmentPathUsable = Boolean(pathFromEnvironment) && existsSync(pathFromEnvironment);

  if (environmentPathUsable) {
    return { executablePath: pathFromEnvironment, source: 'env' };
  }

  return existsSync(LOCAL_RTK_BINARY_PATH)
    ? { executablePath: LOCAL_RTK_BINARY_PATH, source: 'local-tools' }
    : { executablePath: 'rtk', source: 'system-path' };
}
109
+
110
/**
 * Probes the resolved RTK executable by running it with `--version`.
 *
 * RTK is considered available when the process starts without error and
 * exits with status 0. The version is the first `x.y.z` sequence in stdout,
 * or null when none is found.
 *
 * @returns {{ isAvailable: boolean, version: string|null, source: string, executablePath: string, reason: string|null }}
 *   On failure, `reason` holds the spawn error message, or else the trimmed
 *   stderr/stdout text (falling back to 'RTK command unavailable').
 */
function detectRtkAvailability() {
  const { executablePath, source } = resolveRtkBinaryPath();

  const versionProbe = spawnSync(executablePath, ['--version'], {
    cwd: REPOSITORY_ROOT,
    encoding: 'utf8',
    maxBuffer: 1024 * 1024,
  });

  const probeSucceeded = !versionProbe.error && versionProbe.status === 0;

  if (probeSucceeded) {
    const versionMatch = (versionProbe.stdout || '').match(/\d+\.\d+\.\d+/u);
    return {
      isAvailable: true,
      version: versionMatch ? versionMatch[0] : null,
      source,
      executablePath,
      reason: null,
    };
  }

  const reason = versionProbe.error
    ? versionProbe.error.message
    : (versionProbe.stderr || versionProbe.stdout || 'RTK command unavailable').trim();

  return {
    isAvailable: false,
    version: null,
    source,
    executablePath,
    reason,
  };
}
140
+
141
/**
 * Swaps a scenario's RTK command name for the resolved executable path.
 *
 * The scenario's own command is used when no executable path is available.
 * The arguments array is passed through as-is (not copied).
 *
 * @param {{ command: string, args: string[] }} commandDefinition
 * @param {{ executablePath?: string }} rtkAvailability
 * @returns {{ command: string, args: string[] }}
 */
function buildRtkCommand(commandDefinition, rtkAvailability) {
  const { command: scenarioCommand, args } = commandDefinition;
  const command = rtkAvailability.executablePath || scenarioCommand;
  return { command, args };
}
147
+
148
/**
 * Compares an optimized token count with its baseline.
 *
 * A zero or negative baseline yields zero savings, which avoids dividing by
 * zero. Otherwise the percentage is rounded to two decimals; it is negative
 * when the optimized output is larger than the baseline.
 *
 * @param {number} baselineTokens - Estimated tokens of the baseline command.
 * @param {number} optimizedTokens - Estimated tokens of the optimized command.
 * @returns {{ tokenDelta: number, savingsPercent: number }}
 */
function computeSavingsSummary(baselineTokens, optimizedTokens) {
  const summary = { tokenDelta: 0, savingsPercent: 0 };

  if (baselineTokens <= 0) {
    return summary;
  }

  summary.tokenDelta = baselineTokens - optimizedTokens;
  const percentOfBaseline = (summary.tokenDelta / baselineTokens) * 100;
  summary.savingsPercent = Number(percentOfBaseline.toFixed(2));
  return summary;
}
164
+
165
/**
 * Averages the `savingsPercent` stored under one key across benchmark rows.
 *
 * Rows where that key is missing, null, or lacks a numeric `savingsPercent`
 * are left out of the average.
 *
 * @param {object[]} benchmarkRows - Scenario result rows.
 * @param {string} savingsKeyName - Row property holding the savings summary
 *   (for example 'nativeSavings' or 'rtkSavings').
 * @returns {number|null} The mean rounded to two decimals, or null when no
 *   row contributed a value.
 */
function calculateAverageSavings(benchmarkRows, savingsKeyName) {
  let contributingRows = 0;
  let savingsTotal = 0;

  for (const row of benchmarkRows) {
    const percent = row[savingsKeyName]?.savingsPercent;
    if (typeof percent === 'number') {
      savingsTotal += percent;
      contributingRows += 1;
    }
  }

  if (contributingRows === 0) {
    return null;
  }

  return Number((savingsTotal / contributingRows).toFixed(2));
}
177
+
178
/**
 * Runs every benchmark scenario and assembles the report.
 *
 * For each scenario the baseline and natively optimized commands are
 * executed and compared. When RTK is available its command is executed too;
 * RTK savings are only computed when that command exits with status 0.
 * Unless --stdout-only was passed, the report is also written to REPORT_PATH
 * (its directory is created if needed).
 *
 * @returns {Promise<object>} The benchmark report.
 */
async function runTokenOptimizationBenchmark() {
  const rtkAvailability = detectRtkAvailability();

  const scenarioResults = BENCHMARK_SCENARIOS.map((scenario) => {
    const baseline = executeCommand(scenario.baselineCommand);
    const nativeOptimized = executeCommand(scenario.nativeOptimizedCommand);
    const nativeSavings = computeSavingsSummary(
      baseline.estimatedTokens,
      nativeOptimized.estimatedTokens
    );

    const rtk = rtkAvailability.isAvailable
      ? executeCommand(buildRtkCommand(scenario.rtkCommand, rtkAvailability))
      : null;

    // A failed RTK run keeps its measurements but contributes no savings figure.
    const rtkSavings = rtk !== null && rtk.exitCode === 0
      ? computeSavingsSummary(baseline.estimatedTokens, rtk.estimatedTokens)
      : null;

    return {
      scenarioId: scenario.scenarioId,
      scenarioName: scenario.scenarioName,
      baseline,
      nativeOptimized,
      nativeSavings,
      rtk,
      rtkSavings,
    };
  });

  const benchmarkReport = {
    generatedAt: new Date().toISOString(),
    reportName: 'token-optimization-benchmark',
    methodology: {
      tokenEstimate: `estimated_tokens = ceil(output_chars / ${TOKEN_ESTIMATE_DIVISOR})`,
      scope: 'command-output estimate only; model-specific tokenization differs by provider',
      repositoryRoot: REPOSITORY_ROOT,
    },
    environment: {
      nodeVersion: process.version,
      platform: process.platform,
      rtk: rtkAvailability,
    },
    scenarios: scenarioResults,
    summary: {
      scenarioCount: scenarioResults.length,
      averageNativeSavingsPercent: calculateAverageSavings(scenarioResults, 'nativeSavings'),
      averageRtkSavingsPercent: calculateAverageSavings(scenarioResults, 'rtkSavings'),
    },
    artifact: {
      path: REPORT_PATH,
      writeMode: isStdoutOnlyMode ? 'stdout-only' : 'stdout-and-file',
    },
  };

  if (isStdoutOnlyMode) {
    return benchmarkReport;
  }

  await fs.mkdir(dirname(REPORT_PATH), { recursive: true });
  await fs.writeFile(REPORT_PATH, `${JSON.stringify(benchmarkReport, null, 2)}\n`, 'utf8');
  return benchmarkReport;
}
243
+
244
// Entry point: print the report as JSON, or report the failure on stderr and
// exit with status 1.
(async () => {
  try {
    const benchmarkReport = await runTokenOptimizationBenchmark();
    console.log(JSON.stringify(benchmarkReport, null, 2));
  } catch (benchmarkError) {
    const errorMessage = benchmarkError instanceof Error ? benchmarkError.message : String(benchmarkError);
    console.error(`Token benchmark failed: ${errorMessage}`);
    process.exit(1);
  }
})();