edsger 0.55.4 → 0.56.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/quality-benchmark/index.d.ts +32 -0
- package/dist/commands/quality-benchmark/index.js +124 -0
- package/dist/index.js +24 -0
- package/dist/phases/quality-benchmark/index.d.ts +65 -0
- package/dist/phases/quality-benchmark/index.js +194 -0
- package/dist/phases/quality-benchmark/mcp-server.d.ts +46 -0
- package/dist/phases/quality-benchmark/mcp-server.js +252 -0
- package/dist/phases/quality-benchmark/parsers.d.ts +22 -0
- package/dist/phases/quality-benchmark/parsers.js +1022 -0
- package/dist/phases/quality-benchmark/prompts.d.ts +31 -0
- package/dist/phases/quality-benchmark/prompts.js +154 -0
- package/dist/phases/quality-benchmark/rubric.md +1066 -0
- package/dist/phases/quality-benchmark/tool-catalog.d.ts +33 -0
- package/dist/phases/quality-benchmark/tool-catalog.js +597 -0
- package/dist/phases/quality-benchmark/tool-runner.d.ts +69 -0
- package/dist/phases/quality-benchmark/tool-runner.js +399 -0
- package/dist/phases/quality-benchmark/types.d.ts +312 -0
- package/dist/phases/quality-benchmark/types.js +23 -0
- package/package.json +4 -4
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prompt construction for the quality-benchmark SDK session.
|
|
3
|
+
*
|
|
4
|
+
* System prompt = the full rubric document (rubric.md) appended to
|
|
5
|
+
* Claude Code's default preset. The rubric is the single source of
|
|
6
|
+
* truth for: the 6-phase pipeline, the 8 dimensions, scoring anchors,
|
|
7
|
+
* N/A and Unmeasured rules, evidence/recommendation formats, the tool
|
|
8
|
+
* catalog (which the LLM cross-references against the MCP tools we
|
|
9
|
+
* expose), and the mandatory JSON output schema.
|
|
10
|
+
*
|
|
11
|
+
* User prompt = run-specific task context (product, repo path, options).
|
|
12
|
+
*/
|
|
13
|
+
/**
 * Build the system prompt for a quality-benchmark session.
 *
 * The implementation joins, with newlines: a preamble naming the rubric
 * version, the list of MCP tools, the numbered discipline rules, and the
 * rubric text wrapped between `--- BEGIN RUBRIC ---` and
 * `--- END RUBRIC ---` marker lines.
 *
 * @returns The complete system prompt text.
 */
export declare function createQualityBenchmarkSystemPrompt(): string;
|
|
14
|
+
/**
 * Run-specific values that `createQualityBenchmarkUserPrompt` renders into
 * the user prompt.
 */
export interface QualityBenchmarkUserPromptParams {
    /** Product display name; shown in bold on the prompt's opening line. */
    productName: string;
    /** Product identifier; shown next to the name on the opening line. */
    productId: string;
    /** Repository path rendered on the `Repo:` line. */
    repoRoot: string;
    /** Branch name; the `Branch:` line is only emitted when this is set. */
    branch?: string;
    /** Commit identifier rendered on the `Commit:` line. */
    commitSha: string;
    /** Identifier of this benchmark run, rendered on the `Run id:` line. */
    runId: string;
    /**
     * Whether the user consented to on-demand tool installation. Selects
     * between the GRANTED and NOT GRANTED wording of the consent line.
     */
    installEnabled: boolean;
    /**
     * Optional free-form hint from the caller (e.g. "first run",
     * "scheduled rerun"); emitted as a `Context:` line when set.
     */
    reason?: string;
}
|
|
26
|
+
/**
 * Build the user prompt carrying the run-specific context.
 *
 * The result is newline-joined text: product name and id, repo path,
 * optional branch, commit, run id, the install-consent statement, an
 * optional `Context:` line, and a closing instruction to run the pipeline
 * and finish with the JSON report.
 *
 * @param params - Values describing this benchmark run.
 * @returns The user prompt text.
 */
export declare function createQualityBenchmarkUserPrompt(params: QualityBenchmarkUserPromptParams): string;
|
|
27
|
+
/**
|
|
28
|
+
* Extract the final JSON report from the LLM's response. The rubric
|
|
29
|
+
* requires exactly one ```json block at the end of the response.
|
|
30
|
+
*/
|
|
31
|
+
/**
 * Pull the JSON report out of the model's response text.
 *
 * @param text - Full response text returned by the model.
 * @returns The parsed JSON value, or `null` when `text` is empty or no
 *   parseable JSON could be found. The value is not validated against any
 *   schema, so callers must narrow it before use.
 */
export declare function extractReportJson(text: string): unknown | null;
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prompt construction for the quality-benchmark SDK session.
|
|
3
|
+
*
|
|
4
|
+
* System prompt = the full rubric document (rubric.md) appended to
|
|
5
|
+
* Claude Code's default preset. The rubric is the single source of
|
|
6
|
+
* truth for: the 6-phase pipeline, the 8 dimensions, scoring anchors,
|
|
7
|
+
* N/A and Unmeasured rules, evidence/recommendation formats, the tool
|
|
8
|
+
* catalog (which the LLM cross-references against the MCP tools we
|
|
9
|
+
* expose), and the mandatory JSON output schema.
|
|
10
|
+
*
|
|
11
|
+
* User prompt = run-specific task context (product, repo path, options).
|
|
12
|
+
*/
|
|
13
|
+
import { readFileSync } from 'node:fs';
|
|
14
|
+
import { dirname, join } from 'node:path';
|
|
15
|
+
import { fileURLToPath } from 'node:url';
|
|
16
|
+
import { RUBRIC_VERSION } from './types.js';
|
|
17
|
+
/** Directory that contains this module file at runtime. */
const moduleDir = dirname(fileURLToPath(import.meta.url));
/**
 * Read the rubric document from disk.
 *
 * rubric.md is shipped alongside the compiled prompts module (copied by the
 * build step) and lives next to the source file in src/. Both locations are
 * tried, in that order, so dev (tsx / vitest) and prod (dist) both work.
 *
 * @returns The rubric file contents decoded as UTF-8.
 * @throws {Error} When no candidate can be read. The message lists every
 *   attempted path with the underlying read error, so a permission problem
 *   is not mistaken for a missing file.
 */
function loadRubric() {
    const candidates = [
        join(moduleDir, 'rubric.md'),
        join(moduleDir, '..', '..', '..', 'src', 'phases', 'quality-benchmark', 'rubric.md'),
    ];
    // Remember why each candidate failed instead of silently discarding it.
    const failures = [];
    for (const path of candidates) {
        try {
            return readFileSync(path, 'utf8');
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            failures.push(`  ${path}: ${reason}`);
        }
    }
    throw new Error('quality-benchmark: rubric.md not found next to prompts.ts. ' +
        'Ensure the build step copies it into dist/.\n' +
        'Tried:\n' +
        failures.join('\n'));
}
/** Rubric text, loaded once when this module is first imported. */
const RUBRIC = loadRubric();
|
|
38
|
+
// ---------------------------------------------------------------------------
// System prompt
// ---------------------------------------------------------------------------
/**
 * Assemble the system prompt: preamble, MCP tool list, discipline rules and
 * the rubric text between BEGIN/END marker lines, joined with newlines.
 *
 * @returns The complete system prompt text.
 */
export function createQualityBenchmarkSystemPrompt() {
    const preamble = [
        `You are running the Edsger Quality Benchmark, rubric version ${RUBRIC_VERSION}.`,
        '',
        'Your task is to produce one JSON report at the end of your response that',
        'matches the schema in the rubric. Drive the 6-phase pipeline using the',
        'MCP tools exposed under the "quality-benchmark" server, NOT the raw Bash',
        'tool, for anything that runs a code-quality tool. Bash is fine for git',
        'commands, reading manifests, and other repo inspection.',
    ];
    // [tool name, one-line description] pairs, rendered as " name — description".
    const toolTable = [
        ['list_applicable_tools', 'returns the catalog subset for the detected repo'],
        ['probe_tool', 'check whether a tool is installed and reachable'],
        ['install_tool', 'install a missing tool (user-space, no sudo)'],
        ['run_tool', 'execute a tool and receive its parsed summary'],
        ['verify_finding', 'confirm a file:line is real before citing it'],
        ['record_progress', 'surface a status message to the UI'],
    ];
    const toolLines = toolTable.map(([toolName, summary]) => ` ${toolName} — ${summary}`);
    const disciplineRules = [
        ' 1. Never run a quality tool through Bash. Always use run_tool.',
        ' The MCP server captures, parses, and saves outputs deterministically.',
        ' Using Bash bypasses parsing and verification.',
        ' 2. Always probe_tool before run_tool. If unavailable, decide whether to',
        ' install_tool (if install_command is non-null) or accept "unmeasured".',
        ' 3. Every evidence entry MUST cite a real file:line. Call verify_finding',
        ' before including any finding sourced from llm_judgment. Drop ones',
        ' that fail verification.',
        ' 4. End your response with exactly ONE ```json code block matching the',
        " rubric's output schema. No prose after it.",
    ];
    const promptLines = [
        ...preamble,
        '',
        'MCP tools available to you (all under the `mcp__quality-benchmark__` prefix):',
        ...toolLines,
        '',
        'IMPORTANT discipline rules (also in the rubric — repeated for emphasis):',
        ...disciplineRules,
        '',
        '--- BEGIN RUBRIC ---',
        RUBRIC,
        '--- END RUBRIC ---',
    ];
    return promptLines.join('\n');
}
|
|
76
|
+
/**
 * Render the run-specific user prompt.
 *
 * @param params - Run context: product name/id, repo path, optional branch,
 *   commit, run id, install consent flag and optional reason.
 * @returns Newline-joined prompt text. The `Branch:` and `Context:` lines
 *   appear only when `branch` / `reason` are truthy.
 */
export function createQualityBenchmarkUserPrompt(params) {
    const consentStatement = params.installEnabled
        ? 'GRANTED — you may call install_tool for missing tools'
        : 'NOT GRANTED — do not call install_tool; missing tools must be marked unmeasured';
    // Optional lines are spliced in as zero- or one-element arrays.
    const branchLine = params.branch ? [`Branch: \`${params.branch}\``] : [];
    const contextLine = params.reason ? [`Context: ${params.reason}`] : [];
    return [
        `Quality benchmark run for product **${params.productName}** (id: ${params.productId}).`,
        '',
        `Repo: \`${params.repoRoot}\``,
        ...branchLine,
        `Commit: \`${params.commitSha}\``,
        `Run id: \`${params.runId}\``,
        `Install consent: ${consentStatement}`,
        ...contextLine,
        '',
        'Run the 6-phase pipeline now. Use record_progress at each phase',
        'boundary so the user sees what you are doing. End with the',
        'mandatory JSON report.',
    ].join('\n');
}
|
|
96
|
+
// ---------------------------------------------------------------------------
// Output extraction
// ---------------------------------------------------------------------------
/**
 * Extract the final JSON report from the LLM's response. The rubric
 * requires exactly one ```json block at the end of the response.
 *
 * Strategy:
 *  1. Take the LAST ```json fenced block and parse its body.
 *  2. If that body does not parse (for example because a string value in the
 *     report embeds a code fence, which cuts the lazy fence match short),
 *     scan from the start of that fence for a balanced JSON object.
 *  3. If there is no fenced block at all, scan the whole text.
 *
 * @param text - Full response text from the model.
 * @returns The parsed JSON value, or null when text is empty or nothing
 *   parseable is found. The value is not schema-validated.
 */
export function extractReportJson(text) {
    if (!text) {
        return null;
    }
    const fenceRe = /```json\s*\n([\s\S]*?)```/g;
    let lastFenceStart = -1;
    let lastFenceBody = '';
    let m;
    // Only the final fenced block matters (rubric says the report comes last).
    while ((m = fenceRe.exec(text)) !== null) {
        lastFenceStart = m.index;
        lastFenceBody = m[1].trim();
    }
    if (lastFenceStart === -1) {
        return tryParseLooseJson(text);
    }
    try {
        return JSON.parse(lastFenceBody);
    }
    catch {
        // Restrict the rescue scan to the final fence onward so an earlier,
        // unrelated JSON object is never mistaken for the report.
        return tryParseLooseJson(text.slice(lastFenceStart));
    }
}
/**
 * Fallback: find balanced top-level { ... } spans and return the last one
 * that parses as JSON (handles the case where the LLM omitted the fence
 * markers, or added prose after the report).
 *
 * Unlike a plain brace count, the span matcher ignores braces that sit
 * inside double-quoted strings, so values such as "missing } here" do not
 * break the balance.
 *
 * @param text - Text to scan.
 * @returns The parsed object, or null when no span parses.
 */
function tryParseLooseJson(text) {
    const spans = [];
    let cursor = text.indexOf('{');
    while (cursor !== -1) {
        const close = findClosingBrace(text, cursor);
        if (close === -1) {
            // Unmatched opener (stray prose brace): skip just this character.
            cursor = text.indexOf('{', cursor + 1);
        }
        else {
            spans.push([cursor, close]);
            cursor = text.indexOf('{', close + 1);
        }
    }
    // Prefer the span closest to the end; fall back to earlier ones so a
    // trailing non-JSON "{placeholder}" does not hide the real report.
    for (let k = spans.length - 1; k >= 0; k--) {
        const [start, end] = spans[k];
        try {
            return JSON.parse(text.slice(start, end + 1));
        }
        catch {
            // not valid JSON; try the previous span
        }
    }
    return null;
}
/**
 * Locate the "}" that closes the "{" at index `open`, skipping over
 * double-quoted strings and backslash escapes inside them.
 *
 * @param text - Text being scanned.
 * @param open - Index of the opening brace.
 * @returns Index of the matching closing brace, or -1 if the text ends first.
 */
function findClosingBrace(text, open) {
    let depth = 0;
    let inString = false;
    for (let i = open; i < text.length; i++) {
        const ch = text[i];
        if (inString) {
            if (ch === '\\') {
                i++; // skip the escaped character (e.g. \" or \\)
            }
            else if (ch === '"') {
                inString = false;
            }
        }
        else if (ch === '"') {
            inString = true;
        }
        else if (ch === '{') {
            depth++;
        }
        else if (ch === '}') {
            depth--;
            if (depth === 0) {
                return i;
            }
        }
    }
    return -1;
}
|