@upstash/context7-mcp 1.0.34-canary.1 → 1.0.34-canary.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +78 -25
- package/dist/lib/api.js +34 -5
- package/package.json +2 -10
- package/dist/benchmark/benchmark.js +0 -347
- package/dist/benchmark/compare-benchmark.js +0 -289
- package/dist/benchmark/run-benchmark.js +0 -459
- package/dist/benchmark/simulate.js +0 -319
|
@@ -1,289 +0,0 @@
|
|
|
1
|
-
import "dotenv/config";
|
|
2
|
-
import { readFileSync, readdirSync, writeFileSync, existsSync } from "fs";
|
|
3
|
-
import { join, dirname } from "path";
|
|
4
|
-
import { fileURLToPath } from "url";
|
|
5
|
-
import { generateText } from "ai";
|
|
6
|
-
import { anthropic } from "@ai-sdk/anthropic";
|
|
7
|
-
// ES modules have no built-in __filename/__dirname, so both are rebuilt
// from this module's own URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// This file lives in dist/benchmark/, so the package root is two
// directories above it.
const packageRoot = join(__dirname, "..", "..");
// Directory holding the archived benchmark run folders; the comparison
// report is written here as well.
const ARCHIVE_PATH = join(
    packageRoot,
    ...["src", "benchmark", "reports", "benchmarks", "archive"],
);
// Question sets to compare: the integers 1 through 8, in ascending order.
const QUESTION_SETS = Array.from({ length: 8 }, (_, index) => index + 1);
|
|
14
|
-
/** Marker separating the question header from the context body in a raw benchmark file. */
const CONTEXT_MARKER = "CONTEXT:";
/** Matches raw per-question files (`q<N>_raw.md`) and captures the question number. */
const RAW_FILE_PATTERN = /^q(\d+)_raw\.md$/;
/** Number of judge requests issued concurrently per batch. */
const BATCH_SIZE = 5;

/**
 * Collect the question numbers that have a raw output file in a folder.
 *
 * @param {string} folder - Directory to scan.
 * @returns {Set<number>} Question numbers parsed from `q<N>_raw.md` file names.
 */
function listQuestionNumbers(folder) {
    const questionNumbers = new Set();
    for (const fileName of readdirSync(folder)) {
        const match = RAW_FILE_PATTERN.exec(fileName);
        if (match) {
            questionNumbers.add(Number.parseInt(match[1], 10));
        }
    }
    return questionNumbers;
}

/**
 * Read the question text from the `QUESTION:` line of a raw benchmark file.
 *
 * @param {string} content - Raw file content.
 * @param {number} questionNum - Used to build the fallback label.
 * @returns {string} The trimmed question, or `Question <N>` when no `QUESTION:` line exists.
 */
function extractQuestion(content, questionNum) {
    // `.` never matches "\r", so CRLF line endings must be accepted explicitly;
    // otherwise the match fails on Windows-style files.
    const match = content.match(/QUESTION:\s*(.+?)(?:\r?\n|$)/);
    return match ? match[1].trim() : `Question ${questionNum}`;
}

/**
 * Return the part of a raw benchmark file that follows the `CONTEXT:` marker.
 *
 * @param {string} content - Raw file content.
 * @returns {string} The trimmed text after the marker, or the whole content
 *   unchanged when the marker is absent.
 */
function extractContext(content) {
    const markerIndex = content.indexOf(CONTEXT_MARKER);
    if (markerIndex === -1) {
        return content;
    }
    return content.substring(markerIndex + CONTEXT_MARKER.length).trim();
}

/**
 * Build the prompt that asks the judge model to pick the better context.
 *
 * @param {string} question - Question being answered.
 * @param {string} contextA - Context produced by run A.
 * @param {string} contextB - Context produced by run B.
 * @returns {string} Prompt text.
 */
function buildJudgePrompt(question, contextA, contextB) {
    return `You are a technical documentation expert evaluating which context is more helpful for answering a programming question.

QUESTION: ${question}

=== ANSWER A ===
${contextA}

=== ANSWER B ===
${contextB}

Compare these two answers and determine which one is better for helping a developer answer the question. Consider:
1. Relevance - Does it directly address the question?
2. Code examples - Are there working, relevant code snippets?
3. Completeness - Does it cover the key aspects needed?
4. Clarity - Is the information well-organized and easy to understand?
5. Accuracy - Does it appear technically correct?

Respond with ONLY a JSON object in this format:
{"winner": "A" | "B" | "tie", "reasoning": "<brief 1-2 sentence explanation>"}

If both are roughly equal in quality, respond with "tie".`;
}

/**
 * Parse the judge's reply into a normalized verdict.
 *
 * @param {string} text - Raw text returned by the judge model.
 * @returns {{winner: "A" | "B" | "tie", reasoning: string} | null} The verdict,
 *   or `null` when the reply contains no `{...}` span.
 * @throws {SyntaxError} When the `{...}` span is not valid JSON.
 */
function parseVerdict(text) {
    const jsonMatch = text.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
        return null;
    }
    const parsed = JSON.parse(jsonMatch[0]);
    const rawWinner = String(parsed.winner).trim().toUpperCase();
    const winner = rawWinner === "A" || rawWinner === "B" ? rawWinner : "tie";
    // The model may omit `reasoning` or return a non-string; always hand back a
    // string so report rendering cannot crash on it later.
    const reasoning = parsed.reasoning == null ? "No reasoning provided" : String(parsed.reasoning);
    return { winner, reasoning };
}

/**
 * Make a value safe to place inside a single markdown table cell.
 *
 * @param {unknown} value - Cell content.
 * @returns {string} Text with line breaks flattened to spaces and pipes escaped.
 */
function escapeTableCell(value) {
    return String(value ?? "")
        .replace(/\r?\n/g, " ")
        .replace(/\|/g, "\\|");
}

/**
 * Ask the judge model to compare the two raw outputs for one question.
 *
 * A reply that cannot be parsed, or any error from the model call, is recorded
 * as a tie so that one bad question does not abort the whole run.
 *
 * @param {object} params
 * @param {unknown} params.model - Model handle passed through to `generateText`.
 * @param {number} params.questionSet - Question set being processed.
 * @param {number} params.questionNum - Question number within the set.
 * @param {string} params.folderA - Folder holding run A's raw files.
 * @param {string} params.folderB - Folder holding run B's raw files.
 * @returns {Promise<{questionSet: number, questionNum: number, question: string, winner: string, reasoning: string}>}
 */
async function judgeQuestion({ model, questionSet, questionNum, folderA, folderB }) {
    const rawFileName = `q${questionNum}_raw.md`;
    const contentA = readFileSync(join(folderA, rawFileName), "utf-8");
    const contentB = readFileSync(join(folderB, rawFileName), "utf-8");
    // The question text is taken from run A's file.
    const question = extractQuestion(contentA, questionNum);
    const contextA = extractContext(contentA);
    const contextB = extractContext(contentB);
    console.log(` [Q${questionNum}] Comparing: ${question.substring(0, 45)}...`);
    try {
        const result = await generateText({
            model,
            messages: [
                {
                    role: "user",
                    content: buildJudgePrompt(question, contextA, contextB),
                },
            ],
        });
        const verdict = parseVerdict(result.text);
        if (verdict === null) {
            console.log(` [Q${questionNum}] ⚠️ Could not parse result, marking as tie`);
            return {
                questionSet,
                questionNum,
                question,
                winner: "tie",
                reasoning: "Failed to parse LLM response",
            };
        }
        console.log(` [Q${questionNum}] Winner: ${verdict.winner}`);
        return {
            questionSet,
            questionNum,
            question,
            winner: verdict.winner,
            reasoning: verdict.reasoning,
        };
    }
    catch (error) {
        console.error(` [Q${questionNum}] ❌ Error:`, error);
        return {
            questionSet,
            questionNum,
            question,
            winner: "tie",
            reasoning: `Error during comparison: ${error}`,
        };
    }
}

/**
 * Count wins and ties in a list of per-question results.
 *
 * @param {Array<{winner: string}>} results - Per-question results.
 * @returns {{winsA: number, winsB: number, ties: number}} Any winner other than
 *   "A" or "B" is counted as a tie.
 */
function tallyResults(results) {
    const tally = { winsA: 0, winsB: 0, ties: 0 };
    for (const { winner } of results) {
        if (winner === "A") {
            tally.winsA += 1;
        }
        else if (winner === "B") {
            tally.winsB += 1;
        }
        else {
            tally.ties += 1;
        }
    }
    return tally;
}

/**
 * Render the markdown comparison report.
 *
 * @param {object} params
 * @param {string} params.prefixA - Folder prefix of run A.
 * @param {string} params.prefixB - Folder prefix of run B.
 * @param {{winsA: number, winsB: number, ties: number}} params.totals - Aggregate counts.
 * @param {Array<object>} params.questionSetResults - Per-set counts and per-question results.
 * @returns {string} Markdown document text.
 */
function buildReport({ prefixA, prefixB, totals, questionSetResults }) {
    let outcomeLine = "**Result**: TIE";
    if (totals.winsA > totals.winsB) {
        outcomeLine = `**Winner**: ${prefixA}`;
    }
    else if (totals.winsB > totals.winsA) {
        outcomeLine = `**Winner**: ${prefixB}`;
    }
    const lines = [
        "# Benchmark Comparison Results",
        "",
        `**Date**: ${new Date().toISOString()}`,
        "**Judge**: Claude Sonnet 4.5",
        "",
        "## Prefixes Compared",
        "",
        `- **Prefix A**: ${prefixA}`,
        `- **Prefix B**: ${prefixB}`,
        "",
        "## Aggregate Summary",
        "",
        "| Prefix | Wins |",
        "|--------|------|",
        `| ${prefixA} | ${totals.winsA} |`,
        `| ${prefixB} | ${totals.winsB} |`,
        `| Ties | ${totals.ties} |`,
        "",
        outcomeLine,
        "",
        "## Results by Question Set",
        "",
        "| Question Set | A Wins | B Wins | Ties |",
        "|--------------|--------|--------|------|",
    ];
    for (const qsr of questionSetResults) {
        lines.push(`| questions${qsr.questionSet} | ${qsr.winsA} | ${qsr.winsB} | ${qsr.ties} |`);
    }
    lines.push(
        `| **Total** | **${totals.winsA}** | **${totals.winsB}** | **${totals.ties}** |`,
        "",
        "## Detailed Results",
        "",
    );
    for (const qsr of questionSetResults) {
        lines.push(
            `### Question Set ${qsr.questionSet}`,
            "",
            "| Q# | Question | Winner | Reasoning |",
            "|----|----------|--------|----------|",
        );
        for (const r of qsr.results) {
            const shortQuestion = r.question.length > 35 ? `${r.question.substring(0, 35)}...` : r.question;
            const winnerLabel = r.winner === "A" ? "A" : r.winner === "B" ? "B" : "Tie";
            // Both free-text cells are escaped: an unescaped pipe or line break
            // would split the row and corrupt the table.
            lines.push(`| ${r.questionNum} | ${escapeTableCell(shortQuestion)} | ${winnerLabel} | ${escapeTableCell(r.reasoning)} |`);
        }
        lines.push("");
    }
    return `${lines.join("\n")}\n`;
}

/**
 * Compare two benchmark runs across all question sets.
 *
 * Usage:
 *   pnpm run compare-benchmark <prefix-a> <prefix-b>
 *
 * Example:
 *   pnpm run compare-benchmark CTX7-943-run-3 single-params-run-0
 *
 * For every question set in QUESTION_SETS this compares the folders
 *   {prefix-a}-file-questions{N}-model-claude vs {prefix-b}-file-questions{N}-model-claude
 * under ARCHIVE_PATH. A set is skipped when either folder is missing or the two
 * folders share no `q<N>_raw.md` files. Each shared question is sent to the judge
 * model in batches of BATCH_SIZE. Results are printed to the console and written
 * to `comparison-<prefix-a>-vs-<prefix-b>.md` inside ARCHIVE_PATH.
 *
 * Exits the process with code 1 when fewer than two prefixes are supplied.
 *
 * @returns {Promise<void>}
 */
async function compareBenchmarks() {
    const args = process.argv.slice(2);
    if (args.length < 2) {
        console.error("Usage: pnpm run compare-benchmark <prefix-a> <prefix-b>");
        console.error("Example: pnpm run compare-benchmark CTX7-943-run-3 single-params-run-0");
        console.error("");
        console.error("This compares folders matching pattern: {prefix}-file-questions{1-8}-model-claude");
        process.exit(1);
    }
    const [prefixA, prefixB] = args;
    console.log("=".repeat(80));
    console.log("Context7 Benchmark Comparison (All Question Sets)");
    console.log("=".repeat(80));
    console.log(`Prefix A: ${prefixA}`);
    console.log(`Prefix B: ${prefixB}`);
    console.log(`Judge: Claude Sonnet 4.5`);
    console.log(`Archive Path: ${ARCHIVE_PATH}`);
    console.log();
    const model = anthropic("claude-sonnet-4-5");
    const questionSetResults = [];
    const totals = { winsA: 0, winsB: 0, ties: 0 };
    for (const questionSet of QUESTION_SETS) {
        const folderNameA = `${prefixA}-file-questions${questionSet}-model-claude`;
        const folderNameB = `${prefixB}-file-questions${questionSet}-model-claude`;
        const folderA = join(ARCHIVE_PATH, folderNameA);
        const folderB = join(ARCHIVE_PATH, folderNameB);
        console.log("═".repeat(80));
        console.log(`Question Set ${questionSet}`);
        console.log("═".repeat(80));
        if (!existsSync(folderA)) {
            console.log(`⚠️ Skipping: Folder A not found: ${folderNameA}`);
            console.log();
            continue;
        }
        if (!existsSync(folderB)) {
            console.log(`⚠️ Skipping: Folder B not found: ${folderNameB}`);
            console.log();
            continue;
        }
        // Only questions present in both runs can be judged against each other.
        const questionsA = listQuestionNumbers(folderA);
        const questionsB = listQuestionNumbers(folderB);
        const commonQuestions = [...questionsA].filter((q) => questionsB.has(q)).sort((a, b) => a - b);
        if (commonQuestions.length === 0) {
            console.log(`⚠️ Skipping: No common questions found`);
            console.log();
            continue;
        }
        console.log(`Found ${commonQuestions.length} common questions`);
        const setResults = [];
        // Batches run one after another; the requests inside a batch run in parallel.
        for (let batchStart = 0; batchStart < commonQuestions.length; batchStart += BATCH_SIZE) {
            const batch = commonQuestions.slice(batchStart, batchStart + BATCH_SIZE);
            const batchResults = await Promise.all(batch.map((questionNum) => judgeQuestion({ model, questionSet, questionNum, folderA, folderB })));
            setResults.push(...batchResults);
        }
        const setTally = tallyResults(setResults);
        questionSetResults.push({
            questionSet,
            winsA: setTally.winsA,
            winsB: setTally.winsB,
            ties: setTally.ties,
            results: setResults,
        });
        totals.winsA += setTally.winsA;
        totals.winsB += setTally.winsB;
        totals.ties += setTally.ties;
        console.log(` Summary: A=${setTally.winsA}, B=${setTally.winsB}, Ties=${setTally.ties}`);
        console.log();
    }
    console.log("=".repeat(80));
    console.log("COMPARISON RESULTS");
    console.log("=".repeat(80));
    console.log();
    console.log("📊 Results by Question Set:");
    for (const qsr of questionSetResults) {
        console.log(` questions${qsr.questionSet}: A=${qsr.winsA}, B=${qsr.winsB}, Ties=${qsr.ties}`);
    }
    console.log();
    console.log("📊 Aggregate Score:");
    console.log(` ${prefixA}`);
    console.log(` → ${totals.winsA} wins`);
    console.log();
    console.log(` ${prefixB}`);
    console.log(` → ${totals.winsB} wins`);
    console.log();
    console.log(` Ties: ${totals.ties}`);
    console.log();
    if (totals.winsA > totals.winsB) {
        console.log(`🏆 Winner: ${prefixA}`);
    }
    else if (totals.winsB > totals.winsA) {
        console.log(`🏆 Winner: ${prefixB}`);
    }
    else {
        console.log(`🤝 Result: TIE`);
    }
    console.log("=".repeat(80));
    const report = buildReport({ prefixA, prefixB, totals, questionSetResults });
    const reportPath = join(ARCHIVE_PATH, `comparison-${prefixA}-vs-${prefixB}.md`);
    writeFileSync(reportPath, report);
    console.log();
    console.log(`📄 Report saved to: ${reportPath}`);
}
|
|
285
|
-
// Script entry point: start the comparison. Any rejection that escapes
// compareBenchmarks is logged and turned into a non-zero exit status.
const reportFatalError = (failure) => {
    console.error("Fatal error:", failure);
    process.exit(1);
};
compareBenchmarks().catch(reportFatalError);
|