@mario-gc/pi-context7 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,268 @@
1
+ /**
2
+ * Unit tests for BM25 scoring and cache timing.
3
+ *
4
+ * Covers:
5
+ * - BM25 cache hit for semantically equivalent reordered queries (same tokens, different order)
6
+ * - BM25 cache miss for queries sharing only stopwords or insufficient term overlap
7
+ * - cache.set writes file + updates manifest before returning (timing guarantee)
8
+ * - cache.get returns BM25 hit after an awaited set completes
9
+ *
10
+ * @module extensions/cache.test
11
+ */
12
+
13
+ import { describe, it, before, after } from "node:test";
14
+ import assert from "node:assert/strict";
15
+ import { rm, mkdir, readFile, access } from "node:fs/promises";
16
+ import { join } from "node:path";
17
+ import { tmpdir } from "node:os";
18
+
19
+ import {
20
+ bm25Find,
21
+ tokenize,
22
+ tokenizeForScoring,
23
+ type ManifestEntry,
24
+ } from "./cache.ts";
25
+ import { createCache } from "./cache.ts";
26
+
27
+ // ---------------------------------------------------------------------------
28
+ // Test fixtures — a temp cache root that doesn't touch the real cache
29
+ // ---------------------------------------------------------------------------
30
+
31
+ const TEST_CACHE_ROOT = join(tmpdir(), `context7-cache-test-${Date.now()}`);
32
+
33
+ before(async () => {
34
+ process.env.CONTEXT7_CACHE_ROOT = TEST_CACHE_ROOT;
35
+ await mkdir(TEST_CACHE_ROOT, { recursive: true });
36
+ });
37
+
38
+ after(async () => {
39
+ delete process.env.CONTEXT7_CACHE_ROOT;
40
+ try {
41
+ await rm(TEST_CACHE_ROOT, { recursive: true, force: true });
42
+ } catch {
43
+ // best effort
44
+ }
45
+ });
46
+
47
+ // ---------------------------------------------------------------------------
48
+ // Helper: build a minimal manifest entry
49
+ // ---------------------------------------------------------------------------
50
+
51
+ function makeEntry(query: string, hash: string, scope: Record<string, string> = {}): ManifestEntry {
52
+ return {
53
+ scope,
54
+ query,
55
+ hash,
56
+ cachedAt: 0,
57
+ ttl: 300,
58
+ size: 100,
59
+ };
60
+ }
61
+
62
+ // ===========================================================================
63
+ // BM25 Scoring Tests
64
+ // ===========================================================================
65
+
66
+ describe("BM25 scoring", () => {
67
+ describe("tokenizeForScoring", () => {
68
+ it("removes English stopwords", () => {
69
+ const tokens = tokenizeForScoring("how to set up express");
70
+ // "how", "to", "set", "up" are stopwords; only "express" remains
71
+ assert.deepEqual(tokens, ["express"]);
72
+ });
73
+
74
+ it("keeps domain-specific terms", () => {
75
+ const tokens = tokenizeForScoring("useState hook patterns in react");
76
+ // "in" is a stopword; the rest are domain terms
77
+ assert.deepEqual(tokens, ["usestate", "hook", "patterns", "react"]);
78
+ });
79
+
80
+ it("returns same tokens for reordered queries", () => {
81
+ const a = tokenizeForScoring("best practices for layout and content");
82
+ const b = tokenizeForScoring("best practices for content and layout");
83
+ // "for" and "and" are stopwords; both produce [best, practices, layout, content]
84
+ assert.deepEqual(a.sort(), b.sort());
85
+ assert.deepEqual(a.sort(), ["best", "content", "layout", "practices"]);
86
+ });
87
+ });
88
+
89
+ describe("bm25Find — cache hit for reordered equivalent queries", () => {
90
+ it("matches same tokens in different order", () => {
91
+ // Criterion 1 & 4: two queries with same tokens, different order
92
+ const entries = [
93
+ makeEntry("best practices for layout and content", "h1", { libraryId: "/test/lib" }),
94
+ ];
95
+
96
+ const result = bm25Find("best practices for content and layout", entries, 0.5);
97
+
98
+ assert.notEqual(result, null, "Should match reordered query");
99
+ assert.equal(result!.hash, "h1");
100
+ });
101
+
102
+ it("matches regardless of which query is cached vs looked up", () => {
103
+ const entries = [
104
+ makeEntry("best practices for content and layout", "h2", { libraryId: "/test/lib" }),
105
+ ];
106
+
107
+ const result = bm25Find("best practices for layout and content", entries, 0.5);
108
+
109
+ assert.notEqual(result, null);
110
+ assert.equal(result!.hash, "h2");
111
+ });
112
+ });
113
+
114
+ describe("bm25Find — cache miss for stopword-only overlap", () => {
115
+ it("rejects queries sharing only stopwords", () => {
116
+ // Criterion 5: queries sharing only stopwords after filtering
117
+ // "how to configure nextjs" → tokens: [configure, nextjs] (how, to are stopwords)
118
+ // "how to set up express" → tokens: [express] (how, to, set, up are stopwords)
119
+ // Overlap: 0 matching terms → miss
120
+ const entries = [
121
+ makeEntry("how to set up express", "h1", { libraryId: "/test/lib" }),
122
+ ];
123
+
124
+ const result = bm25Find("how to configure nextjs", entries, 0.5);
125
+
126
+ assert.equal(result, null, "Should not match on stopword-only overlap");
127
+ });
128
+ });
129
+
130
+ describe("bm25Find — cache miss for insufficient overlap", () => {
131
+ it("rejects queries with < 50% term overlap", () => {
132
+ // Criterion 5: "useState hook patterns" vs "useEffect hook cleanup"
133
+ // shares only "hook" (1 of 3, 33%) — below the 50% threshold
134
+ const entries = [
135
+ makeEntry("useState hook patterns", "h1", { libraryId: "/test/lib" }),
136
+ ];
137
+
138
+ const result = bm25Find("useEffect hook cleanup", entries, 0.5);
139
+
140
+ assert.equal(result, null, "Should not match with <50% term overlap");
141
+ });
142
+
143
+ it("rejects single shared term even if it's domain-specific", () => {
144
+ // "hook" is a domain term, but sharing only 1 of 3 terms (33%) is insufficient
145
+ const entries = [
146
+ makeEntry("react hook forms validation", "h1", { libraryId: "/test/lib" }),
147
+ ];
148
+
149
+ // "hook" matches, but 1/3 = 33% < 50%
150
+ const result = bm25Find("custom hook rendering", entries, 0.5);
151
+
152
+ assert.equal(result, null, "Should not match with single shared term");
153
+ });
154
+ });
155
+
156
+ describe("bm25Find — edge cases", () => {
157
+ it("returns null for empty query", () => {
158
+ const entries = [makeEntry("some cached query", "h1")];
159
+ assert.equal(bm25Find("", entries, 0.5), null);
160
+ });
161
+
162
+ it("returns null for all-stopword query", () => {
163
+ const entries = [makeEntry("how to use express", "h1")];
164
+ // "how", "to", "for" are all stopwords → queryTokens is empty after filtering
165
+ assert.equal(bm25Find("how to for", entries, 0.5), null);
166
+ });
167
+
168
+ it("returns null when no entries provided", () => {
169
+ assert.equal(bm25Find("some query", [], 0.5), null);
170
+ });
171
+ });
172
+ });
173
+
174
+ // ===========================================================================
175
+ // Cache Timing Tests
176
+ // ===========================================================================
177
+
178
+ describe("cache timing", () => {
179
+ describe("cache.set writes file and updates manifest before returning", () => {
180
+ it("file exists on disk after awaited set()", async () => {
181
+ // Criterion 6: cache.set completes (file written + manifest updated) before returning
182
+ const cache = createCache();
183
+ await cache.init();
184
+
185
+ const scope = { libraryId: "/timing/file-test" };
186
+ const params = { libraryId: "/timing/file-test", query: "file existence check" };
187
+ const data = { results: [{ id: 1, text: "test data" }] };
188
+
189
+ await cache.set("context", scope, params, data);
190
+
191
+ // The cache file should exist on disk immediately after set() returns.
192
+ // We verify via cache.get() which reads from disk — if the file wasn't
193
+ // written, this would return a cache miss.
194
+ const result = await cache.get("context", scope, params);
195
+ assert.equal(result.source, "exact", "File should be readable immediately after awaited set");
196
+ assert.deepEqual(result.data, data);
197
+ });
198
+
199
+ it("manifest entry is visible to subsequent get() after awaited set()", async () => {
200
+ // This directly tests the fire-and-forget fix: with await, the manifest
201
+ // is updated before set() returns, so a subsequent BM25 lookup sees it.
202
+ const cache = createCache();
203
+ await cache.init();
204
+
205
+ const scope = { libraryId: "/timing/manifest-test" };
206
+ const paramsA = { libraryId: "/timing/manifest-test", query: "best practices for layout and content" };
207
+ const paramsB = { libraryId: "/timing/manifest-test", query: "best practices for content and layout" };
208
+ const data = { snippets: [{ text: "cached response" }] };
209
+
210
+ // 1. Write entry A
211
+ await cache.set("context", scope, paramsA, data);
212
+
213
+ // 2. Look up with query B (same tokens, different order) — should BM25 hit
214
+ const result = await cache.get("context", scope, paramsB);
215
+ assert.equal(result.source, "bm25", "BM25 should find the entry written by awaited set()");
216
+ assert.deepEqual(result.data, data);
217
+ });
218
+
219
+ it("awaited set() makes entry visible for exact match too", async () => {
220
+ const cache = createCache();
221
+ await cache.init();
222
+
223
+ const scope = { libraryName: "timing-lib" };
224
+ const params = { libraryName: "timing-lib", query: "exact match timing" };
225
+ const data = { results: [{ id: "timing", title: "Timing Test" }] };
226
+
227
+ await cache.set("search", scope, params, data);
228
+
229
+ const result = await cache.get("search", scope, params);
230
+ assert.equal(result.source, "exact");
231
+ assert.deepEqual(result.data, data);
232
+ });
233
+ });
234
+
235
+ describe("sequential cache writes produce BM25 hits", () => {
236
+ it("simulates two sequential tool calls: second hits cache from first", async () => {
237
+ // This simulates the real-world scenario from the spec:
238
+ // two context7_get_context calls with reordered query terms
239
+ // in the same LLM response, executed sequentially.
240
+ const cache = createCache();
241
+ await cache.init();
242
+
243
+ const scope = { libraryId: "/sequential/sim" };
244
+ const queryA = "best practices for layout and content";
245
+ const queryB = "best practices for content and layout";
246
+
247
+ const paramsA = { libraryId: "/sequential/sim", query: queryA, type: "json" };
248
+ const paramsB = { libraryId: "/sequential/sim", query: queryB, type: "json" };
249
+
250
+ const apiData = { codeSnippets: [{ codeTitle: "Example", code: "console.log(1)" }] };
251
+
252
+ // --- First tool call (simulated) ---
253
+ // Cache miss → fetch from API → await cache.set
254
+ let result = await cache.get("context", scope, paramsA);
255
+ assert.equal(result.source, null, "First call should be a cache miss");
256
+
257
+ // Simulate the awaited cache.set (as the tool now does)
258
+ await cache.set("context", scope, paramsA, apiData);
259
+
260
+ // --- Second tool call (simulated) ---
261
+ // With sequential execution + awaited set, the manifest is updated.
262
+ // queryB has the same tokens as queryA → BM25 hit.
263
+ result = await cache.get("context", scope, paramsB);
264
+ assert.equal(result.source, "bm25", "Second call should BM25 hit the first call's cache");
265
+ assert.deepEqual(result.data, apiData);
266
+ });
267
+ });
268
+ });
@@ -24,7 +24,17 @@ import { homedir } from "node:os";
24
24
  // Constants
25
25
  // ---------------------------------------------------------------------------
26
26
 
27
- const CACHE_ROOT = join(homedir(), ".pi", "agent", "cache", "context7");
27
+ /**
28
+ * Resolve the cache root directory.
29
+ *
30
+ * Reads `CONTEXT7_CACHE_ROOT` at call time (not import time) so that tests
31
+ * can set the env var before creating a cache instance.
32
+ */
33
+ function getCacheRoot(): string {
34
+ return process.env.CONTEXT7_CACHE_ROOT
35
+ ? join(process.env.CONTEXT7_CACHE_ROOT, "context7")
36
+ : join(homedir(), ".pi", "agent", "cache", "context7");
37
+ }
28
38
 
29
39
  /** Subdirectory name for each endpoint. */
30
40
  const DIR_NAMES: Record<string, string> = {
@@ -42,10 +52,67 @@ const MAX_CACHE_SIZE = 52_428_800; // 50 MB in bytes
42
52
 
43
53
  const BM25_K1 = 1.2;
44
54
  const BM25_B = 0.75;
45
- const BM25_CONSTANT_IDF = 1.5;
46
55
 
47
- const BM25_THRESHOLD_ONLINE = 0.7;
48
- const BM25_THRESHOLD_OFFLINE = 0.5;
56
+ /**
57
+ * Minimum IDF floor to prevent small-corpus artifacts.
58
+ *
59
+ * With very few documents (N=1-3), the IDF smoothing formula can produce
60
+ * artificially low values for shared terms vs unique terms, distorting
61
+ * the self-match normalization. This floor ensures no term's IDF drops
62
+ * below a reasonable minimum.
63
+ */
64
+ const BM25_IDF_MIN = 0.5;
65
+
66
+ /**
67
+ * Minimum fraction of query terms that must appear in a document for it
68
+ * to be considered a valid match (prevents single-shared-term false positives).
69
+ * Must be strictly greater than this threshold to pass.
70
+ */
71
+ const BM25_MIN_OVERLAP = 0.5;
72
+
73
+ /**
74
+ * Minimum number of matching terms required regardless of overlap ratio.
75
+ * Prevents false matches when query has very few terms (2-3) and one happens
76
+ * to match by coincidence (e.g., "useState hook" vs "useEffect hook" sharing "hook").
77
+ */
78
+ const BM25_MIN_MATCHING_TERMS = 2;
79
+
80
+ /**
81
+ * BM25 score thresholds for cache hits.
82
+ *
83
+ * Combined with the IDF floor (0.5) and minimum overlap check (50%),
84
+ * these thresholds ensure meaningful term matches while rejecting
85
+ * false positives from stopwords alone.
86
+ *
87
+ * With IDF floor=0.5 and k1=1.2, each matching term contributes
88
+ * roughly 0.3-0.7 to the score. A threshold of 0.5 requires at
89
+ * least one solid term match; 0.3 is more permissive for offline mode.
90
+ */
91
+ const BM25_THRESHOLD_ONLINE = 0.5;
92
+ const BM25_THRESHOLD_OFFLINE = 0.3;
93
+
94
+ /**
95
+ * English stopword set applied before BM25 scoring.
96
+ *
97
+ * Contains only structural English words and very generic verbs
98
+ * ("how", "to", "for", "use", "set", etc.). Domain-specific terms
99
+ * ("hook", "middleware", "auth", "router", "component", "state") are
100
+ * intentionally NOT included — they carry the semantic meaning BM25
101
+ * must discriminate on.
102
+ */
103
+ const STOPWORDS = new Set([
104
+ "a", "an", "the", "and", "or", "but", "is", "are", "was", "were",
105
+ "be", "been", "being", "have", "has", "had", "do", "does", "did",
106
+ "will", "would", "could", "should", "may", "might", "must", "can",
107
+ "how", "to", "for", "in", "on", "at", "by", "with", "from", "of",
108
+ "as", "into", "about", "than", "then", "so", "if", "because",
109
+ "what", "which", "who", "when", "where", "why", "this", "that",
110
+ "these", "those", "i", "you", "he", "she", "it", "we", "they",
111
+ "my", "your", "his", "her", "its", "our", "their",
112
+ "use", "using", "used", "get", "getting", "set", "setting",
113
+ "up", "down", "out", "over", "under", "again",
114
+ "not", "no", "nor", "too", "very", "just", "also", "only",
115
+ ]);
49
116
 
50
117
  // ---------------------------------------------------------------------------
51
118
  // Types
@@ -63,7 +130,7 @@ export interface CacheResult {
63
130
  entry?: CacheEntry;
64
131
  }
65
132
 
66
- interface ManifestEntry {
133
+ export interface ManifestEntry {
67
134
  scope: Record<string, string>;
68
135
  query: string;
69
136
  hash: string;
@@ -154,8 +221,9 @@ function extractQueryText(params: Record<string, string | boolean | undefined>):
154
221
  * Tokenize text for BM25 scoring.
155
222
  *
156
223
  * Lowercases, splits on non-alphanumeric characters, filters empty tokens.
224
+ * The raw token list is used as the basis for stopword-aware scoring.
157
225
  */
158
- function tokenize(text: string): string[] {
226
+ export function tokenize(text: string): string[] {
159
227
  return text
160
228
  .toLowerCase()
161
229
  .split(/[^a-z0-9]+/)
@@ -163,11 +231,61 @@ function tokenize(text: string): string[] {
163
231
  }
164
232
 
165
233
  /**
166
- * Compute BM25 score for a single query/document pair.
234
+ * Tokenize text and remove English stopwords before BM25 scoring.
167
235
  *
168
- * Implements the simplified BM25 formula described in the spec.
236
+ * Stopwords contribute no discriminating signal; with a proper
237
+ * corpus-frequency IDF they would score near zero anyway, but filtering
238
+ * them up front keeps the token lists short and the self-match
239
+ * normalization meaningful (the self-score reflects only terms that
240
+ * actually carry semantic weight).
169
241
  */
170
- function bm25Score(queryTokens: string[], docTokens: string[], avgDocLen: number): number {
242
+ export function tokenizeForScoring(text: string): string[] {
243
+ return tokenize(text).filter((t) => !STOPWORDS.has(t));
244
+ }
245
+
246
+ /**
247
+ * Compute the smoothed inverse document frequency for a term, floored at BM25_IDF_MIN.
248
+ *
249
+ * IDF = ln(1 + (N - n + 0.5) / (n + 0.5))
250
+ *
251
+ * where N = total candidate documents and n = documents whose token list
252
+ * contains the term. The `+1` smoothing prevents negative IDF values for
253
+ * terms that appear in every document, which would otherwise subtract
254
+ * from the score and destabilize the self-match normalization.
255
+ */
256
+ export function computeIdf(term: string, docTokenLists: string[][]): number {
257
+ const N = docTokenLists.length;
258
+ const n = docTokenLists.filter((tokens) => tokens.includes(term)).length;
259
+ const raw = Math.log(1 + (N - n + 0.5) / (n + 0.5));
260
+ return Math.max(raw, BM25_IDF_MIN);
261
+ }
262
+
263
+ /**
264
+ * Build an IDF map for the (de-duplicated) query terms against a corpus.
265
+ */
266
+ export function buildIdfMap(queryTokens: string[], docTokenLists: string[][]): Map<string, number> {
267
+ const idf = new Map<string, number>();
268
+ for (const term of new Set(queryTokens)) {
269
+ idf.set(term, computeIdf(term, docTokenLists));
270
+ }
271
+ return idf;
272
+ }
273
+
274
+ /**
275
+ * Compute the raw (unnormalized) BM25 score for a single query/document pair.
276
+ *
277
+ * score += tf * idf
278
+ *
279
+ * where tf = (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * (docLen / avgDocLen)))
280
+ * and idf is looked up from the precomputed `idfMap`. Terms missing from the
281
+ * map are treated as having zero IDF.
282
+ */
283
+ export function bm25Score(
284
+ queryTokens: string[],
285
+ docTokens: string[],
286
+ avgDocLen: number,
287
+ idfMap: Map<string, number>,
288
+ ): number {
171
289
  const docLen = docTokens.length;
172
290
  const avgLen = avgDocLen > 0 ? avgDocLen : 1;
173
291
 
@@ -178,7 +296,7 @@ function bm25Score(queryTokens: string[], docTokens: string[], avgDocLen: number
178
296
  const tf =
179
297
  (freq * (BM25_K1 + 1)) /
180
298
  (freq + BM25_K1 * (1 - BM25_B + BM25_B * (docLen / avgLen)));
181
- score += tf * BM25_CONSTANT_IDF;
299
+ score += tf * (idfMap.get(term) ?? 0);
182
300
  }
183
301
  }
184
302
  return score;
@@ -187,27 +305,52 @@ function bm25Score(queryTokens: string[], docTokens: string[], avgDocLen: number
187
305
  /**
188
306
  * Run BM25 against a list of manifest entries and return the best match.
189
307
  *
308
+ * Scoring pipeline:
309
+ * 1. Tokenize the query and each candidate query with stopword filtering.
310
+ * 2. Filter out documents with insufficient term overlap (≤ 50% of query terms, or fewer than 2 matching terms).
311
+ * 3. Compute a corpus-frequency IDF for each query term (with floor).
312
+ * 4. Score every remaining candidate and keep the highest.
313
+ * 5. Return the best entry only if its raw score meets `threshold`.
314
+ *
315
+ * The combination of overlap check + IDF floor + raw score threshold
316
+ * prevents false positives from stopwords while correctly matching
317
+ * queries that share meaningful domain terms.
318
+ *
190
319
  * @returns The best matching entry, or null if none reach the threshold.
191
320
  */
192
- function bm25Find(query: string, entries: ManifestEntry[], threshold: number): ManifestEntry | null {
193
- const queryTokens = tokenize(query);
321
+ export function bm25Find(query: string, entries: ManifestEntry[], threshold: number): ManifestEntry | null {
322
+ const queryTokens = tokenizeForScoring(query);
194
323
  if (queryTokens.length === 0) return null;
195
324
 
196
- const docTokenLists = entries.map((e) => tokenize(e.query));
325
+ const docTokenLists = entries.map((e) => tokenizeForScoring(e.query));
197
326
  const avgDocLen =
198
327
  docTokenLists.reduce((sum, t) => sum + t.length, 0) / Math.max(entries.length, 1);
199
328
 
329
+ const idfMap = buildIdfMap(queryTokens, docTokenLists);
330
+
200
331
  let bestScore = 0;
201
332
  let bestEntry: ManifestEntry | null = null;
202
333
 
203
334
  for (let i = 0; i < entries.length; i++) {
204
- const score = bm25Score(queryTokens, docTokenLists[i], avgDocLen);
335
+ const docTokens = docTokenLists[i];
336
+
337
+ // Check term overlap: must have both sufficient ratio AND minimum count.
338
+ // This prevents single-shared-term false positives (e.g., "useState hook"
339
+ // vs "useEffect hook" sharing only "hook").
340
+ const matchingTerms = queryTokens.filter((t) => docTokens.includes(t)).length;
341
+ const overlapRatio = matchingTerms / queryTokens.length;
342
+ if (overlapRatio <= BM25_MIN_OVERLAP || matchingTerms < BM25_MIN_MATCHING_TERMS) continue;
343
+
344
+ const score = bm25Score(queryTokens, docTokens, avgDocLen, idfMap);
205
345
  if (score > bestScore) {
206
346
  bestScore = score;
207
347
  bestEntry = entries[i];
208
348
  }
209
349
  }
210
350
 
351
+ // Edge case: a zero bestScore means no candidate passed the overlap check with a positive score.
352
+ if (bestScore <= 0) return null;
353
+
211
354
  return bestScore >= threshold ? bestEntry : null;
212
355
  }
213
356
 
@@ -216,15 +359,15 @@ function bm25Find(query: string, entries: ManifestEntry[], threshold: number): M
216
359
  // ---------------------------------------------------------------------------
217
360
 
218
361
  function getEndpointDir(endpoint: "search" | "context"): string {
219
- return join(CACHE_ROOT, DIR_NAMES[endpoint]);
362
+ return join(getCacheRoot(), DIR_NAMES[endpoint]);
220
363
  }
221
364
 
222
365
  function getManifestPath(endpoint: "search" | "context"): string {
223
- return join(CACHE_ROOT, `${DIR_NAMES[endpoint]}.json`);
366
+ return join(getCacheRoot(), `${DIR_NAMES[endpoint]}.json`);
224
367
  }
225
368
 
226
369
  function getManifestTempPath(endpoint: "search" | "context"): string {
227
- return join(CACHE_ROOT, `${DIR_NAMES[endpoint]}.json.tmp`);
370
+ return join(getCacheRoot(), `${DIR_NAMES[endpoint]}.json.tmp`);
228
371
  }
229
372
 
230
373
  function getEntryPath(endpoint: "search" | "context", hash: string): string {
@@ -321,8 +464,8 @@ async function init(): Promise<void> {
321
464
  if (initialized) return;
322
465
 
323
466
  // 1. Ensure directories exist
324
- await mkdir(join(CACHE_ROOT, "libraries"), { recursive: true });
325
- await mkdir(join(CACHE_ROOT, "contexts"), { recursive: true });
467
+ await mkdir(join(getCacheRoot(), "libraries"), { recursive: true });
468
+ await mkdir(join(getCacheRoot(), "contexts"), { recursive: true });
326
469
 
327
470
  // 2. Load manifests
328
471
  for (const endpoint of ["search", "context"] as const) {
@@ -359,11 +502,11 @@ async function init(): Promise<void> {
359
502
 
360
503
  // Also clean manifest .tmp files from the root cache dir
361
504
  try {
362
- const rootFiles = await readdir(CACHE_ROOT);
505
+ const rootFiles = await readdir(getCacheRoot());
363
506
  for (const file of rootFiles) {
364
507
  if (file.endsWith(".tmp")) {
365
508
  try {
366
- await unlink(join(CACHE_ROOT, file));
509
+ await unlink(join(getCacheRoot(), file));
367
510
  } catch {
368
511
  // ignore
369
512
  }
@@ -15,6 +15,10 @@ import { readFileSync } from "node:fs";
15
15
  import { homedir } from "node:os";
16
16
  import { join } from "node:path";
17
17
  import { createCache, type CacheModule } from "./cache.js";
18
+ import {
19
+ computeQualityScore,
20
+ getStars,
21
+ } from "./ranking.js";
18
22
 
19
23
  export default function (pi: ExtensionAPI) {
20
24
  let cache: CacheModule;
@@ -210,6 +214,7 @@ export default function (pi: ExtensionAPI) {
210
214
  pi.registerTool({
211
215
  name: "context7_search_library",
212
216
  label: "Context7 Search Library",
217
+ executionMode: "sequential",
213
218
  description:
214
219
  "Search Context7 for libraries by name. Returns matching libraries with IDs, descriptions, " +
215
220
  "trust scores, and available versions. Use this first to resolve a library name to a " +
@@ -273,8 +278,12 @@ export default function (pi: ExtensionAPI) {
273
278
  signal,
274
279
  );
275
280
  results = raw.results ?? [];
276
- // Store in cache (fire-and-forget)
277
- cache.set("search", { libraryName: params.libraryName }, fetchParams, raw).catch(() => {});
281
+ // Store in cache (await to guarantee manifest is updated before returning)
282
+ await cache
283
+ .set("search", { libraryName: params.libraryName }, fetchParams, raw)
284
+ .catch((err) => {
285
+ console.error("[context7] cache write failed:", err);
286
+ });
278
287
  cacheNote = "\n[fetched from API]";
279
288
  }
280
289
 
@@ -290,37 +299,96 @@ export default function (pi: ExtensionAPI) {
290
299
  };
291
300
  }
292
301
 
293
- // Format output
294
- const lines: string[] = [
295
- `Found ${results.length} libraries for "${params.libraryName}":`,
296
- ];
302
+ // -----------------------------------------------------------------------
303
+ // Library auto-ranking: filter non-finalized, compute composite quality
304
+ // score, sort, and show top 3 with a Recommended marker.
305
+ // Weights and scoring logic live in ranking.ts (imported at module top).
306
+ // -----------------------------------------------------------------------
307
+
308
+ // Step 1 — Filter non-finalized libraries
309
+ const finalized = results.filter((lib) => {
310
+ const state = (lib as Record<string, unknown>).state;
311
+ return state === "finalized" || state === undefined; // keep if finalized or field missing
312
+ });
313
+
314
+ if (finalized.length === 0) {
315
+ return {
316
+ content: [
317
+ {
318
+ type: "text",
319
+ text:
320
+ `Found ${results.length} libraries for "${params.libraryName}" but none are finalized yet. ` +
321
+ "Try again later or use a different search term.",
322
+ },
323
+ ],
324
+ details: { results },
325
+ };
326
+ }
327
+
328
+ // Step 2 — Compute maxStars across the finalized results
329
+ const maxStars = Math.max(
330
+ ...finalized.map(
331
+ (lib) => getStars(lib as Record<string, unknown>),
332
+ ),
333
+ 0,
334
+ );
335
+
336
+ // Step 3 — Score, sort, and slice top 3
337
+ const scored = finalized
338
+ .map((lib) => ({
339
+ lib,
340
+ score: computeQualityScore(lib as Record<string, unknown>, maxStars),
341
+ }))
342
+ .sort((a, b) => b.score - a.score)
343
+ .slice(0, 3);
344
+
345
+ // Step 4 — Format output
346
+ const lines: string[] = [];
347
+
348
+ if (results.length > 3) {
349
+ lines.push(
350
+ `Found ${results.length} libraries for "${params.libraryName}" — showing top ${scored.length} by quality:`,
351
+ );
352
+ } else {
353
+ lines.push(
354
+ `Found ${results.length} ${results.length === 1 ? "library" : "libraries"} for "${params.libraryName}":`,
355
+ );
356
+ }
357
+ lines.push("");
297
358
 
298
- for (let i = 0; i < results.length; i++) {
299
- const lib = results[i] as Record<string, unknown>;
359
+ for (let i = 0; i < scored.length; i++) {
360
+ const lib = scored[i].lib as Record<string, unknown>;
300
361
  const idx = i + 1;
301
- const id = lib.id ?? "";
302
- const title = lib.title ?? lib.name ?? "Unknown";
303
- const description = lib.description ?? "";
362
+ const id = (lib.id ?? "") as string;
363
+ const title = (lib.title ?? lib.name ?? "Unknown") as string;
364
+ const description = (lib.description ?? "") as string;
304
365
  const versions = Array.isArray(lib.versions)
305
366
  ? (lib.versions as string[]).join(", ")
306
367
  : "";
307
368
  const trust = lib.trustScore ?? lib.trust_score ?? "?";
308
369
  const bench = lib.benchmarkScore ?? lib.benchmark_score ?? "?";
309
- const stars = lib.stars ?? lib.githubStars ?? lib.github_stars ?? "?";
370
+ const stars = ((lib.stars ?? lib.githubStars ?? lib.github_stars ?? 0) as number) | 0;
310
371
 
311
- lines.push("");
312
- lines.push(`${idx}. ${title} — ${id}`);
372
+ const marker = i === 0 ? "⭐ Recommended: " : `${idx}. `;
373
+ lines.push(`${marker}${title} — ${id}`);
313
374
  lines.push(` ${description}`);
314
375
  if (versions) lines.push(` Versions: ${versions}`);
315
- lines.push(` Trust: ${trust}/10 · Benchmark: ${bench}/100 · ⭐ ${stars}`);
376
+ lines.push(
377
+ ` Stars: ${stars.toLocaleString()} · Trust: ${trust}/10 · Benchmark: ${bench}/100`,
378
+ );
379
+ if (i === 0) {
380
+ lines.push(` → Use this ID with context7_get_context`);
381
+ }
382
+ lines.push("");
316
383
  }
317
384
 
318
- lines.push("");
319
- lines.push(
320
- "Use the library ID (e.g., " +
321
- (results[0] as Record<string, unknown>)?.id +
322
- ") with context7_get_context.",
323
- );
385
+ // Always suggest the top result's ID
386
+ const topId = (scored[0]?.lib as Record<string, unknown>)?.id as
387
+ | string
388
+ | undefined;
389
+ if (topId) {
390
+ lines.push(`Use ${topId} with context7_get_context.`);
391
+ }
324
392
  if (cacheNote) lines.push(cacheNote);
325
393
 
326
394
  return {
@@ -344,6 +412,7 @@ export default function (pi: ExtensionAPI) {
344
412
  pi.registerTool({
345
413
  name: "context7_get_context",
346
414
  label: "Context7 Get Context",
415
+ executionMode: "sequential",
347
416
  description:
348
417
  "Get up-to-date documentation context and code examples for a library from Context7. " +
349
418
  "Requires a libraryId from context7_search_library (format: /owner/repo or /owner/repo@version). " +
@@ -351,7 +420,6 @@ export default function (pi: ExtensionAPI) {
351
420
  promptSnippet: "Retrieve documentation and code examples for a Context7 library ID",
352
421
  promptGuidelines: [
353
422
  "Use context7_get_context for library documentation instead of relying on training data. Training data may be outdated.",
354
- "When context7_get_context returns insufficient results, retry with researchMode: true for a deeper search.",
355
423
  "Always run context7_search_library first to resolve library names to Context7 IDs before calling context7_get_context.",
356
424
  ],
357
425
  parameters: Type.Object({
@@ -372,14 +440,6 @@ export default function (pi: ExtensionAPI) {
372
440
  default: "json",
373
441
  }),
374
442
  ),
375
- researchMode: Type.Optional(
376
- Type.Boolean({
377
- description:
378
- "When true, use deeper agentic research (sandboxed agents, live web search). " +
379
- "Slower but higher quality. Use as retry if default results are insufficient.",
380
- default: false,
381
- }),
382
- ),
383
443
  }),
384
444
  async execute(_toolCallId, params, signal, _onUpdate, _ctx) {
385
445
  try {
@@ -405,7 +465,7 @@ export default function (pi: ExtensionAPI) {
405
465
  query: params.query,
406
466
  type: responseType,
407
467
  };
408
- if (params.researchMode) fetchParams.researchMode = true;
468
+
409
469
 
410
470
  // Try cache
411
471
  const cached = await cache.get(
@@ -435,10 +495,12 @@ export default function (pi: ExtensionAPI) {
435
495
  currentApiKey,
436
496
  signal,
437
497
  );
438
- // Store in cache (fire-and-forget)
439
- cache
498
+ // Store in cache (await to guarantee manifest is updated before returning)
499
+ await cache
440
500
  .set("context", { libraryId: params.libraryId }, fetchParams, data)
441
- .catch(() => {});
501
+ .catch((err) => {
502
+ console.error("[context7] cache write failed:", err);
503
+ });
442
504
  cacheNote = "\n[fetched from API]";
443
505
  }
444
506
 
@@ -520,21 +582,35 @@ export default function (pi: ExtensionAPI) {
520
582
  outputLines.push("");
521
583
 
522
584
  for (const snippet of infoSnippets) {
523
- const title = snippet.title ?? "Info";
524
- const snippetText =
525
- (snippet.content as string) ??
526
- (snippet.text as string) ??
527
- (snippet.description as string) ??
528
- "";
529
- outputLines.push(`**${title}** ${snippetText}`);
585
+ const breadcrumb = (snippet.breadcrumb as string) ?? "Documentation";
586
+ const snippetContent = (snippet.content as string) ?? "";
587
+ const pageId = snippet.pageId as string | undefined;
588
+
589
+ outputLines.push(`**${breadcrumb}**`);
590
+ if (snippetContent) outputLines.push(snippetContent);
591
+ if (pageId) outputLines.push(`Source: ${pageId}`);
530
592
  outputLines.push("");
531
593
  }
532
594
  }
533
595
 
534
- // Research mode note
535
- if (params.researchMode) {
536
- outputLines.push("[Research mode — deeper analysis]");
537
- outputLines.push("");
596
+ // Library rules (global, libraryOwn, libraryTeam)
597
+ const rules = data?.rules as
598
+ | Record<string, string[]>
599
+ | undefined;
600
+ if (rules) {
601
+ const allRules: string[] = [];
602
+ if (Array.isArray(rules.global)) allRules.push(...rules.global);
603
+ if (Array.isArray(rules.libraryOwn)) allRules.push(...rules.libraryOwn);
604
+ if (Array.isArray(rules.libraryTeam)) allRules.push(...rules.libraryTeam);
605
+
606
+ if (allRules.length > 0) {
607
+ outputLines.push("### Library Rules");
608
+ outputLines.push("");
609
+ for (const rule of allRules) {
610
+ outputLines.push(`- ${rule}`);
611
+ }
612
+ outputLines.push("");
613
+ }
538
614
  }
539
615
  }
540
616
 
@@ -0,0 +1,394 @@
1
+ /**
2
+ * Unit tests for library ranking logic (extensions/ranking.ts).
3
+ *
4
+ * Verifies:
5
+ * - Weight constants sum to exactly 1.0
6
+ * - Individual weight values are correct (0.6, 0.25, 0.15)
7
+ * - React (220k stars) outranks Preact (36k stars) with the new weights
8
+ * - Log normalization works correctly (no NaN, no division by zero)
9
+ * - Edge cases: stars=0, single result, all same stars, missing fields
10
+ *
11
+ * Run with: npm test
12
+ *
13
+ * @module extensions/ranking.test
14
+ */
15
+
16
+ import { test, describe } from "node:test";
17
+ import assert from "node:assert/strict";
18
+
19
+ import {
20
+ WEIGHT_STARS,
21
+ WEIGHT_TRUST,
22
+ WEIGHT_BENCHMARK,
23
+ computeQualityScore,
24
+ getStars,
25
+ getTrust,
26
+ getBenchmark,
27
+ } from "./ranking.ts";
28
+
29
+ // ---------------------------------------------------------------------------
30
+ // Helper: score a set of libraries and return them sorted by composite score
31
+ // (descending), mirroring the ranking logic in context7.ts.
32
+ // ---------------------------------------------------------------------------
33
+
34
/**
 * Score and order a list of library records for the tests below.
 *
 * Records whose `state` is neither "finalized" nor absent are dropped. The
 * highest star count among the remaining records is used as the
 * normalization ceiling, and the result is ordered by composite score,
 * best first.
 *
 * @param libs - Raw library records.
 * @returns The retained records paired with their composite score, sorted descending.
 */
function rankLibraries(libs: Record<string, unknown>[]): Array<{
  lib: Record<string, unknown>;
  score: number;
}> {
  const eligible: Record<string, unknown>[] = [];
  let maxStars = 0;

  // Single pass: keep eligible records and track the star ceiling as we go.
  for (const candidate of libs) {
    if (candidate.state !== "finalized" && candidate.state !== undefined) {
      continue;
    }
    eligible.push(candidate);
    maxStars = Math.max(maxStars, getStars(candidate));
  }

  const scored = eligible.map((candidate) => ({
    lib: candidate,
    score: computeQualityScore(candidate, maxStars),
  }));
  scored.sort((left, right) => right.score - left.score);
  return scored;
}
48
+
49
+ // ===========================================================================
50
+ // Weight Constants
51
+ // ===========================================================================
52
+
53
// Pins the published weight values and checks that they form a valid
// convex combination (all positive, summing to 1).
describe("Weight constants", () => {
  test("WEIGHT_STARS is 0.6", () => {
    assert.equal(WEIGHT_STARS, 0.6);
  });

  test("WEIGHT_TRUST is 0.25", () => {
    assert.equal(WEIGHT_TRUST, 0.25);
  });

  test("WEIGHT_BENCHMARK is 0.15", () => {
    assert.equal(WEIGHT_BENCHMARK, 0.15);
  });

  test("weights sum to exactly 1.0", () => {
    const sum = WEIGHT_STARS + WEIGHT_TRUST + WEIGHT_BENCHMARK;
    // None of 0.6 / 0.15 is exactly representable in binary floating point,
    // so the accumulated sum is compared within a few ULPs of 1.0 instead of
    // with strict equality (which only holds by rounding luck).
    assert.ok(
      Math.abs(sum - 1.0) <= 4 * Number.EPSILON,
      `weights must sum to 1.0, got ${sum}`,
    );
  });

  test("no weight is zero or negative", () => {
    assert.ok(WEIGHT_STARS > 0, "WEIGHT_STARS must be positive");
    assert.ok(WEIGHT_TRUST > 0, "WEIGHT_TRUST must be positive");
    assert.ok(WEIGHT_BENCHMARK > 0, "WEIGHT_BENCHMARK must be positive");
  });
});
77
+
78
+ // ===========================================================================
79
+ // React vs Preact — the motivating example from the spec
80
+ // ===========================================================================
81
+
82
// Motivating example: a very popular library must beat a smaller sibling
// even though both have high trust and benchmark scores.
describe("React outranks Preact (stars=220000 vs 36000)", () => {
  // Star ceiling for this two-library result set (React's own count).
  const MAX_STARS = 220000;

  const react = {
    id: "/facebook/react",
    title: "React",
    stars: 220000,
    trustScore: 10,
    benchmarkScore: 95.5,
    state: "finalized",
  };
  const preact = {
    id: "/preactjs/preact",
    title: "Preact",
    stars: 36000,
    trustScore: 9,
    benchmarkScore: 88.0,
    state: "finalized",
  };

  test("React composite score is greater than Preact", () => {
    const ranked = rankLibraries([preact, react]); // preact first to verify sorting
    assert.equal(ranked[0].lib.id, "/facebook/react");
    assert.equal(ranked[1].lib.id, "/preactjs/preact");
    assert.ok(
      ranked[0].score > ranked[1].score,
      `React (${ranked[0].score}) should outrank Preact (${ranked[1].score})`,
    );
  });

  test("React composite score is close to 1.0 (max)", () => {
    const score = computeQualityScore(react, MAX_STARS);
    // React has the highest stars (starsNorm = 1.0) and max trust (1.0).
    // getBenchmark truncates 95.5 to the integer 95, so benchmarkNorm = 0.95.
    // Expected: 0.6*1 + 0.25*1 + 0.15*0.95 = 0.9925
    assert.ok(
      score > 0.99,
      `React score should be ~0.993, got ${score}`,
    );
  });

  test("Preact composite score is meaningfully below React", () => {
    const reactScore = computeQualityScore(react, MAX_STARS);
    const preactScore = computeQualityScore(preact, MAX_STARS);
    const gap = reactScore - preactScore;
    // The gap should be at least 0.1 (significant margin)
    assert.ok(
      gap > 0.1,
      `React-Preact gap should be > 0.1, got ${gap}`,
    );
  });

  test("ranking works regardless of input order", () => {
    const order1 = rankLibraries([react, preact]);
    const order2 = rankLibraries([preact, react]);
    assert.equal(order1[0].lib.id, order2[0].lib.id);
  });
});
139
+
140
+ // ===========================================================================
141
+ // Log Normalization
142
+ // ===========================================================================
143
+
144
// Verifies the log(stars + 1) / log(maxStars + 1) normalization of the
// stars signal: boundary values, range compression and monotonicity.
describe("Log normalization", () => {
  // Composite scores are sums of floating-point products, so expected values
  // are compared within a small absolute tolerance rather than bit-for-bit.
  const TOLERANCE = 1e-12;
  const assertClose = (actual: number, expected: number): void => {
    assert.ok(
      Math.abs(actual - expected) <= TOLERANCE,
      `expected ${expected}, got ${actual}`,
    );
  };

  test("library with max stars gets starsNorm = 1.0", () => {
    const lib = { stars: 100000, trustScore: 0, benchmarkScore: 0 };
    const score = computeQualityScore(lib, 100000);
    // starsNorm = log(100001)/log(100001) = 1.0
    // composite = 0.6 * 1.0 + 0.25 * 0 + 0.15 * 0 = 0.6
    assertClose(score, 0.6);
  });

  test("library with 0 stars gets starsNorm = 0 (no NaN)", () => {
    const lib = { stars: 0, trustScore: 5, benchmarkScore: 50 };
    const score = computeQualityScore(lib, 100000);
    // starsNorm = log(1)/log(100001) = 0/11.51 = 0
    // composite = 0.6*0 + 0.25*0.5 + 0.15*0.5 = 0.125 + 0.075 = 0.2
    assert.ok(!Number.isNaN(score), "score must not be NaN");
    assertClose(score, 0.2);
  });

  test("log scale compresses extreme range (1k vs 220k stars)", () => {
    const maxStars = 220000;
    const smallLib = { stars: 1000, trustScore: 0, benchmarkScore: 0 };
    const bigLib = { stars: 220000, trustScore: 0, benchmarkScore: 0 };
    const smallScore = computeQualityScore(smallLib, maxStars);
    const bigScore = computeQualityScore(bigLib, maxStars);

    // Small library should still score meaningfully (not near zero)
    // log(1001)/log(220001) ≈ 6.91/12.30 ≈ 0.562
    // composite = 0.6 * 0.562 ≈ 0.337
    assert.ok(
      smallScore > 0.3,
      `1k-star lib should score > 0.3 with log norm, got ${smallScore}`,
    );
    // Big library gets full stars weight
    assertClose(bigScore, 0.6);
    // But not 220x higher (which linear would give)
    assert.ok(
      bigScore / smallScore < 3,
      "log scale should compress the range significantly",
    );
  });

  test("starsNorm increases monotonically with star count", () => {
    const maxStars = 100000;
    const starCounts = [0, 10, 100, 1000, 10000, 100000];
    const scores = starCounts.map((s) =>
      computeQualityScore({ stars: s, trustScore: 0, benchmarkScore: 0 }, maxStars),
    );
    for (let i = 1; i < scores.length; i++) {
      assert.ok(
        scores[i] > scores[i - 1],
        `score should increase from ${starCounts[i - 1]} to ${starCounts[i]} stars`,
      );
    }
  });
});
199
+
200
+ // ===========================================================================
201
+ // Edge Cases
202
+ // ===========================================================================
203
+
204
// Boundary behaviour: zero/missing signals, degenerate result sets, and the
// state filter applied by the rankLibraries helper.
describe("Edge cases", () => {
  // Composite scores are sums of floating-point products, so expected values
  // are compared within a small absolute tolerance rather than bit-for-bit.
  const TOLERANCE = 1e-12;
  const assertClose = (actual: number, expected: number): void => {
    assert.ok(
      Math.abs(actual - expected) <= TOLERANCE,
      `expected ${expected}, got ${actual}`,
    );
  };

  test("stars=0 does not cause NaN or Infinity", () => {
    const lib = { stars: 0, trustScore: 10, benchmarkScore: 100 };
    const score = computeQualityScore(lib, 50000);
    assert.ok(Number.isFinite(score), "score must be finite");
    // starsNorm = log(1)/log(50001) = 0
    // composite = 0.6*0 + 0.25*1 + 0.15*1 = 0.4
    assertClose(score, 0.4);
  });

  test("maxStars=0 (all libraries have 0 stars) does not crash", () => {
    const lib = { stars: 0, trustScore: 10, benchmarkScore: 100 };
    const score = computeQualityScore(lib, 0);
    // maxStars=0 → starsNorm=0 (guard clause)
    // composite = 0.6*0 + 0.25*1 + 0.15*1 = 0.4
    assert.ok(Number.isFinite(score), "score must be finite");
    assertClose(score, 0.4);
  });

  test("single result is always ranked first (Recommended)", () => {
    const lib = {
      id: "/some/lib",
      title: "SomeLib",
      stars: 50,
      trustScore: 3,
      benchmarkScore: 40,
      state: "finalized",
    };
    const ranked = rankLibraries([lib]);
    assert.equal(ranked.length, 1);
    assert.equal(ranked[0].lib.id, "/some/lib");
    assert.ok(ranked[0].score > 0, "single result should have a positive score");
  });

  test("all same stars — ranking falls to trust and benchmark", () => {
    // When all libraries have the same stars, starsNorm = 1.0 for all,
    // so the composite is determined by trust and benchmark.
    const libA = {
      id: "/a",
      title: "A",
      stars: 5000,
      trustScore: 10,
      benchmarkScore: 90,
      state: "finalized",
    };
    const libB = {
      id: "/b",
      title: "B",
      stars: 5000,
      trustScore: 5,
      benchmarkScore: 50,
      state: "finalized",
    };
    const libC = {
      id: "/c",
      title: "C",
      stars: 5000,
      trustScore: 7,
      benchmarkScore: 70,
      state: "finalized",
    };

    const ranked = rankLibraries([libB, libC, libA]);
    assert.equal(ranked[0].lib.id, "/a"); // highest trust + benchmark
    assert.equal(ranked[1].lib.id, "/c");
    assert.equal(ranked[2].lib.id, "/b"); // lowest trust + benchmark
  });

  test("missing trustScore and benchmarkScore treated as 0", () => {
    const lib = { stars: 10000, state: "finalized" }; // no trustScore, no benchmarkScore
    const score = computeQualityScore(lib, 10000);
    // starsNorm = 1.0, trustNorm = 0, benchmarkNorm = 0
    // composite = 0.6 * 1.0 + 0.25 * 0 + 0.15 * 0 = 0.6
    assert.ok(Number.isFinite(score));
    assertClose(score, 0.6);
  });

  test("missing stars field treated as 0", () => {
    const lib = { trustScore: 10, benchmarkScore: 100, state: "finalized" };
    const score = computeQualityScore(lib, 50000);
    assert.ok(Number.isFinite(score));
    // stars=0 → starsNorm=0, trustNorm=1, benchmarkNorm=1
    // composite = 0 + 0.25 + 0.15 = 0.4
    assertClose(score, 0.4);
  });

  test("non-finalized libraries are filtered out during ranking", () => {
    const finalized = {
      id: "/finalized",
      title: "Finalized",
      stars: 100,
      trustScore: 5,
      benchmarkScore: 50,
      state: "finalized",
    };
    const processing = {
      id: "/processing",
      title: "Processing",
      stars: 999999,
      trustScore: 10,
      benchmarkScore: 100,
      state: "processing",
    };
    const initial = {
      id: "/initial",
      title: "Initial",
      stars: 888888,
      trustScore: 10,
      benchmarkScore: 100,
      state: "initial",
    };

    const ranked = rankLibraries([processing, initial, finalized]);
    assert.equal(ranked.length, 1, "only finalized libraries should remain");
    assert.equal(ranked[0].lib.id, "/finalized");
  });

  test("state missing (undefined) is kept (backwards compatible)", () => {
    const lib = {
      id: "/no-state",
      title: "NoState",
      stars: 1000,
      trustScore: 7,
      benchmarkScore: 70,
      // no state field
    };
    const ranked = rankLibraries([lib]);
    assert.equal(ranked.length, 1);
  });
});
334
+
335
+ // ===========================================================================
336
+ // Field Accessor Helpers
337
+ // ===========================================================================
338
+
339
// Each accessor is exercised through a table of (record, expected) pairs:
// the camelCase spelling, each alternate spelling, and an empty record.
describe("Field accessors handle alternate field names", () => {
  type AccessorCase = [record: Record<string, unknown>, expected: number];

  test("getStars checks stars, githubStars, github_stars", () => {
    const cases: AccessorCase[] = [
      [{ stars: 100 }, 100],
      [{ githubStars: 200 }, 200],
      [{ github_stars: 300 }, 300],
      [{}, 0],
    ];
    for (const [record, expected] of cases) {
      assert.equal(getStars(record), expected);
    }
  });

  test("getTrust checks trustScore, trust_score", () => {
    const cases: AccessorCase[] = [
      [{ trustScore: 8 }, 8],
      [{ trust_score: 6 }, 6],
      [{}, 0],
    ];
    for (const [record, expected] of cases) {
      assert.equal(getTrust(record), expected);
    }
  });

  test("getBenchmark checks benchmarkScore, benchmark_score", () => {
    // Fractional inputs come back truncated to their integer part.
    const cases: AccessorCase[] = [
      [{ benchmarkScore: 75.5 }, 75],
      [{ benchmark_score: 42.7 }, 42],
      [{}, 0],
    ];
    for (const [record, expected] of cases) {
      assert.equal(getBenchmark(record), expected);
    }
  });
});
359
+
360
+ // ===========================================================================
361
+ // Score Range Validation
362
+ // ===========================================================================
363
+
364
// Checks the extremes of the composite score and that representative
// inputs stay inside the [0, 1] range.
describe("Score range validation", () => {
  // The composite is a sum of floating-point products, so the upper bound is
  // checked with a small absolute tolerance rather than bit-for-bit.
  const TOLERANCE = 1e-12;

  test("perfect library (max stars, max trust, max benchmark) scores 1.0", () => {
    const lib = { stars: 100000, trustScore: 10, benchmarkScore: 100 };
    const score = computeQualityScore(lib, 100000);
    assert.ok(
      Math.abs(score - 1.0) <= TOLERANCE,
      `expected 1, got ${score}`,
    );
  });

  test("worst library (0 everything) scores 0.0", () => {
    const lib = { stars: 0, trustScore: 0, benchmarkScore: 0 };
    const score = computeQualityScore(lib, 100000);
    // Every term is a weight multiplied by an exact 0, so equality is exact here.
    assert.equal(score, 0.0);
  });

  test("all scores are in [0, 1] range", () => {
    const maxStars = 50000;
    const testCases = [
      { stars: 0, trustScore: 0, benchmarkScore: 0 },
      { stars: 1, trustScore: 1, benchmarkScore: 1 },
      { stars: 100, trustScore: 5, benchmarkScore: 50 },
      { stars: 1000, trustScore: 7, benchmarkScore: 70 },
      { stars: 50000, trustScore: 10, benchmarkScore: 100 },
    ];
    for (const tc of testCases) {
      const score = computeQualityScore(tc, maxStars);
      assert.ok(
        score >= 0 && score <= 1 + TOLERANCE,
        `score ${score} for ${JSON.stringify(tc)} is out of [0,1]`,
      );
    }
  });
});
@@ -0,0 +1,93 @@
1
+ /**
2
+ * Library ranking logic for Context7 search results.
3
+ *
4
+ * Computes a composite quality score for each library using three signals:
5
+ * - Stars (log-normalized) — strongest weight, reflects real-world adoption
6
+ * - Trust score (linear 0–10) — source reputation
7
+ * - Benchmark score (linear 0–100) — documentation quality
8
+ *
9
+ * Weights are exported as constants so they can be tested and kept in sync
10
+ * with SKILL.md documentation.
11
+ *
12
+ * @module extensions/ranking
13
+ */
14
+
15
+ /**
16
+ * Weight for the log-normalized stars signal.
17
+ * Stars are the dominant ranking signal — popular, established libraries
18
+ * should win by default unless they have notably poor documentation quality.
19
+ */
20
+ export const WEIGHT_STARS = 0.6;
21
+
22
+ /**
23
+ * Weight for the trust score signal (0–10).
24
+ * Most major libraries score 9–10, so the difference is minimal — trust
25
+ * acts as a tie-breaker when stars are similar.
26
+ */
27
+ export const WEIGHT_TRUST = 0.25;
28
+
29
+ /**
30
+ * Weight for the benchmark score signal (0–100).
31
+ * The least stable metric (changes with each documentation refresh) and
32
+ * the least correlated with what the user actually wants.
33
+ */
34
+ export const WEIGHT_BENCHMARK = 0.15;
35
+
36
/**
 * Coerce an untyped API field to an integer.
 *
 * The value is converted with `Number()` and truncated toward zero.
 * Anything that is missing or does not convert to a finite number yields 0.
 * Unlike a bitwise `| 0`, this does not wrap values at or above 2^31 into
 * negative numbers.
 */
function toInteger(value: unknown): number {
  const numeric = Number(value);
  if (!Number.isFinite(numeric)) return 0;
  // `|| 0` folds the -0 that Math.trunc produces for inputs in (-1, 0) into +0.
  return Math.trunc(numeric) || 0;
}

/**
 * Extract the stars value from a library record, checking multiple
 * possible field names (`stars`, `githubStars`, `github_stars`) in that order.
 *
 * @param lib - The library record.
 * @returns The star count as an integer, or 0 when no usable value is present.
 */
export function getStars(lib: Record<string, unknown>): number {
  return toInteger(lib.stars ?? lib.githubStars ?? lib.github_stars);
}

/**
 * Extract the trust score from a library record (0–10), checking
 * `trustScore` then `trust_score`.
 *
 * @param lib - The library record.
 * @returns The trust score truncated to an integer, or 0 when absent.
 */
export function getTrust(lib: Record<string, unknown>): number {
  return toInteger(lib.trustScore ?? lib.trust_score);
}

/**
 * Extract the benchmark score from a library record (0–100), checking
 * `benchmarkScore` then `benchmark_score`.
 *
 * @param lib - The library record.
 * @returns The benchmark score truncated to an integer (e.g. 75.5 → 75), or 0 when absent.
 */
export function getBenchmark(lib: Record<string, unknown>): number {
  return toInteger(lib.benchmarkScore ?? lib.benchmark_score);
}
57
+
58
/**
 * Compute the composite quality score for a library.
 *
 * Stars are log-normalized: `log(stars + 1) / log(maxStars + 1)`.
 * This ensures a 1,000-star library scores meaningfully (~0.56) next to a
 * 220k-star library, rather than near-zero with linear normalization.
 *
 * Trust and benchmark are linearly normalized to 0–1.
 *
 * Every normalized signal is clamped to [0, 1], so negative or oversized
 * input values cannot produce NaN or push the result outside the
 * documented range.
 *
 * @param lib - The library record from the API response.
 * @param maxStars - The highest star count in the result set (for log normalization).
 * @returns Composite score in the range [0, 1].
 */
export function computeQualityScore(
  lib: Record<string, unknown>,
  maxStars: number,
): number {
  // Clamp into [0, 1]; NaN fails the `> 0` test and therefore maps to 0.
  const clamp01 = (value: number): number =>
    value > 0 ? Math.min(value, 1) : 0;

  // A negative star count would make log(stars + 1) NaN or -Infinity.
  const stars = Math.max(getStars(lib), 0);
  const trust = getTrust(lib);
  const benchmark = getBenchmark(lib);

  // Log-normalize stars: log(stars + 1) / log(maxStars + 1)
  // When maxStars is 0, all star contributions are 0 (avoids division by zero / NaN).
  const starsNorm =
    maxStars > 0 ? clamp01(Math.log(stars + 1) / Math.log(maxStars + 1)) : 0;

  // Linear normalize trust (0-10) and benchmark (0-100)
  const trustNorm = clamp01(trust / 10);
  const benchmarkNorm = clamp01(benchmark / 100);

  return (
    WEIGHT_STARS * starsNorm +
    WEIGHT_TRUST * trustNorm +
    WEIGHT_BENCHMARK * benchmarkNorm
  );
}
package/package.json CHANGED
@@ -1,8 +1,12 @@
1
1
  {
2
2
  "name": "@mario-gc/pi-context7",
3
- "version": "0.1.1",
3
+ "version": "0.2.0",
4
4
  "description": "Context7 integration for pi coding agent — fetch up-to-date library documentation and code examples",
5
5
  "license": "MIT",
6
+ "scripts": {
7
+ "test": "node --test extensions/*.test.ts",
8
+ "typecheck": "tsc --noEmit"
9
+ },
6
10
  "keywords": [
7
11
  "pi-package",
8
12
  "context7",
@@ -27,11 +27,17 @@ Call `context7_search_library` with:
27
27
 
28
28
  ### Step 2: Select the Best Match
29
29
 
30
- From the results, choose based on:
31
- - Exact or closest name match to what the user asked for
32
- - Higher benchmark scores (out of 100) indicate better documentation quality
33
- - Higher trust scores (out of 10) indicate more authoritative sources
34
- - If the user mentioned a version (e.g., "React 19"), prefer version-specific IDs from the `versions` list
30
+ Results are automatically ranked by a composite quality score:
31
+ - **Stars (60%)** — log-normalized so smaller libraries aren't drowned out
32
+ - **Trust score (25%)** — source reputation (0–10)
33
+ - **Benchmark score (15%)** — documentation quality (0–100)
34
+
35
+ Only the top 3 results are shown, with the best match marked as ⭐ Recommended.
36
+ Non-finalized libraries (still processing) are filtered out automatically.
37
+
38
+ **You do not need to manually select a library** — use the Recommended ID
39
+ for `context7_get_context`. If the recommended library doesn't match what
40
+ you need, use one of the other shown results or refine your search.
35
41
 
36
42
  ### Step 3: Fetch Documentation
37
43
 
@@ -40,7 +46,6 @@ Call `context7_get_context` with:
40
46
  - `libraryId`: The selected Context7 library ID (e.g., `/vercel/next.js`)
41
47
  - `query`: The user's specific question — be descriptive
42
48
  - `type`: Use "json" for structured snippets (default), "txt" for plain text
43
- - `researchMode`: Only use this as a **retry** if the initial results are insufficient
44
49
 
45
50
  ### Step 4: Use the Documentation
46
51
 
@@ -48,7 +53,7 @@ Incorporate the fetched documentation into your response:
48
53
  - Answer the user's question using current, accurate information
49
54
  - Include relevant code examples from the docs
50
55
  - Cite the library version when relevant
51
- - Reference the source page/breadcrumb when helpful (from `pageTitle` or `breadcrumb`)
56
+ - Reference the source page/breadcrumb when helpful (from `breadcrumb` or `pageId`)
52
57
 
53
58
  ## Query Quality
54
59
 
@@ -70,8 +75,8 @@ When users mention specific versions:
70
75
  ## Retry Strategy
71
76
 
72
77
  If `context7_get_context` returns insufficient or irrelevant results:
73
- 1. Retry with `researchMode: true` — this uses deeper agentic search
74
- 2. If still insufficient, consider refining the query with more specific terms
78
+ 1. Refine your query with more specific terms — include the exact API name, pattern, or feature
79
+ 2. Try a different library ID from the search results if multiple were shown
75
80
  3. Do not silently fall back to training data without telling the user
76
81
 
77
82
  ## Guidelines