@skill-tools/router 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/README.md +61 -0
- package/dist/index.cjs +956 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +426 -0
- package/dist/index.d.ts +426 -0
- package/dist/index.js +950 -0
- package/dist/index.js.map +1 -0
- package/package.json +68 -0
package/dist/index.js
ADDED
@@ -0,0 +1,950 @@
import { resolveSkillFiles, parseSkill } from '@skill-tools/core';

// src/bm25/index.ts
var BM25Index = class {
  k1;
  b;
  documents = [];
  invertedIndex = /* @__PURE__ */ new Map();
  idfCache = /* @__PURE__ */ new Map();
  avgdl = 0;
  totalDocLength = 0;
  constructor(options) {
    this.k1 = options?.k1 ?? 1.2;
    this.b = options?.b ?? 0.75;
  }
  /**
   * Add documents to the index.
   * Batch operation — IDF is recomputed once after all documents are added.
   */
  add(entries) {
    if (entries.length === 0) return;
    for (const entry of entries) {
      const tokens = tokenize(entry.text);
      const docIdx = this.documents.length;
      this.documents.push({
        id: entry.id,
        length: tokens.length,
        metadata: entry.metadata
      });
      this.totalDocLength += tokens.length;
      const termFreq = /* @__PURE__ */ new Map();
      for (const token of tokens) {
        termFreq.set(token, (termFreq.get(token) ?? 0) + 1);
      }
      for (const [term, tf] of termFreq) {
        let postings = this.invertedIndex.get(term);
        if (!postings) {
          postings = [];
          this.invertedIndex.set(term, postings);
        }
        postings.push({ docIdx, tf });
      }
    }
    this.avgdl = this.totalDocLength / this.documents.length;
    this.recomputeIDF();
  }
  /**
   * Remove documents by ID.
   * Rebuilds internal index mappings after removal.
   */
  remove(ids) {
    const idSet = new Set(ids);
    const removedIndices = /* @__PURE__ */ new Set();
    for (let i = 0; i < this.documents.length; i++) {
      if (idSet.has(this.documents[i].id)) {
        removedIndices.add(i);
        this.totalDocLength -= this.documents[i].length;
      }
    }
    if (removedIndices.size === 0) return;
    const newDocuments = [];
    const indexMap = /* @__PURE__ */ new Map();
    for (let i = 0; i < this.documents.length; i++) {
      if (!removedIndices.has(i)) {
        indexMap.set(i, newDocuments.length);
        newDocuments.push(this.documents[i]);
      }
    }
    this.documents = newDocuments;
    for (const [term, postings] of this.invertedIndex) {
      const filtered = [];
      for (const p of postings) {
        if (!removedIndices.has(p.docIdx)) {
          filtered.push({ docIdx: indexMap.get(p.docIdx), tf: p.tf });
        }
      }
      if (filtered.length === 0) {
        this.invertedIndex.delete(term);
      } else {
        this.invertedIndex.set(term, filtered);
      }
    }
    this.avgdl = this.documents.length > 0 ? this.totalDocLength / this.documents.length : 0;
    this.recomputeIDF();
  }
  /**
   * Search the index with a query string.
   *
   * Returns results sorted by BM25 score (highest first).
   * Scores are normalized to [0, 1] — the top result gets 1.0.
   *
   * Only documents containing at least one query term are scored,
   * making queries fast even on large indexes.
   */
  search(query, topK, threshold = 0) {
    if (this.documents.length === 0) return [];
    const queryTokens = tokenize(query);
    if (queryTokens.length === 0) return [];
    const scores = new Float64Array(this.documents.length);
    let hasScores = false;
    for (const token of queryTokens) {
      const idf = this.idfCache.get(token);
      if (idf === void 0 || idf <= 0) continue;
      const postings = this.invertedIndex.get(token);
      if (!postings) continue;
      for (const posting of postings) {
        const docLen = this.documents[posting.docIdx].length;
        const tf = posting.tf;
        const numerator = tf * (this.k1 + 1);
        const denominator = tf + this.k1 * (1 - this.b + this.b * (docLen / this.avgdl));
        scores[posting.docIdx] = scores[posting.docIdx] + idf * (numerator / denominator);
        hasScores = true;
      }
    }
    if (!hasScores) return [];
    let maxScore = 0;
    for (let i = 0; i < scores.length; i++) {
      if (scores[i] > maxScore) maxScore = scores[i];
    }
    if (maxScore === 0) return [];
    const results = [];
    for (let i = 0; i < scores.length; i++) {
      const raw = scores[i];
      if (raw === 0) continue;
      const normalized = raw / maxScore;
      if (normalized >= threshold) {
        results.push({
          id: this.documents[i].id,
          score: normalized,
          metadata: this.documents[i].metadata
        });
      }
    }
    results.sort((a, b) => b.score - a.score);
    return results.slice(0, topK);
  }
  /** Number of indexed documents */
  size() {
    return this.documents.length;
  }
  /** Serialize to a JSON-compatible snapshot */
  serialize() {
    return {
      version: 2,
      documents: this.documents.map((d) => ({
        id: d.id,
        length: d.length,
        metadata: d.metadata
      })),
      invertedIndex: Array.from(this.invertedIndex.entries()),
      idf: Array.from(this.idfCache.entries()),
      avgdl: this.avgdl,
      k1: this.k1,
      b: this.b
    };
  }
  /** Restore from a serialized snapshot */
  deserialize(data) {
    const obj = data;
    if (!obj || typeof obj !== "object") {
      throw new Error("Invalid BM25 snapshot: expected an object");
    }
    if (obj.version !== 2) {
      throw new Error(`Unsupported BM25 snapshot version: ${obj.version}`);
    }
    this.documents = obj.documents.map((d) => ({
      id: d.id,
      length: d.length,
      metadata: d.metadata
    }));
    this.invertedIndex = new Map(obj.invertedIndex.map(([k, v]) => [k, [...v]]));
    this.idfCache = new Map(obj.idf);
    this.avgdl = obj.avgdl;
    this.totalDocLength = this.documents.reduce((sum, d) => sum + d.length, 0);
  }
  /** Recompute IDF values for all terms in the inverted index */
  recomputeIDF() {
    const N = this.documents.length;
    this.idfCache.clear();
    for (const [term, postings] of this.invertedIndex) {
      const df = postings.length;
      this.idfCache.set(term, Math.log((N - df + 0.5) / (df + 0.5) + 1));
    }
  }
};
function tokenize(text) {
  return text.toLowerCase().replace(/[^a-z0-9\s-]/g, " ").split(/\s+/).filter((t) => t.length > 1 && !STOP_WORDS.has(t));
}
var STOP_WORDS = /* @__PURE__ */ new Set([
  "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
  "have", "has", "had", "do", "does", "did", "will", "would", "could",
  "should", "may", "might", "shall", "can", "must", "to", "of", "in",
  "for", "on", "with", "at", "by", "from", "as", "into", "through",
  "during", "before", "after", "above", "below", "between", "out", "off",
  "over", "under", "again", "further", "then", "once", "here", "there",
  "when", "where", "why", "how", "all", "each", "every", "both", "few",
  "more", "most", "other", "some", "such", "no", "nor", "not", "only",
  "own", "same", "so", "than", "too", "very", "and", "but", "or", "if",
  "it", "its", "this", "that", "these", "those", "he", "she", "they",
  "we", "you", "i", "me", "my", "your", "his", "her", "their", "our",
  "what", "which", "who", "whom"
]);
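
// A minimal sketch of driving BM25Index directly (the skill ids and texts
// here are hypothetical, chosen only to illustrate the API above):
//
//   const index = new BM25Index({ k1: 1.2, b: 0.75 });
//   index.add([
//     { id: "git-helper", text: "rebase branches and resolve merge conflicts" },
//     { id: "pdf-export", text: "render markdown documents to pdf" }
//   ]);
//   index.search("resolve a rebase conflict", 3);
//   // => [{ id: "git-helper", score: 1, metadata: undefined }]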
// src/context/extractor.ts
var MAX_CONTEXT_TOKENS = 80;
function extractContext(skill) {
  const terms = [];
  const nameParts = skill.name.split(/[-_]/).map((p) => p.toLowerCase()).filter((p) => p.length > 1);
  terms.push(...nameParts);
  if (skill.sections) {
    for (const section of skill.sections) {
      const headingWords = section.heading.toLowerCase().replace(/[^a-z0-9\s-]/g, " ").split(/\s+/).filter((w) => w.length > 1);
      terms.push(...headingWords);
    }
  }
  if (skill.body) {
    const codeRefs = extractCodeRefs(skill.body);
    terms.push(...codeRefs);
  }
  if (skill.sections) {
    for (const section of skill.sections) {
      const contentWords = section.content.toLowerCase().replace(/[^a-z0-9\s-]/g, " ").split(/\s+/).filter((w) => w.length > 3 && !COMMON_WORDS.has(w));
      terms.push(...contentWords);
    }
  }
  const descLower = skill.description.toLowerCase();
  const descTokens = new Set(
    descLower.replace(/[^a-z0-9\s-]/g, " ").split(/\s+/).filter((t) => t.length > 0)
  );
  const seen = /* @__PURE__ */ new Set();
  const unique = [];
  for (const term of terms) {
    const lower = term.toLowerCase();
    if (lower.length < 2) continue;
    if (seen.has(lower)) continue;
    if (descTokens.has(lower)) continue;
    seen.add(lower);
    unique.push(lower);
  }
  return truncateTokens(unique, MAX_CONTEXT_TOKENS).join(" ");
}
function extractCodeRefs(body) {
  const refs = [];
  const pattern = /`([^`\n]+)`/g;
  for (const match of body.matchAll(pattern)) {
    const code = match[1].trim();
    const parts = code.split(/\s+/).map((p) => p.replace(/^-+/, "").toLowerCase()).filter((p) => p.length > 1);
    refs.push(...parts);
  }
  return refs;
}
function truncateTokens(tokens, max) {
  if (tokens.length <= max) return tokens;
  return tokens.slice(0, max);
}
var COMMON_WORDS = /* @__PURE__ */ new Set([
  "also", "about", "after", "again", "been", "before", "being", "between",
  "both", "check", "could", "does", "done", "down", "each", "even", "every",
  "first", "following", "from", "have", "here", "into", "just", "know",
  "like", "make", "many", "might", "more", "most", "much", "must", "need",
  "only", "other", "over", "same", "should", "some", "such", "sure", "take",
  "than", "that", "them", "then", "there", "these", "they", "this", "those",
  "through", "under", "very", "want", "well", "were", "what", "when",
  "where", "which", "while", "will", "with", "would", "your"
]);
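
// A sketch of what extractContext yields for a small hypothetical skill:
// name parts, section-heading words, backtick code refs, and longer section
// words are collected in that order, deduplicated, stripped of any token
// already present in the description, and capped at MAX_CONTEXT_TOKENS (80):
//
//   extractContext({
//     name: "git-rebase-helper",
//     description: "Helps with git rebases",
//     body: "Run `git rebase --interactive` to edit history.",
//     sections: [{ heading: "Resolving conflicts", content: "Abort with care." }]
//   });
//   // => "rebase helper resolving conflicts interactive abort care"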
// src/embeddings/local.ts
var LocalEmbeddingProvider = class {
  name = "local-tfidf";
  dimensions;
  vocabulary = /* @__PURE__ */ new Map();
  idfValues = /* @__PURE__ */ new Map();
  isBuilt = false;
  constructor(dimensions = 256) {
    this.dimensions = dimensions;
  }
  /**
   * Build the vocabulary and IDF values from a corpus.
   * Call this once after indexing all skill descriptions.
   */
  buildVocabulary(texts) {
    const docCount = texts.length;
    const termDocFreq = /* @__PURE__ */ new Map();
    for (const text of texts) {
      const tokens = new Set(tokenize2(text));
      for (const token of tokens) {
        termDocFreq.set(token, (termDocFreq.get(token) ?? 0) + 1);
      }
    }
    for (const [term, df] of termDocFreq) {
      this.idfValues.set(term, Math.log((docCount + 1) / (df + 1)) + 1);
    }
    const sorted = Array.from(this.idfValues.entries()).sort((a, b) => b[1] - a[1]);
    let idx = 0;
    for (const [term] of sorted) {
      this.vocabulary.set(term, idx % this.dimensions);
      idx++;
    }
    this.isBuilt = true;
  }
  async embed(texts) {
    return texts.map((text) => this.embedSingle(text));
  }
  embedSingle(text) {
    const vector = new Float64Array(this.dimensions);
    const tokens = tokenize2(text);
    const termFreq = /* @__PURE__ */ new Map();
    for (const token of tokens) {
      termFreq.set(token, (termFreq.get(token) ?? 0) + 1);
    }
    for (const [term, tf] of termFreq) {
      const idf = this.idfValues.get(term) ?? 1;
      const tfidf = (1 + Math.log(tf)) * idf;
      if (this.isBuilt) {
        const idx = this.vocabulary.get(term);
        if (idx !== void 0) {
          vector[idx] = (vector[idx] ?? 0) + tfidf;
        }
      } else {
        const idx = hashToIndex(term, this.dimensions);
        vector[idx] = (vector[idx] ?? 0) + tfidf;
      }
    }
    return l2Normalize(Array.from(vector));
  }
};
function tokenize2(text) {
  return text.toLowerCase().replace(/[^a-z0-9\s-]/g, " ").split(/\s+/).filter((t) => t.length > 1 && !STOP_WORDS2.has(t));
}
function hashToIndex(str, size) {
  let hash = 2166136261;
  for (let i = 0; i < str.length; i++) {
    hash ^= str.charCodeAt(i);
    hash = Math.imul(hash, 16777619);
  }
  return Math.abs(hash) % size;
}
function l2Normalize(vector) {
  let sum = 0;
  for (const v of vector) {
    sum += v * v;
  }
  const magnitude = Math.sqrt(sum);
  if (magnitude === 0) return vector;
  return vector.map((v) => v / magnitude);
}
var STOP_WORDS2 = /* @__PURE__ */ new Set([
  "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
  "have", "has", "had", "do", "does", "did", "will", "would", "could",
  "should", "may", "might", "shall", "can", "must", "to", "of", "in",
  "for", "on", "with", "at", "by", "from", "as", "into", "through",
  "during", "before", "after", "above", "below", "between", "out", "off",
  "over", "under", "again", "further", "then", "once", "here", "there",
  "when", "where", "why", "how", "all", "each", "every", "both", "few",
  "more", "most", "other", "some", "such", "no", "nor", "not", "only",
  "own", "same", "so", "than", "too", "very", "and", "but", "or", "if",
  "it", "its", "this", "that", "these", "those", "he", "she", "they",
  "we", "you", "i", "me", "my", "your", "his", "her", "their", "our",
  "what", "which", "who", "whom"
]);
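
// A sketch of the provider's two modes (the corpus below is hypothetical):
// before buildVocabulary() runs, terms are bucketed by FNV-1a hashing via
// hashToIndex, so texts can be embedded without any training pass; after it
// runs, terms map through the learned vocabulary instead. Output vectors are
// L2-normalized, so cosine similarity reduces to a dot product:
//
//   const provider = new LocalEmbeddingProvider(256);
//   provider.buildVocabulary(["rebase branches safely", "render pdf documents"]);
//   const [v] = await provider.embed(["rebase a branch"]);
//   // v.length === 256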
// src/stores/memory.ts
var MemoryVectorStore = class {
  entries = [];
  async add(entries) {
    this.entries.push(...entries);
  }
  async search(queryVector, topK, threshold = 0) {
    const scored = this.entries.map((entry) => ({
      id: entry.id,
      score: cosineSimilarity(queryVector, entry.vector),
      metadata: entry.metadata
    }));
    return scored.filter((r) => r.score >= threshold).sort((a, b) => b.score - a.score).slice(0, topK);
  }
  async remove(ids) {
    const idSet = new Set(ids);
    this.entries = this.entries.filter((e) => !idSet.has(e.id));
  }
  size() {
    return this.entries.length;
  }
  serialize() {
    return {
      version: 1,
      entries: this.entries.map((e) => ({
        id: e.id,
        vector: e.vector,
        metadata: e.metadata
      }))
    };
  }
  deserialize(data) {
    if (!data || typeof data !== "object") {
      throw new Error("Invalid snapshot: expected an object");
    }
    const obj = data;
    if (obj.version !== 1) {
      throw new Error(`Unsupported vector store version: ${obj.version}`);
    }
    if (!Array.isArray(obj.entries)) {
      throw new Error("Invalid snapshot: entries must be an array");
    }
    for (const entry of obj.entries) {
      if (!entry || typeof entry !== "object") {
        throw new Error("Invalid snapshot: each entry must be an object");
      }
      const e = entry;
      if (typeof e.id !== "string") {
        throw new Error("Invalid snapshot: entry id must be a string");
      }
      if (!Array.isArray(e.vector) || !e.vector.every((v) => typeof v === "number")) {
        throw new Error("Invalid snapshot: entry vector must be an array of numbers");
      }
    }
    this.entries = obj.entries;
  }
};
function cosineSimilarity(a, b) {
  if (a.length !== b.length) {
    throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);
  }
  let dotProduct = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  if (denominator === 0) return 0;
  return dotProduct / denominator;
}
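
// The store is a brute-force scan: each search computes cosineSimilarity
// against every entry, which is reasonable at skill-catalog scale. A sketch
// with hypothetical 3-dimensional unit vectors:
//
//   const store = new MemoryVectorStore();
//   await store.add([
//     { id: "a", vector: [1, 0, 0] },
//     { id: "b", vector: [0, 1, 0] }
//   ]);
//   await store.search([1, 0, 0], 1);
//   // => [{ id: "a", score: 1, metadata: undefined }]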
// src/router.ts
var SkillRouter = class _SkillRouter {
  /** BM25 index — used when no external embedding provider is configured */
  bm25;
  /** Embedding provider — used with custom/openai/ollama providers */
  embedding;
  /** Vector store — used alongside embedding provider */
  store;
  /** Whether the router uses the BM25 engine (true) or embedding+store (false) */
  usesBM25;
  /** Whether contextual retrieval is enabled */
  contextEnabled;
  skillNames = /* @__PURE__ */ new Set();
  constructor(options) {
    const embeddingConfig = options?.embedding ?? "local";
    this.contextEnabled = options?.context !== false;
    if (embeddingConfig === "local") {
      this.bm25 = new BM25Index(options?.bm25);
      this.embedding = null;
      this.store = null;
      this.usesBM25 = true;
    } else {
      this.embedding = createEmbeddingProvider(embeddingConfig);
      this.store = new MemoryVectorStore();
      this.bm25 = null;
      this.usesBM25 = false;
    }
  }
  /**
   * Index a list of skill entries.
   * With BM25 (default): indexes description text directly.
   * With embeddings: embeds descriptions and stores vectors.
   */
  async indexSkills(skills) {
    if (skills.length === 0) return;
    if (this.usesBM25 && this.bm25) {
      this.bm25.add(
        skills.map((s) => ({
          id: s.name,
          text: this.enrichText(s),
          metadata: {
            description: s.description,
            path: s.path,
            ...s.metadata
          }
        }))
      );
    } else if (this.embedding && this.store) {
      const descriptions = skills.map((s) => this.enrichText(s));
      if (this.embedding instanceof LocalEmbeddingProvider) {
        this.embedding.buildVocabulary(descriptions);
      }
      const vectors = await this.embedding.embed(descriptions);
      const entries = skills.map((skill, i) => ({
        id: skill.name,
        vector: vectors[i],
        metadata: {
          description: skill.description,
          path: skill.path,
          ...skill.metadata
        }
      }));
      await this.store.add(entries);
    }
    for (const skill of skills) {
      this.skillNames.add(skill.name);
    }
  }
  /**
   * Index all SKILL.md files in a directory.
   * Parses each file and indexes its description.
   */
  async indexDirectory(dirPath) {
    const locations = await resolveSkillFiles(dirPath);
    const skills = [];
    for (const location of locations) {
      const result = await parseSkill(location.skillFile);
      if (result.ok && result.skill.metadata.description) {
        skills.push({
          name: result.skill.metadata.name ?? location.dirName,
          description: result.skill.metadata.description,
          path: location.skillFile,
          body: result.skill.body,
          sections: result.skill.sections
        });
      }
    }
    await this.indexSkills(skills);
    return skills.length;
  }
  /**
   * Select the most relevant skills for a query.
   */
  async select(query, options) {
    const topK = options?.topK ?? 5;
    const threshold = options?.threshold ?? 0;
    const boost = new Set(options?.boost ?? []);
    const exclude = options?.exclude ?? [];
    let results;
    if (this.usesBM25 && this.bm25) {
      results = this.bm25.search(query, topK * 2, threshold);
    } else if (this.embedding && this.store) {
      const [queryVector] = await this.embedding.embed([query]);
      if (!queryVector) {
        throw new Error("Embedding provider returned empty result for query");
      }
      results = await this.store.search(queryVector, topK * 2, threshold);
    } else {
      return [];
    }
    if (exclude.length > 0) {
      results = results.filter((r) => {
        return !exclude.some((pattern) => {
          if (pattern.endsWith("*")) {
            return r.id.startsWith(pattern.slice(0, -1));
          }
          return r.id === pattern;
        });
      });
    }
    if (boost.size > 0) {
      results = results.map((r) => ({
        ...r,
        score: boost.has(r.id) ? r.score * 1.2 : r.score
      }));
      results.sort((a, b) => b.score - a.score);
    }
    return results.slice(0, topK).map((r) => ({
      skill: r.id,
      score: r.score,
      metadata: r.metadata
    }));
  }
  /**
   * Detect skills with overlapping descriptions.
   */
  async detectConflicts(threshold = 0.85) {
    const conflicts = [];
    const names = Array.from(this.skillNames);
    if (this.usesBM25 && this.bm25) {
      for (const name of names) {
        const results = this.bm25.search(name, names.length, threshold);
        const similar = results.filter((r) => r.id !== name && r.score >= threshold).map((r) => r.id);
        if (similar.length > 0) {
          const existing = conflicts.find(
            (c) => c.skills.includes(name) || similar.some((s) => c.skills.includes(s))
          );
          if (!existing) {
            conflicts.push({
              skills: [name, ...similar],
              similarity: results.find((r) => r.id !== name)?.score ?? threshold,
              suggestion: "These skills have highly similar descriptions. Consider differentiating their trigger contexts."
            });
          }
        }
      }
    } else if (this.embedding && this.store) {
      for (const name of names) {
        const results = await this.store.search(
          (await this.embedding.embed([name]))[0],
          names.length,
          threshold
        );
        const similar = results.filter((r) => r.id !== name && r.score >= threshold).map((r) => r.id);
        if (similar.length > 0) {
          const existing = conflicts.find(
            (c) => c.skills.includes(name) || similar.some((s) => c.skills.includes(s))
          );
          if (!existing) {
            conflicts.push({
              skills: [name, ...similar],
              similarity: results.find((r) => r.id !== name)?.score ?? threshold,
              suggestion: "These skills have highly similar descriptions. Consider differentiating their trigger contexts."
            });
          }
        }
      }
    }
    return conflicts;
  }
  /**
   * Build the text to index for a skill entry.
   * When contextual retrieval is enabled and the skill has body/sections,
   * prepends extracted context to the description.
   */
  enrichText(skill) {
    if (this.contextEnabled && (skill.body || skill.sections)) {
      const ctx = extractContext(skill);
      if (ctx) return `${ctx} ${skill.description}`;
    }
    return skill.description;
  }
  /**
   * Get the number of indexed skills.
   */
  get count() {
    if (this.usesBM25 && this.bm25) {
      return this.bm25.size();
    }
    return this.store?.size() ?? 0;
  }
  /**
   * Save the index to a JSON-serializable object.
   */
  save() {
    if (this.usesBM25 && this.bm25) {
      return {
        version: 1,
        embeddingProvider: "bm25",
        dimensions: 0,
        store: this.bm25.serialize(),
        skillNames: Array.from(this.skillNames)
      };
    }
    return {
      version: 1,
      embeddingProvider: this.embedding.name,
      dimensions: this.embedding.dimensions,
      store: this.store.serialize(),
      skillNames: Array.from(this.skillNames)
    };
  }
  /**
   * Load a previously saved index.
   * Validates that the snapshot format matches the current engine.
   */
  load(snapshot) {
    if (this.usesBM25 && this.bm25) {
      if (snapshot.embeddingProvider !== "bm25") {
        throw new Error(
          `Cannot load snapshot from provider "${snapshot.embeddingProvider}" into BM25 router. Create the router with a matching embedding config.`
        );
      }
      this.bm25.deserialize(snapshot.store);
    } else if (this.embedding && this.store) {
      if (snapshot.dimensions !== this.embedding.dimensions) {
        throw new Error(
          `Snapshot dimensions (${snapshot.dimensions}) don't match current provider dimensions (${this.embedding.dimensions})`
        );
      }
      this.store.deserialize(snapshot.store);
    }
    this.skillNames = new Set(snapshot.skillNames);
  }
  /**
   * Create a SkillRouter from a saved snapshot.
   */
  static fromSnapshot(snapshot, options) {
    const router = new _SkillRouter(options);
    router.load(snapshot);
    return router;
  }
};
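
// An end-to-end sketch of the router's public API, assuming a hypothetical
// ./skills directory of SKILL.md files (BM25 is the default engine; pass an
// embedding config to use the vector path instead):
//
//   const router = new SkillRouter();
//   await router.indexDirectory("./skills");
//   const picks = await router.select("help me rebase a branch", {
//     topK: 3,
//     exclude: ["internal-*"],
//     boost: ["git-rebase-helper"]
//   });
//   const snapshot = router.save();
//   const restored = SkillRouter.fromSnapshot(snapshot);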
function createEmbeddingProvider(config) {
  if (config === "local") {
    return new LocalEmbeddingProvider();
  }
  if (config.provider === "custom") {
    return {
      name: "custom",
      dimensions: config.dimensions,
      embed: config.embed
    };
  }
  throw new Error(
    `Embedding provider "${config.provider}" requires additional setup. Install the appropriate SDK and configure an API key. See: https://github.com/skill-tools/skill-tools#embedding-providers`
  );
}

export { BM25Index, LocalEmbeddingProvider, MemoryVectorStore, SkillRouter, extractContext };
//# sourceMappingURL=index.js.map
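
// A sketch of the "custom" provider hook, which lets any embedding function
// be plugged in; the embedder below is a stand-in, not a real model:
//
//   const router = new SkillRouter({
//     embedding: {
//       provider: "custom",
//       dimensions: 3,
//       embed: async (texts) => texts.map(() => [0, 0, 1])
//     }
//   });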