skills-atlas-cli 0.8.5 → 0.8.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/search-core.js +81 -2
package/package.json
CHANGED
package/src/search-core.js
CHANGED
|
@@ -215,6 +215,43 @@ const idfOf = (info, seg) => Math.log(1 + info.n / ((info.df.get(seg) || 0) + 1)
|
|
|
215
215
|
|
|
216
216
|
const FIRE_IDF = 4.2; // a single distinctive name word must clear this to fire alone
|
|
217
217
|
|
|
218
|
+
// --- Content anchors (match by FUNCTION, not just name) ----------------------
|
|
219
|
+
// ~a third of catalog skills have opaque names (sentry/grill-me/get-shit-done) that
|
|
220
|
+
// don't contain their function, and Chinese prompts never match an English name at
|
|
221
|
+
// all. So besides the skill NAME, anchor on the curated function text — use_case /
|
|
222
|
+
// group / when, in BOTH languages — with the same distinctiveness gate as names.
|
|
223
|
+
// A content match must include at least one DISTINCTIVE function word to fire, so
|
|
224
|
+
// generic prose overlap stays silent.
|
|
225
|
+
const CONTENT_FIRE_IDF = 4.6; // weight bar when only one distinctive word matched (+ a 2nd word)
|
|
226
|
+
const CONTENT_DISTINCT_IDF = 3.5; // a word this distinctive (~≤10 rows) counts toward "strong"
|
|
227
|
+
|
|
228
|
+
// Generic Chinese words that must not fire on their own — the CJK analog of
|
|
229
|
+
// ANCHOR_STOP (casual verbs + generic nouns that carry no domain intent).
|
|
230
|
+
const CJK_ANCHOR_STOP = new Set([
|
|
231
|
+
'看看', '看下', '看一', '帮忙', '处理', '解决', '完成', '搞定', '试试', '弄一', '做个', '做一',
|
|
232
|
+
'写个', '写一', '加个', '改改', '改一', '删掉', '运行', '创建', '生成', '修改', '优化', '检查',
|
|
233
|
+
'一下', '一个', '这个', '那个', '东西', '问题', '代码', '文件', '内容', '功能', '项目', '任务',
|
|
234
|
+
'系统', '方法', '工具', '数据', '需要', '想要', '怎么', '如何', '可以', '应该', '一些', '这些',
|
|
235
|
+
]);
|
|
236
|
+
|
|
237
|
+
// Tokenizes a row's curated short function text: use_case, group and when_to_use,
// each in both language variants. The long description is deliberately left out
// to keep the token set distinctive. The result feeds both the corpus document
// frequencies and query matching. Missing/empty fields are dropped before joining.
const rowContent = r => {
  const fields = [r.use_case, r.use_case_en, r.group, r.group_en, r.when_to_use, r.when_to_use_en];
  const text = fields.filter(Boolean).join(' ');
  return tokenize(lc(text));
};
|
|
241
|
+
// True when `set` contains token `t` exactly, or — for tokens longer than three
// characters that end in "s" — contains `t` with that trailing "s" removed.
const contentHas = (set, t) => {
  if (set.has(t)) return true;
  const endsInS = t.length > 3 && t.endsWith('s');
  return endsInS && set.has(t.slice(0, -1));
};
|
|
242
|
+
|
|
243
|
+
// Memo of document-frequency info, keyed by the identity of the `rows` object.
const _contentDfCache = new WeakMap();

// Builds (once per `rows` object) the document-frequency table of content tokens.
// Returns { df, n }: `df` maps each token to the number of rows whose rowContent()
// includes it (a token counts at most once per row), and `n` is the number of rows,
// reported as 1 when `rows` is empty.
function contentDf(rows) {
  const cached = _contentDfCache.get(rows);
  if (cached) return cached;

  const df = new Map();
  let rowCount = 0;
  for (const row of rows) {
    rowCount += 1;
    // De-duplicate within the row so repeated words do not inflate the count.
    const distinct = new Set(rowContent(row));
    distinct.forEach(token => {
      df.set(token, (df.get(token) || 0) + 1);
    });
  }

  const info = { df, n: rowCount || 1 };
  _contentDfCache.set(rows, info);
  return info;
}
|
|
254
|
+
|
|
218
255
|
// Autopilot recall: collect a SHORTLIST of catalog skills that may fit a free-text
|
|
219
256
|
// prompt, for Claude to judge (we do recall; Claude does precision). Returns
|
|
220
257
|
// { fire, candidates: [{skill, row}], weak }. Sources, in order:
|
|
@@ -252,10 +289,49 @@ function suggestCandidates(rows, prompt, { installed = new Set(), suggested = ne
|
|
|
252
289
|
}
|
|
253
290
|
anchors.sort((a, b) => b.weight - a.weight || maxStars(b.row) - maxStars(a.row));
|
|
254
291
|
|
|
292
|
+
// 1b. content anchors — match the curated FUNCTION text (use_case / group / when),
|
|
293
|
+
// so opaque-named skills are findable by what they do and Chinese prompts match at
|
|
294
|
+
// all. Generic words (ANCHOR_STOP / CJK_ANCHOR_STOP) are excluded up front.
|
|
295
|
+
const contentTokens = tokens.filter(t => !ANCHOR_STOP.has(t) && !CJK_ANCHOR_STOP.has(t));
|
|
296
|
+
const contentAnchors = [];
|
|
297
|
+
if (contentTokens.length) {
|
|
298
|
+
const cdf = contentDf(rows);
|
|
299
|
+
for (const r of rows) {
|
|
300
|
+
const content = new Set(rowContent(r));
|
|
301
|
+
let weight = 0, strong = 0, matched = 0;
|
|
302
|
+
for (const t of new Set(contentTokens)) {
|
|
303
|
+
if (!contentHas(content, t)) continue;
|
|
304
|
+
matched++;
|
|
305
|
+
const idf = idfOf(cdf, t);
|
|
306
|
+
weight += idf;
|
|
307
|
+
if (idf >= CONTENT_DISTINCT_IDF) strong++; // only distinctive words count as "strong"
|
|
308
|
+
}
|
|
309
|
+
if (strong) contentAnchors.push({ row: r, weight, strong, matched });
|
|
310
|
+
}
|
|
311
|
+
// Prefer rows matching MORE distinctive function words over an incidental hit.
|
|
312
|
+
contentAnchors.sort((a, b) => b.strong - a.strong || b.weight - a.weight || maxStars(b.row) - maxStars(a.row));
|
|
313
|
+
}
|
|
314
|
+
// Fire/qualify only with a distinctive function match: two distinctive words, or
|
|
315
|
+
// one distinctive word backed by a second matched word and enough total weight.
|
|
316
|
+
// (A single distinctive word alone never fires — too easy to hit by coincidence.)
|
|
317
|
+
const contentQualifies = a => a.strong >= 2 || (a.strong >= 1 && a.matched >= 2 && a.weight >= CONTENT_FIRE_IDF);
|
|
318
|
+
|
|
319
|
+
// Merge name + qualifying content anchors into ONE shortlist ranked by strength,
|
|
320
|
+
// so a strong function match (grill-me: interrogate + stress-test) outranks a
|
|
321
|
+
// single-word name match (launch) when it's the better fit. runSearch backfills.
|
|
322
|
+
const ranked0 = [];
|
|
323
|
+
for (const a of anchors) ranked0.push({ skill: a.skill, row: a.row, strong: a.strong, weight: a.weight });
|
|
324
|
+
for (const a of contentAnchors) {
|
|
325
|
+
if (!contentQualifies(a)) continue;
|
|
326
|
+
const s = (a.row.skills || []).find(x => !taken.has(x));
|
|
327
|
+
if (s) ranked0.push({ skill: s, row: a.row, strong: a.strong, weight: a.weight });
|
|
328
|
+
}
|
|
329
|
+
ranked0.sort((a, b) => b.strong - a.strong || b.weight - a.weight || maxStars(b.row) - maxStars(a.row));
|
|
330
|
+
|
|
255
331
|
const out = [];
|
|
256
332
|
const seen = new Set(taken);
|
|
257
333
|
const push = (skill, row) => { if (!seen.has(skill)) { seen.add(skill); out.push({ skill, row }); } };
|
|
258
|
-
for (const a of
|
|
334
|
+
for (const a of ranked0) { if (out.length >= limit) break; push(a.skill, a.row); }
|
|
259
335
|
|
|
260
336
|
// 2. fill remaining slots from the general ranked search — but only with rows
|
|
261
337
|
// that are actually on-topic (a name/group hit, or strong coverage). A lone
|
|
@@ -276,7 +352,10 @@ function suggestCandidates(rows, prompt, { installed = new Set(), suggested = ne
|
|
|
276
352
|
// distinctive (high-IDF) one. A prompt with mere prose overlap and no name
|
|
277
353
|
// signal stays silent (better a miss than noise on every generic prompt); the
|
|
278
354
|
// ranked search still ENRICHES the shortlist once an anchor has fired.
|
|
279
|
-
const fire = out.length > 0 &&
|
|
355
|
+
const fire = out.length > 0 && (
|
|
356
|
+
anchors.some(a => a.strong >= 2 || a.weight >= FIRE_IDF) ||
|
|
357
|
+
contentAnchors.some(contentQualifies)
|
|
358
|
+
);
|
|
280
359
|
return { fire, candidates: out.slice(0, limit), weak };
|
|
281
360
|
}
|
|
282
361
|
|