skills-atlas-cli 0.8.5 → 0.8.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/search-core.js +81 -2
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "skills-atlas-cli",
3
- "version": "0.8.5",
3
+ "version": "0.8.7",
4
4
  "description": "Search, install and learn AI agent skills from the terminal — powered by the Skills Atlas catalog.",
5
5
  "bin": {
6
6
  "skills-atlas": "bin/skills.js",
@@ -215,6 +215,43 @@ const idfOf = (info, seg) => Math.log(1 + info.n / ((info.df.get(seg) || 0) + 1)
215
215
 
216
216
  const FIRE_IDF = 4.2; // a single distinctive name word must clear this to fire alone
217
217
 
218
+ // --- Content anchors (match by FUNCTION, not just name) ----------------------
219
+ // ~a third of catalog skills have opaque names (sentry/grill-me/get-shit-done) that
220
+ // don't contain their function, and Chinese prompts never match an English name at
221
+ // all. So besides the skill NAME, anchor on the curated function text — use_case /
222
+ // group / when, in BOTH languages — with the same distinctiveness gate as names.
223
+ // A content match must include at least one DISTINCTIVE function word to fire, so
224
+ // generic prose overlap stays silent.
225
+ const CONTENT_FIRE_IDF = 4.6; // min total weight to fire when only one distinctive word matched (plus a 2nd matched word)
226
+ const CONTENT_DISTINCT_IDF = 3.5; // a word this distinctive (~≤10 rows) counts toward "strong"
227
+
228
+ // Generic Chinese words that must not fire on their own — the CJK analog of
229
+ // ANCHOR_STOP (casual verbs + generic nouns that carry no domain intent).
230
+ const CJK_ANCHOR_STOP = new Set([
231
+ '看看', '看下', '看一', '帮忙', '处理', '解决', '完成', '搞定', '试试', '弄一', '做个', '做一',
232
+ '写个', '写一', '加个', '改改', '改一', '删掉', '运行', '创建', '生成', '修改', '优化', '检查',
233
+ '一下', '一个', '这个', '那个', '东西', '问题', '代码', '文件', '内容', '功能', '项目', '任务',
234
+ '系统', '方法', '工具', '数据', '需要', '想要', '怎么', '如何', '可以', '应该', '一些', '这些',
235
+ ]);
236
+
237
+ // Tokens of a row's curated short function text (NOT the long description — keep it
238
+ // distinctive), both languages, for the corpus DF and for matching a query.
239
+ const rowContent = r =>
240
+ tokenize(lc([r.use_case, r.use_case_en, r.group, r.group_en, r.when_to_use, r.when_to_use_en].filter(Boolean).join(' ')));
241
+ const contentHas = (set, t) => set.has(t) || (t.length > 3 && t.endsWith('s') && set.has(t.slice(0, -1)));
242
+
243
+ const _contentDfCache = new WeakMap();
244
+ function contentDf(rows) {
245
+ let info = _contentDfCache.get(rows);
246
+ if (info) return info;
247
+ const df = new Map();
248
+ let n = 0;
249
+ for (const r of rows) { n++; for (const t of new Set(rowContent(r))) df.set(t, (df.get(t) || 0) + 1); }
250
+ info = { df, n: n || 1 };
251
+ _contentDfCache.set(rows, info);
252
+ return info;
253
+ }
254
+
218
255
  // Autopilot recall: collect a SHORTLIST of catalog skills that may fit a free-text
219
256
  // prompt, for Claude to judge (we do recall; Claude does precision). Returns
220
257
  // { fire, candidates: [{skill, row}], weak }. Sources, in order:
@@ -252,10 +289,49 @@ function suggestCandidates(rows, prompt, { installed = new Set(), suggested = ne
252
289
  }
253
290
  anchors.sort((a, b) => b.weight - a.weight || maxStars(b.row) - maxStars(a.row));
254
291
 
292
+ // 1b. content anchors — match the curated FUNCTION text (use_case / group / when),
293
+ // so opaque-named skills are findable by what they do and Chinese prompts can match at
294
+ // all. Generic words (ANCHOR_STOP / CJK_ANCHOR_STOP) are excluded up front.
295
+ const contentTokens = tokens.filter(t => !ANCHOR_STOP.has(t) && !CJK_ANCHOR_STOP.has(t));
296
+ const contentAnchors = [];
297
+ if (contentTokens.length) {
298
+ const cdf = contentDf(rows);
299
+ for (const r of rows) {
300
+ const content = new Set(rowContent(r));
301
+ let weight = 0, strong = 0, matched = 0;
302
+ for (const t of new Set(contentTokens)) {
303
+ if (!contentHas(content, t)) continue;
304
+ matched++;
305
+ const idf = idfOf(cdf, t);
306
+ weight += idf;
307
+ if (idf >= CONTENT_DISTINCT_IDF) strong++; // only distinctive words count as "strong"
308
+ }
309
+ if (strong) contentAnchors.push({ row: r, weight, strong, matched });
310
+ }
311
+ // Prefer rows matching MORE distinctive function words over an incidental hit.
312
+ contentAnchors.sort((a, b) => b.strong - a.strong || b.weight - a.weight || maxStars(b.row) - maxStars(a.row));
313
+ }
314
+ // Fire/qualify only with a distinctive function match: two distinctive words, or
315
+ // one distinctive word backed by a second matched word and enough total weight.
316
+ // (A single distinctive word alone never fires — too easy to hit by coincidence.)
317
+ const contentQualifies = a => a.strong >= 2 || (a.strong >= 1 && a.matched >= 2 && a.weight >= CONTENT_FIRE_IDF);
318
+
319
+ // Merge name + qualifying content anchors into ONE shortlist ranked by strength,
320
+ // so a strong function match (grill-me: interrogate + stress-test) outranks a
321
+ // single-word name match (launch) when it's the better fit. runSearch backfills.
322
+ const ranked0 = [];
323
+ for (const a of anchors) ranked0.push({ skill: a.skill, row: a.row, strong: a.strong, weight: a.weight });
324
+ for (const a of contentAnchors) {
325
+ if (!contentQualifies(a)) continue;
326
+ const s = (a.row.skills || []).find(x => !taken.has(x));
327
+ if (s) ranked0.push({ skill: s, row: a.row, strong: a.strong, weight: a.weight });
328
+ }
329
+ ranked0.sort((a, b) => b.strong - a.strong || b.weight - a.weight || maxStars(b.row) - maxStars(a.row));
330
+
255
331
  const out = [];
256
332
  const seen = new Set(taken);
257
333
  const push = (skill, row) => { if (!seen.has(skill)) { seen.add(skill); out.push({ skill, row }); } };
258
- for (const a of anchors) { if (out.length >= limit) break; push(a.skill, a.row); }
334
+ for (const a of ranked0) { if (out.length >= limit) break; push(a.skill, a.row); }
259
335
 
260
336
  // 2. fill remaining slots from the general ranked search — but only with rows
261
337
  // that are actually on-topic (a name/group hit, or strong coverage). A lone
@@ -276,7 +352,10 @@ function suggestCandidates(rows, prompt, { installed = new Set(), suggested = ne
276
352
  // distinctive (high-IDF) one. A prompt with mere prose overlap and no name
277
353
  // or function-text signal stays silent (better a miss than noise on every generic prompt); the
278
354
  // ranked search still ENRICHES the shortlist once an anchor has fired.
279
- const fire = out.length > 0 && anchors.some(a => a.strong >= 2 || a.weight >= FIRE_IDF);
355
+ const fire = out.length > 0 && (
356
+ anchors.some(a => a.strong >= 2 || a.weight >= FIRE_IDF) ||
357
+ contentAnchors.some(contentQualifies)
358
+ );
280
359
  return { fire, candidates: out.slice(0, limit), weak };
281
360
  }
282
361