autosnippet 2.16.0 → 2.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,8 @@
6
6
  */
7
7
 
8
8
  import Logger from '../../infrastructure/logging/Logger.js';
9
+ import { CoarseRanker } from './CoarseRanker.js';
10
+ import { MultiSignalRanker } from './MultiSignalRanker.js';
9
11
 
10
12
  /**
11
13
  * BM25 参数
@@ -143,6 +145,8 @@ export class SearchEngine {
143
145
  this.aiProvider = options.aiProvider || null;
144
146
  this.vectorStore = options.vectorStore || null;
145
147
  this.scorer = new BM25Scorer();
148
+ this._coarseRanker = new CoarseRanker(options);
149
+ this._multiSignalRanker = new MultiSignalRanker(options);
146
150
  this._indexed = false;
147
151
  this._cache = new Map();
148
152
  this._cacheMaxAge = options.cacheMaxAge || 300_000; // 5min
@@ -161,7 +165,8 @@ export class SearchEngine {
161
165
  try {
162
166
  entries = this.db.prepare(
163
167
  `SELECT id, title, description, language, category, knowledgeType, kind,
164
- content, lifecycle, tags, trigger
168
+ content, lifecycle, tags, trigger, difficulty, quality, stats,
169
+ updatedAt, createdAt
165
170
  FROM knowledge_entries WHERE lifecycle != 'deprecated'`
166
171
  ).all();
167
172
  entries = entries.map(e => ({
@@ -178,10 +183,28 @@ export class SearchEngine {
178
183
  } catch { /* ignore parse error */ }
179
184
  // 包含 tags + trigger 提升召回率
180
185
  let tagText = '';
181
- try { tagText = JSON.parse(r.tags || '[]').join(' '); } catch { /* ignore */ }
186
+ let parsedTags = [];
187
+ try { parsedTags = JSON.parse(r.tags || '[]'); tagText = parsedTags.join(' '); } catch { /* ignore */ }
188
+ // 解析 stats / quality JSON — 供排序信号使用
189
+ let usageCount = 0;
190
+ let authorityScore = 0;
191
+ try {
192
+ const stats = JSON.parse(r.stats || '{}');
193
+ usageCount = (stats.adoptions || 0) + (stats.applications || 0) + (stats.searchHits || 0);
194
+ authorityScore = stats.authority || 0;
195
+ } catch { /* ignore */ }
196
+ let qualityOverall = 0;
197
+ try { qualityOverall = JSON.parse(r.quality || '{}').overall || 0; } catch { /* ignore */ }
182
198
  const text = [r.title, r.description, r.trigger, r.language, r.category, r.knowledgeType, tagText, contentText]
183
199
  .filter(Boolean).join(' ');
184
- this.scorer.addDocument(r.id, text, { type: 'knowledge', title: r.title, trigger: r.trigger || '', status: r.status, knowledgeType: r.knowledgeType, kind: r.kind || 'pattern', language: r.language || '', category: r.category || '' });
200
+ this.scorer.addDocument(r.id, text, {
201
+ type: 'knowledge', title: r.title, trigger: r.trigger || '', status: r.status,
202
+ knowledgeType: r.knowledgeType, kind: r.kind || 'pattern',
203
+ language: r.language || '', category: r.category || '',
204
+ updatedAt: r.updatedAt || null, createdAt: r.createdAt || null,
205
+ difficulty: r.difficulty || 'intermediate', tags: parsedTags,
206
+ usageCount, authorityScore, qualityScore: qualityOverall,
207
+ });
185
208
  }
186
209
 
187
210
  this._indexed = true;
@@ -194,38 +217,79 @@ export class SearchEngine {
194
217
  }
195
218
  }
196
219
 
220
+ /**
221
+ * 确保索引已构建(幂等),supply 给需要准确 stats 的调用方
222
+ */
223
+ ensureIndex() {
224
+ if (!this._indexed) {
225
+ this.buildIndex();
226
+ }
227
+ }
228
+
197
229
  /**
198
230
  * 统一搜索入口
199
231
  * @param {string} query - 搜索关键词
200
232
  * @param {object} options - {type, limit, mode, useAI}
201
233
  */
202
234
  async search(query, options = {}) {
203
- const { type = 'all', limit = 20, mode = 'keyword' } = options;
235
+ const { type = 'all', limit = 20, mode = 'keyword', context } = options;
236
+ const shouldRank = options.rank ?? (mode !== 'keyword');
204
237
 
205
238
  if (!query || !query.trim()) {
206
239
  return { items: [], total: 0, query };
207
240
  }
208
241
 
209
- // 检查缓存
210
- const cacheKey = `${query}:${type}:${limit}:${mode}:${options.groupByKind ? 'g' : ''}`;
211
- const cached = this._getCache(cacheKey);
212
- if (cached) return cached;
242
+ // 带 sessionHistory 的上下文搜索不缓存(个性化结果)
243
+ const hasSessionContext = context?.sessionHistory?.length > 0;
244
+ const cacheKey = hasSessionContext
245
+ ? null
246
+ : `${query}:${type}:${limit}:${mode}:${shouldRank ? 'r' : ''}:${options.groupByKind ? 'g' : ''}`;
247
+ if (cacheKey) {
248
+ const cached = this._getCache(cacheKey);
249
+ if (cached) return cached;
250
+ }
213
251
 
214
252
  // 确保索引已构建
215
- if (!this._indexed) {
216
- this.buildIndex();
217
- }
253
+ this.ensureIndex();
218
254
 
255
+ // 排序阶段需要更多候选,过采样 3x
256
+ const recallLimit = shouldRank ? limit * 3 : limit;
219
257
  let results;
220
- let actualMode = mode; // 跟踪实际使用的搜索模式(semantic 可能降级为 bm25)
258
+ let actualMode = mode;
221
259
 
222
260
  switch (mode) {
261
+ case 'auto': {
262
+ // 同时做 BM25 + semantic,融合去重取最优分数
263
+ const [bm25Items, semResult] = await Promise.all([
264
+ Promise.resolve(this._bm25Search(query, type, recallLimit)),
265
+ this._semanticSearch(query, type, recallLimit).catch(() => ({ items: [], actualMode: 'bm25' })),
266
+ ]);
267
+ const semItems = semResult.items || [];
268
+ const merged = new Map();
269
+ for (const it of bm25Items) {
270
+ merged.set(it.id, { ...it, _bm25: it.score || 0, _sem: 0 });
271
+ }
272
+ for (const it of semItems) {
273
+ const existing = merged.get(it.id);
274
+ if (existing) {
275
+ existing._sem = it.score || 0;
276
+ existing.score = Math.max(existing._bm25, existing._sem);
277
+ } else {
278
+ merged.set(it.id, { ...it, _bm25: 0, _sem: it.score || 0 });
279
+ }
280
+ }
281
+ results = [...merged.values()].sort((a, b) => b.score - a.score);
282
+ for (const it of results) { delete it._bm25; delete it._sem; }
283
+ const semActuallyUsed = semResult.actualMode === 'semantic';
284
+ actualMode = semActuallyUsed ? 'auto(bm25+semantic)' : 'auto(bm25-only)';
285
+ break;
286
+ }
223
287
  case 'ranking':
224
288
  case 'bm25':
225
- results = this._bm25Search(query, type, limit);
289
+ results = this._bm25Search(query, type, recallLimit);
226
290
  break;
227
291
  case 'semantic': {
228
- const semResult = await this._semanticSearch(query, type, limit);
292
+ const semResult = await this._semanticSearch(query, type, recallLimit);
229
293
  results = semResult.items || semResult;
230
294
  actualMode = semResult.actualMode || 'semantic';
231
295
  break;
@@ -236,27 +300,110 @@ export class SearchEngine {
236
300
  break;
237
301
  }
238
302
 
303
+ // ── Ranking Pipeline (CoarseRanker → MultiSignalRanker → ContextBoost) ──
304
+ if (shouldRank && results.length > 0) {
305
+ results = this._applyRanking(results, query, context);
306
+ }
307
+ results = results.slice(0, limit);
308
+
239
309
  const response = {
240
310
  items: results,
241
311
  total: results.length,
242
312
  query,
243
313
  mode: actualMode,
244
314
  type,
315
+ ranked: shouldRank && results.length > 0,
245
316
  };
246
317
 
247
- // 按 kind 分组输出
248
318
  if (options.groupByKind) {
249
- response.byKind = {
250
- rule: results.filter(r => r.kind === 'rule'),
251
- pattern: results.filter(r => r.kind === 'pattern'),
252
- fact: results.filter(r => r.kind === 'fact'),
253
- };
319
+ response.byKind = { rule: [], pattern: [], fact: [] };
320
+ for (const r of results) {
321
+ const kind = r.kind || 'pattern';
322
+ (response.byKind[kind] || response.byKind.pattern).push(r);
323
+ }
254
324
  }
255
325
 
256
- this._setCache(cacheKey, response);
326
+ if (cacheKey) this._setCache(cacheKey, response);
257
327
  return response;
258
328
  }
259
329
 
330
+ // ── Ranking Pipeline ────────────────────────────────────────────
331
+
332
+ /**
333
+ * 内置排序管线: 规范化 → CoarseRanker (E-E-A-T 5维) → MultiSignalRanker (6信号) → 上下文加成
334
+ */
335
+ _applyRanking(items, query, context = {}) {
336
+ const normalized = this._normalizeForRanking(items);
337
+ let ranked = this._coarseRanker.rank(normalized);
338
+ ranked = this._multiSignalRanker.rank(ranked, {
339
+ ...context, query,
340
+ scenario: context?.intent || 'search',
341
+ });
342
+ if (context?.sessionHistory?.length > 0) {
343
+ ranked = this._contextBoost(ranked, context);
344
+ }
345
+ return ranked.map(r => ({
346
+ ...r,
347
+ recallScore: r.bm25Score || 0,
348
+ score: r.contextScore || r.rankerScore || r.coarseScore || r.bm25Score || 0,
349
+ }));
350
+ }
351
+
352
+ /**
353
+ * 将召回结果转换为 Ranker 所需格式(解析 content JSON、映射信号字段)
354
+ * 保留原始 content 供下游消费者使用
355
+ */
356
+ _normalizeForRanking(items) {
357
+ return items.map(item => {
358
+ let codeText = '';
359
+ if (item.content) {
360
+ try {
361
+ const parsed = typeof item.content === 'string' ? JSON.parse(item.content) : item.content;
362
+ codeText = parsed.pattern || parsed.code || '';
363
+ } catch { /* ignore */ }
364
+ }
365
+ let tags = item.tags || [];
366
+ if (typeof tags === 'string') {
367
+ try { tags = JSON.parse(tags); } catch { tags = []; }
368
+ }
369
+ return {
370
+ ...item,
371
+ code: codeText || item.code || '',
372
+ bm25Score: item.score || 0,
373
+ qualityScore: item.qualityScore || (item.status === 'active' ? 70 : 40),
374
+ usageCount: item.usageCount || 0,
375
+ authorityScore: item.authorityScore || 0,
376
+ tags,
377
+ difficulty: item.difficulty || 'intermediate',
378
+ };
379
+ });
380
+ }
381
+
382
+ /**
383
+ * 上下文感知加成 — 会话关键词重叠 +20%、语言匹配 +10%
384
+ */
385
+ _contextBoost(items, context) {
386
+ const { sessionHistory = [], language } = context || {};
387
+ if (!sessionHistory.length) return items;
388
+ const sessionKeywords = new Set();
389
+ for (const turn of sessionHistory) {
390
+ const tokens = tokenize(turn.content || turn.rawInput || '');
391
+ for (const t of tokens) sessionKeywords.add(t);
392
+ }
393
+ return items.map(item => {
394
+ let boost = 0;
395
+ const textTokens = tokenize(
396
+ [item.title, item.trigger, item.content].filter(Boolean).join(' ')
397
+ );
398
+ const overlap = textTokens.filter(t => sessionKeywords.has(t)).length;
399
+ if (overlap > 0) boost += 0.2 * Math.min(overlap / 5, 1);
400
+ if (language && item.language === language) boost += 0.1;
401
+ const baseScore = item.rankerScore || item.coarseScore || item.score || 0;
402
+ const contextScore = baseScore * (1 + boost);
403
+ return { ...item, contextScore, contextBoost: boost };
404
+ }).sort((a, b) => b.contextScore - a.contextScore);
405
+ }
406
+
260
407
  /**
261
408
  * 关键词搜索 - 直接 SQL LIKE
262
409
  * 返回包含 kind 字段的完整结果,使用 ESCAPE 防止通配符注入
@@ -318,6 +465,14 @@ export class SearchEngine {
318
465
  language: r.meta.language || '',
319
466
  category: r.meta.category || '',
320
467
  score: Math.round(r.score * 1000) / 1000,
468
+ // 排序信号字段(供 RetrievalFunnel / CoarseRanker / MultiSignalRanker 使用)
469
+ updatedAt: r.meta.updatedAt || null,
470
+ createdAt: r.meta.createdAt || null,
471
+ difficulty: r.meta.difficulty || 'intermediate',
472
+ tags: r.meta.tags || [],
473
+ usageCount: r.meta.usageCount || 0,
474
+ authorityScore: r.meta.authorityScore || 0,
475
+ qualityScore: r.meta.qualityScore || 0,
321
476
  }));
322
477
 
323
478
  // 为每个结果补充 content(NativeUI 预览需要)— 批量 IN 查询替代 N+1
@@ -343,10 +498,19 @@ export class SearchEngine {
343
498
  return { items: this._bm25Search(query, type, limit), actualMode: 'bm25' };
344
499
  }
345
500
 
346
- // 尝试通过 vectorStore 做的向量相似度搜索
501
+ // 尝试通过 vectorStore 做向量搜索(优先混合搜索: 向量70% + 关键词30%)
347
502
  if (this.vectorStore) {
348
503
  try {
349
- const vectorResults = await this.vectorStore.query(queryEmbedding, limit * 2);
504
+ let vectorResults;
505
+ if (typeof this.vectorStore.hybridSearch === 'function') {
506
+ const hybrid = await this.vectorStore.hybridSearch(queryEmbedding, query, { topK: limit * 2 });
507
+ vectorResults = hybrid.map(r => ({
508
+ id: r.item.id, similarity: r.score, score: r.score,
509
+ content: r.item.content, metadata: r.item.metadata || {},
510
+ }));
511
+ } else {
512
+ vectorResults = await this.vectorStore.query(queryEmbedding, limit * 2);
513
+ }
350
514
  if (vectorResults && vectorResults.length > 0) {
351
515
  let results = vectorResults.map(vr => ({
352
516
  id: vr.id,
@@ -393,7 +557,9 @@ export class SearchEngine {
393
557
  let rows = [];
394
558
  try {
395
559
  rows = this.db.prepare(
396
- `SELECT id, content, description, trigger, headers, moduleName FROM knowledge_entries WHERE id IN (${placeholders})`
560
+ `SELECT id, content, description, trigger, headers, moduleName,
561
+ tags, language, category, updatedAt, createdAt, quality, stats, difficulty
562
+ FROM knowledge_entries WHERE id IN (${placeholders})`
397
563
  ).all(...ids);
398
564
  } catch { /* table may not exist */ }
399
565
  const rowMap = new Map(rows.map(r => [r.id, r]));
@@ -405,6 +571,28 @@ export class SearchEngine {
405
571
  item.trigger = item.trigger || row.trigger || '';
406
572
  if (row.headers) item.headers = row.headers;
407
573
  if (row.moduleName) item.moduleName = row.moduleName;
574
+ // 排序信号补充 — 确保 Funnel/Ranker 有真实数据
575
+ if (!item.language && row.language) item.language = row.language;
576
+ if (!item.category && row.category) item.category = row.category;
577
+ if (!item.updatedAt && row.updatedAt) item.updatedAt = row.updatedAt;
578
+ if (!item.createdAt && row.createdAt) item.createdAt = row.createdAt;
579
+ if (!item.difficulty && row.difficulty) item.difficulty = row.difficulty;
580
+ // 解析 tags
581
+ if (!item.tags || (Array.isArray(item.tags) && item.tags.length === 0)) {
582
+ try { item.tags = JSON.parse(row.tags || '[]'); } catch { /* ignore */ }
583
+ }
584
+ // 解析 quality JSON → qualityScore
585
+ if (!item.qualityScore) {
586
+ try { item.qualityScore = JSON.parse(row.quality || '{}').overall || 0; } catch { /* ignore */ }
587
+ }
588
+ // 解析 stats JSON → usageCount + authorityScore
589
+ if (!item.usageCount) {
590
+ try {
591
+ const stats = JSON.parse(row.stats || '{}');
592
+ item.usageCount = (stats.adoptions || 0) + (stats.applications || 0) + (stats.searchHits || 0);
593
+ if (!item.authorityScore) item.authorityScore = stats.authority || 0;
594
+ } catch { /* ignore */ }
595
+ }
408
596
  }
409
597
  }
410
598
  } catch { /* DB may not be available */ }
@@ -419,7 +607,7 @@ export class SearchEngine {
419
607
  }
420
608
 
421
609
  /**
422
- * 获取索引统计
610
+ * 获取索引统计(如果尚未构建索引,自动触发构建)
423
611
  */
424
612
  getStats() {
425
613
  return {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "autosnippet",
3
- "version": "2.16.0",
3
+ "version": "2.18.0",
4
4
  "description": "AutoSnippet - 连接开发者、AI 与项目知识库的工具",
5
5
  "type": "module",
6
6
  "main": "lib/bootstrap.js",
@@ -333,13 +333,13 @@ class CombinedSearchWindowController: NSObject, NSTableViewDataSource, NSTableVi
333
333
  let cellView = NSTableCellView()
334
334
  cellView.wantsLayer = true
335
335
 
336
- // 标题 - 支持两行显示
336
+ // 标题 - 自动折行,最多两行,右侧留出足够边距
337
337
  let textField = NSTextField(labelWithString: item.title)
338
338
  textField.font = NSFont.systemFont(ofSize: 14, weight: .medium)
339
339
  textField.textColor = .labelColor
340
- textField.lineBreakMode = .byWordWrapping // 改为自动换行
341
- textField.maximumNumberOfLines = 2 // 最多显示两行
342
- textField.frame = NSRect(x: 12, y: 24, width: tableView.bounds.width - 24, height: 36) // 增加高度到36,调整y位置
340
+ textField.lineBreakMode = .byWordWrapping
341
+ textField.maximumNumberOfLines = 2 // 必须两行保证显示全
342
+ textField.frame = NSRect(x: 12, y: 24, width: tableView.bounds.width - 56, height: 36)
343
343
  cellView.addSubview(textField)
344
344
 
345
345
  // 说明
@@ -356,7 +356,7 @@ class CombinedSearchWindowController: NSObject, NSTableViewDataSource, NSTableVi
356
356
  subtitleField.font = NSFont.systemFont(ofSize: 11, weight: .regular)
357
357
  subtitleField.textColor = .secondaryLabelColor
358
358
  subtitleField.lineBreakMode = .byTruncatingTail
359
- subtitleField.frame = NSRect(x: 12, y: 8, width: tableView.bounds.width - 24, height: 16) // 调整y位置为8
359
+ subtitleField.frame = NSRect(x: 12, y: 8, width: tableView.bounds.width - 56, height: 16)
360
360
  cellView.addSubview(subtitleField)
361
361
  }
362
362