autosnippet 3.3.2 → 3.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/dist/bin/cli.js +27 -1
  2. package/dist/lib/cli/KnowledgeSyncService.d.ts +26 -0
  3. package/dist/lib/cli/KnowledgeSyncService.js +33 -1
  4. package/dist/lib/external/mcp/handlers/browse.d.ts +1 -0
  5. package/dist/lib/external/mcp/handlers/browse.js +2 -1
  6. package/dist/lib/external/mcp/handlers/consolidated.d.ts +1 -0
  7. package/dist/lib/external/mcp/handlers/panorama.d.ts +11 -11
  8. package/dist/lib/external/mcp/handlers/panorama.js +20 -20
  9. package/dist/lib/external/mcp/handlers/system.d.ts +1 -1
  10. package/dist/lib/external/mcp/handlers/task.js +2 -1
  11. package/dist/lib/external/mcp/tools.d.ts +12 -12
  12. package/dist/lib/external/mcp/tools.js +120 -118
  13. package/dist/lib/http/middleware/validate.js +7 -3
  14. package/dist/lib/infrastructure/database/drizzle/schema.d.ts +100 -0
  15. package/dist/lib/infrastructure/database/drizzle/schema.js +10 -0
  16. package/dist/lib/infrastructure/database/migrations/005_recipe_source_refs.d.ts +9 -0
  17. package/dist/lib/infrastructure/database/migrations/005_recipe_source_refs.js +24 -0
  18. package/dist/lib/infrastructure/vector/HnswVectorAdapter.js +18 -2
  19. package/dist/lib/injection/ServiceContainer.js +2 -0
  20. package/dist/lib/injection/modules/KnowledgeModule.d.ts +5 -0
  21. package/dist/lib/injection/modules/KnowledgeModule.js +80 -0
  22. package/dist/lib/service/bootstrap/UiStartupTasks.d.ts +45 -0
  23. package/dist/lib/service/bootstrap/UiStartupTasks.js +101 -0
  24. package/dist/lib/service/evolution/ConsolidationAdvisor.js +9 -9
  25. package/dist/lib/service/evolution/ContradictionDetector.js +2 -2
  26. package/dist/lib/service/evolution/RedundancyAnalyzer.js +2 -2
  27. package/dist/lib/service/knowledge/SourceRefReconciler.d.ts +68 -0
  28. package/dist/lib/service/knowledge/SourceRefReconciler.js +309 -0
  29. package/dist/lib/service/panorama/PanoramaService.d.ts +18 -1
  30. package/dist/lib/service/panorama/PanoramaService.js +148 -5
  31. package/dist/lib/service/search/BM25Scorer.d.ts +2 -2
  32. package/dist/lib/service/search/CoarseRanker.d.ts +7 -6
  33. package/dist/lib/service/search/CoarseRanker.js +11 -10
  34. package/dist/lib/service/search/FieldWeightedScorer.d.ts +81 -0
  35. package/dist/lib/service/search/FieldWeightedScorer.js +318 -0
  36. package/dist/lib/service/search/MultiSignalRanker.d.ts +2 -2
  37. package/dist/lib/service/search/MultiSignalRanker.js +1 -1
  38. package/dist/lib/service/search/SearchEngine.d.ts +8 -7
  39. package/dist/lib/service/search/SearchEngine.js +59 -10
  40. package/dist/lib/service/search/SearchTypes.d.ts +23 -3
  41. package/dist/lib/service/search/SearchTypes.js +6 -1
  42. package/dist/lib/service/task/IntentExtractor.d.ts +8 -0
  43. package/dist/lib/service/task/IntentExtractor.js +115 -1
  44. package/dist/lib/service/task/PrimeSearchPipeline.js +39 -24
  45. package/dist/lib/service/vector/VectorService.d.ts +3 -0
  46. package/dist/lib/service/vector/VectorService.js +38 -4
  47. package/package.json +1 -1
  48. package/skills/autosnippet-create/SKILL.md +98 -89
  49. package/skills/autosnippet-devdocs/SKILL.md +55 -60
  50. package/templates/guard-ci.yml +2 -2
  51. package/templates/recipes-setup/_template.md +39 -39
@@ -0,0 +1,318 @@
1
+ /**
2
+ * FieldWeightedScorer — 加权字段匹配评分器
3
+ *
4
+ * 替代 BM25Scorer 作为结构化知识库的默认搜索评分引擎。
5
+ *
6
+ * 设计动机:
7
+ * - BM25 将所有字段拼接为文本做统计评分,tokenize 去重导致 TF 恒为 1,BM25F boost 失效
8
+ * - 对于 ~50–500 条结构化知识条目,BM25 的大规模语料假设不成立
9
+ * - FieldWeightedScorer 对每个字段独立打分并加权合并,精确匹配 > token 重叠 > IDF 加权
10
+ *
11
+ * 字段权重:
12
+ * trigger (5.0) > title (3.0) > tags (2.0) > description (1.5) > content (1.0) > facets (0.5)
13
+ *
14
+ * @module FieldWeightedScorer
15
+ */
16
+ import { tokenize } from './tokenizer.js';
17
// ── Field weight constants (tunable) ──
const TRIGGER_WEIGHT = 5.0;
const TITLE_WEIGHT = 3.0;
const TAG_WEIGHT = 2.0;
const DESCRIPTION_WEIGHT = 1.5;
const CONTENT_WEIGHT = 1.0;
const FACET_WEIGHT = 0.5;
/**
 * FieldWeightedScorer — field-weighted match scorer.
 *
 * Exposes the same members as BM25Scorer (the Scorer interface), so it can be
 * used as a drop-in replacement.
 *
 * Each document is scored field by field and the per-field scores (each in
 * [0, 1]) are combined with the weights above:
 *   trigger (5.0) > title (3.0) > tags (2.0) > description (1.5) > content (1.0) > facets (0.5)
 */
export class FieldWeightedScorer {
    avgLength;
    docFreq;
    documents;
    totalDocs;
    _idIndex;
    _totalLength;
    constructor() {
        this.documents = [];
        this.totalDocs = 0;
        // Null-prototype dictionary: a token such as "constructor" must never
        // resolve to an inherited Object.prototype member.
        this.docFreq = Object.create(null);
        this._idIndex = new Map();
        this._totalLength = 0;
        this.avgLength = 0;
    }
    /**
     * Add a document to the index. An existing document with the same id is replaced.
     *
     * @param id   Document id.
     * @param text Concatenated text; used for the content field only when
     *             `meta.contentText` is empty.
     * @param meta Structured fields (trigger, title, description, tags, language,
     *             category, knowledgeType, contentText). Non-string field values
     *             are treated as empty; non-string or empty tags are dropped.
     */
    addDocument(id, text, meta = {}) {
        if (this._idIndex.has(id)) {
            this.removeDocument(id);
        }
        // Extract structured fields from meta, keeping only usable strings so the
        // scoring helpers can call string methods safely.
        const trigger = this._text(meta.trigger);
        const title = this._text(meta.title);
        const description = this._text(meta.description);
        const tags = Array.isArray(meta.tags)
            ? meta.tags.filter((tag) => typeof tag === 'string' && tag.length > 0)
            : [];
        const language = this._text(meta.language);
        const category = this._text(meta.category);
        const knowledgeType = this._text(meta.knowledgeType);
        const contentText = this._text(meta.contentText);
        // Tokenize every field independently.
        const triggerTokens = tokenize(trigger);
        const titleTokens = tokenize(title);
        const descTokens = tokenize(description);
        // contentText wins; fall back to the concatenated text when it is empty.
        const contentTokens = tokenize(contentText || text);
        // Union of all tokens of the document, used for document-frequency counting.
        const allUnique = new Set([
            ...triggerTokens,
            ...titleTokens,
            ...descTokens,
            ...contentTokens,
        ]);
        for (const tag of tags) {
            for (const token of tokenize(tag)) {
                allUnique.add(token);
            }
        }
        const doc = {
            id,
            fields: { trigger, title, description, tags, language, category, knowledgeType },
            tokenizedFields: {
                trigger: triggerTokens,
                title: titleTokens,
                description: descTokens,
                content: contentTokens,
                allUnique,
            },
            meta,
        };
        const idx = this.documents.length;
        this.documents.push(doc);
        this._idIndex.set(id, idx);
        for (const token of allUnique) {
            this.docFreq[token] = this._df(token) + 1;
        }
        this.totalDocs = this._idIndex.size;
        this._totalLength += allUnique.size;
        this.avgLength = this.totalDocs > 0 ? this._totalLength / this.totalDocs : 0;
    }
    /**
     * Remove a document (tombstone + lazy compaction).
     *
     * @returns true when the document existed and was removed.
     */
    removeDocument(id) {
        const idx = this._idIndex.get(id);
        if (idx === undefined) {
            return false;
        }
        const doc = this.documents[idx];
        if (!doc) {
            return false;
        }
        for (const token of doc.tokenizedFields.allUnique) {
            const remaining = this._df(token) - 1;
            if (remaining > 0) {
                this.docFreq[token] = remaining;
            }
            else {
                delete this.docFreq[token];
            }
        }
        this._totalLength -= doc.tokenizedFields.allUnique.size;
        // Leave a tombstone so the indices stored in _idIndex stay valid.
        this.documents[idx] = null;
        this._idIndex.delete(id);
        this.totalDocs = this._idIndex.size;
        this.avgLength = this.totalDocs > 0 ? this._totalLength / this.totalDocs : 0;
        // Compact once the array is large and more than 30% of it is tombstones.
        const nullCount = this.documents.length - this.totalDocs;
        if (this.documents.length > 100 && nullCount / this.documents.length > 0.3) {
            this._compact();
        }
        return true;
    }
    /** Update a document (remove + add). */
    updateDocument(id, text, meta = {}) {
        this.removeDocument(id);
        this.addDocument(id, text, meta);
    }
    /** Whether a document with this id is currently indexed. */
    hasDocument(id) {
        return this._idIndex.has(id);
    }
    /** Drop every document and reset all statistics. */
    clear() {
        this.documents = [];
        this.docFreq = Object.create(null);
        this.totalDocs = 0;
        this._totalLength = 0;
        this.avgLength = 0;
        this._idIndex.clear();
    }
    /** Compact the documents array by dropping tombstones and rebuilding the id index. */
    _compact() {
        const alive = this.documents.filter((d) => d !== null);
        this.documents = alive;
        this._idIndex.clear();
        for (let i = 0; i < alive.length; i++) {
            this._idIndex.set(alive[i].id, i);
        }
    }
    /**
     * Score every live document field by field and return the best matches.
     *
     * @param query Raw query string.
     * @param limit Maximum number of results (default 20).
     * @returns `{ id, score, meta }` entries with score > 0, sorted by descending score.
     */
    search(query, limit = 20) {
        const queryTokens = tokenize(query);
        if (queryTokens.length === 0) {
            return [];
        }
        const scores = [];
        for (const doc of this.documents) {
            if (!doc) {
                continue;
            }
            let totalScore = 0;
            // 1. Trigger — highest weight; best of string match and token overlap.
            const triggerString = this._stringMatchScore(query, doc.fields.trigger);
            const triggerToken = this._tokenOverlap(queryTokens, doc.tokenizedFields.trigger);
            totalScore += TRIGGER_WEIGHT * Math.max(triggerString, triggerToken);
            // 2. Title — best of string match and token overlap.
            const titleString = this._stringMatchScore(query, doc.fields.title);
            const titleToken = this._tokenOverlap(queryTokens, doc.tokenizedFields.title);
            totalScore += TITLE_WEIGHT * Math.max(titleString, titleToken);
            // 3. Tags.
            totalScore += TAG_WEIGHT * this._tagScore(queryTokens, doc.fields.tags);
            // 4. Description — IDF-weighted token overlap.
            totalScore +=
                DESCRIPTION_WEIGHT * this._idfWeightedOverlap(queryTokens, doc.tokenizedFields.description);
            // 5. Content — IDF-weighted token overlap.
            totalScore +=
                CONTENT_WEIGHT * this._idfWeightedOverlap(queryTokens, doc.tokenizedFields.content);
            // 6. Facets — language / category / knowledgeType.
            totalScore += FACET_WEIGHT * this._facetScore(queryTokens, doc.fields);
            if (totalScore > 0) {
                scores.push({ id: doc.id, score: totalScore, meta: doc.meta });
            }
        }
        scores.sort((a, b) => b.score - a.score);
        return scores.slice(0, limit);
    }
    // ── Internal scoring helpers ──
    /** Return `value` when it is a string, otherwise the empty string. */
    _text(value) {
        return typeof value === 'string' ? value : '';
    }
    /** Document frequency of `token`, reading own properties of docFreq only. */
    _df(token) {
        return Object.prototype.hasOwnProperty.call(this.docFreq, token)
            ? this.docFreq[token] || 0
            : 0;
    }
    /**
     * Case-insensitive string-level match (used for trigger / title):
     * equal 1.0, field starts with query 0.7, field contains query 0.5,
     * query contains a field longer than 3 characters 0.3, otherwise 0.
     */
    _stringMatchScore(query, field) {
        if (!field) {
            return 0;
        }
        const q = query.toLowerCase();
        const f = field.toLowerCase();
        if (f === q) {
            return 1.0;
        }
        if (f.startsWith(q)) {
            return 0.7;
        }
        if (f.includes(q)) {
            return 0.5;
        }
        if (q.includes(f) && f.length > 3) {
            return 0.3;
        }
        return 0;
    }
    /** Fraction of query tokens that occur in the field (query-side recall). */
    _tokenOverlap(queryTokens, fieldTokens) {
        if (queryTokens.length === 0) {
            return 0;
        }
        const fieldSet = new Set(fieldTokens);
        let matched = 0;
        for (const qt of queryTokens) {
            if (fieldSet.has(qt)) {
                matched++;
            }
        }
        return matched / queryTokens.length;
    }
    /** IDF-weighted share of query tokens that occur in the field (for long text fields). */
    _idfWeightedOverlap(queryTokens, fieldTokens) {
        if (queryTokens.length === 0) {
            return 0;
        }
        const fieldSet = new Set(fieldTokens);
        let matchedIdf = 0;
        let totalIdf = 0;
        for (const qt of queryTokens) {
            const idf = this._idf(qt);
            totalIdf += idf;
            if (fieldSet.has(qt)) {
                matchedIdf += idf;
            }
        }
        return totalIdf > 0 ? matchedIdf / totalIdf : 0;
    }
    /**
     * Tag match score. Per tag: 1.0 when the lower-cased tag equals a query token,
     * 0.5 for a substring relation with some query token, 0.3 when one of the tag's
     * own tokens is a query token. The sum is divided by the number of query tokens
     * and capped at 1.0.
     */
    _tagScore(queryTokens, tags) {
        if (tags.length === 0 || queryTokens.length === 0) {
            return 0;
        }
        let score = 0;
        const qtSet = new Set(queryTokens);
        for (const tag of tags) {
            const lowTag = tag.toLowerCase();
            // Exact token match.
            if (qtSet.has(lowTag)) {
                score += 1.0;
                continue;
            }
            // Partial match: the tag contains a query token or vice versa.
            let partialFound = false;
            for (const qt of queryTokens) {
                if (lowTag.includes(qt) || qt.includes(lowTag)) {
                    score += 0.5;
                    partialFound = true;
                    break;
                }
            }
            if (!partialFound) {
                // Tokenize the tag and match token by token.
                const tagTokens = tokenize(tag);
                for (const tt of tagTokens) {
                    if (qtSet.has(tt)) {
                        score += 0.3;
                        break;
                    }
                }
            }
        }
        return Math.min(score / queryTokens.length, 1.0);
    }
    /** Fraction of the non-empty facets (language / category / knowledgeType) hit by a query token. */
    _facetScore(queryTokens, fields) {
        const facets = [fields.language, fields.category, fields.knowledgeType].filter(Boolean);
        if (facets.length === 0) {
            return 0;
        }
        let matched = 0;
        const qtSet = new Set(queryTokens);
        for (const facet of facets) {
            const lower = facet.toLowerCase();
            if (qtSet.has(lower)) {
                matched++;
                continue;
            }
            for (const ft of tokenize(facet)) {
                if (qtSet.has(ft)) {
                    matched++;
                    break;
                }
            }
        }
        return matched / facets.length;
    }
    /** Smoothed IDF, always positive: log2(1 + totalDocs / (df + 1)). */
    _idf(token) {
        const df = this._df(token);
        return Math.log2(1 + this.totalDocs / (df + 1));
    }
}
@@ -4,7 +4,7 @@
4
4
  * 不同场景使用不同权重配置(向后兼容旧配置中的 'seasonality' 键)
5
5
  */
6
6
  interface SignalCandidate {
7
- bm25Score?: number;
7
+ recallScore?: number;
8
8
  score?: number;
9
9
  title?: string;
10
10
  trigger?: string;
@@ -84,7 +84,7 @@ export declare class MultiSignalRanker {
84
84
  rank(candidates: SignalCandidate[], context?: SignalContext): {
85
85
  rankerScore: number;
86
86
  signals: Record<string, number>;
87
- bm25Score?: number;
87
+ recallScore?: number;
88
88
  score?: number;
89
89
  title?: string;
90
90
  trigger?: string;
@@ -54,7 +54,7 @@ const SCENARIO_WEIGHTS = {
54
54
  /** 相关性信号 — BM25 + 标题匹配 + 内容匹配 */
55
55
  export class RelevanceSignal {
56
56
  compute(candidate, context) {
57
- let score = candidate.bm25Score || candidate.score || 0;
57
+ let score = candidate.recallScore || candidate.score || 0;
58
58
  const query = (context.query || '').toLowerCase();
59
59
  if (!query) {
60
60
  return Math.min(score, 1.0);
@@ -5,17 +5,17 @@
5
5
  * 从 V1 SearchServiceV2 迁移,适配 V2 架构
6
6
  */
7
7
  import Logger from '../../infrastructure/logging/Logger.js';
8
- import { BM25Scorer } from './BM25Scorer.js';
9
8
  import { CoarseRanker } from './CoarseRanker.js';
10
9
  import { MultiSignalRanker } from './MultiSignalRanker.js';
11
- import type { DbRow, RankingContext, SearchAiProvider, SearchCrossEncoder, SearchDb, SearchEngineOptions, SearchHybridRetriever, SearchOptions, SearchResponse, SearchResultItem, SearchVectorService, SearchVectorStore } from './SearchTypes.js';
10
+ import type { DbRow, RankingContext, Scorer, SearchAiProvider, SearchCrossEncoder, SearchDb, SearchEngineOptions, SearchHybridRetriever, SearchOptions, SearchResponse, SearchResultItem, SearchVectorService, SearchVectorStore } from './SearchTypes.js';
12
11
  export { BM25Scorer } from './BM25Scorer.js';
13
- export type { BM25DocMeta, BM25SearchResult, DbRow, RankingContext, RrfHit, SearchAiProvider, SearchCrossEncoder, SearchDb, SearchEngineOptions, SearchHybridRetriever, SearchOptions, SearchResponse, SearchResultItem, SearchVectorService, SearchVectorStore, SlimSearchResult, VectorHit, } from './SearchTypes.js';
12
+ export { FieldWeightedScorer } from './FieldWeightedScorer.js';
13
+ export type { BM25DocMeta, BM25SearchResult, DbRow, RankingContext, RrfHit, Scorer, SearchAiProvider, SearchCrossEncoder, SearchDb, SearchEngineOptions, SearchHybridRetriever, SearchOptions, SearchResponse, SearchResultItem, SearchVectorService, SearchVectorStore, SlimSearchResult, VectorHit, } from './SearchTypes.js';
14
14
  export { groupByKind, slimSearchResult } from './SearchTypes.js';
15
15
  export { tokenize } from './tokenizer.js';
16
16
  /**
17
17
  * SearchEngine - 完整搜索服务
18
- * 整合 BM25 + 关键词 + 可选 AI 增强
18
+ * 整合召回评分 + 关键词 + 可选 AI 增强
19
19
  */
20
20
  export declare class SearchEngine {
21
21
  _cache: Map<string, {
@@ -25,7 +25,7 @@ export declare class SearchEngine {
25
25
  _cacheMaxAge: number;
26
26
  _coarseRanker: CoarseRanker;
27
27
  _crossEncoder: SearchCrossEncoder | null;
28
- _fusionBm25Weight: number;
28
+ _fusionRecallWeight: number;
29
29
  _fusionSemanticWeight: number;
30
30
  _indexed: boolean;
31
31
  _lastIndexTime: string | null;
@@ -35,7 +35,7 @@ export declare class SearchEngine {
35
35
  db: SearchDb;
36
36
  hybridRetriever: SearchHybridRetriever | null;
37
37
  logger: ReturnType<typeof Logger.getInstance>;
38
- scorer: BM25Scorer;
38
+ scorer: Scorer;
39
39
  vectorService: SearchVectorService | null;
40
40
  vectorStore: SearchVectorStore | null;
41
41
  constructor(db: SearchDb & {
@@ -76,7 +76,6 @@ export declare class SearchEngine {
76
76
  headers?: string;
77
77
  moduleName?: string;
78
78
  knowledgeType?: string;
79
- bm25Score?: number;
80
79
  qualityScore?: number;
81
80
  usageCount?: number;
82
81
  authorityScore?: number;
@@ -151,6 +150,8 @@ export declare class SearchEngine {
151
150
  type: string;
152
151
  title: string | undefined;
153
152
  trigger: string;
153
+ description: string;
154
+ contentText: string;
154
155
  status: string | undefined;
155
156
  knowledgeType: string | undefined;
156
157
  kind: string;
@@ -5,24 +5,25 @@
5
5
  * 从 V1 SearchServiceV2 迁移,适配 V2 架构
6
6
  */
7
7
  import Logger from '../../infrastructure/logging/Logger.js';
8
- import { BM25Scorer } from './BM25Scorer.js';
9
8
  import { CoarseRanker } from './CoarseRanker.js';
10
9
  import { contextBoost } from './contextBoost.js';
10
+ import { FieldWeightedScorer } from './FieldWeightedScorer.js';
11
11
  import { MultiSignalRanker } from './MultiSignalRanker.js';
12
12
  // ── Re-exports for backward compatibility ──
13
13
  export { BM25Scorer } from './BM25Scorer.js';
14
+ export { FieldWeightedScorer } from './FieldWeightedScorer.js';
14
15
  export { groupByKind, slimSearchResult } from './SearchTypes.js';
15
16
  export { tokenize } from './tokenizer.js';
16
17
  /**
17
18
  * SearchEngine - 完整搜索服务
18
- * 整合 BM25 + 关键词 + 可选 AI 增强
19
+ * 整合召回评分 + 关键词 + 可选 AI 增强
19
20
  */
20
21
  export class SearchEngine {
21
22
  _cache;
22
23
  _cacheMaxAge;
23
24
  _coarseRanker;
24
25
  _crossEncoder;
25
- _fusionBm25Weight;
26
+ _fusionRecallWeight;
26
27
  _fusionSemanticWeight;
27
28
  _indexed;
28
29
  _lastIndexTime = null;
@@ -42,15 +43,15 @@ export class SearchEngine {
42
43
  this.vectorStore = options.vectorStore || null;
43
44
  this.vectorService = options.vectorService || null;
44
45
  this.hybridRetriever = options.hybridRetriever || null;
45
- this.scorer = new BM25Scorer();
46
+ this.scorer = new FieldWeightedScorer();
46
47
  this._coarseRanker = new CoarseRanker(options);
47
48
  this._multiSignalRanker = new MultiSignalRanker(options);
48
49
  this._crossEncoder = options.crossEncoderReranker || null;
49
50
  this._indexed = false;
50
51
  this._cache = new Map();
51
52
  this._cacheMaxAge = options.cacheMaxAge || 300_000; // 5min
52
- // auto 模式 BM25+semantic 融合权重(可配置)
53
- this._fusionBm25Weight = options.fusionBm25Weight ?? 0.6;
53
+ // auto 模式 召回+semantic 融合权重(可配置)
54
+ this._fusionRecallWeight = options.fusionRecallWeight ?? 0.6;
54
55
  this._fusionSemanticWeight = options.fusionSemanticWeight ?? 0.4;
55
56
  this._signalBus = options.signalBus || null;
56
57
  }
@@ -214,7 +215,8 @@ export class SearchEngine {
214
215
  response.byKind = { rule: [], pattern: [], fact: [] };
215
216
  for (const r of results) {
216
217
  const kind = r.kind || 'pattern';
217
- (response.byKind[kind] || response.byKind.pattern).push(r);
218
+ const bucket = response.byKind[kind] ?? response.byKind.pattern;
219
+ bucket.push(r);
218
220
  }
219
221
  }
220
222
  if (cacheKey) {
@@ -254,8 +256,8 @@ export class SearchEngine {
254
256
  }
255
257
  return ranked.map((r) => ({
256
258
  ...r,
257
- recallScore: r.bm25Score || 0,
258
- score: r.contextScore || r.rankerScore || r.coarseScore || r.bm25Score || 0,
259
+ recallScore: r.recallScore || 0,
260
+ score: r.contextScore || r.rankerScore || r.coarseScore || r.recallScore || 0,
259
261
  }));
260
262
  }
261
263
  /**
@@ -286,7 +288,7 @@ export class SearchEngine {
286
288
  return {
287
289
  ...item,
288
290
  code: codeText || item.code || '',
289
- bm25Score: item.score || 0,
291
+ recallScore: item.score || 0,
290
292
  qualityScore: item.qualityScore || (item.status === 'active' ? 70 : 40),
291
293
  usageCount: item.usageCount || 0,
292
294
  authorityScore: item.authorityScore || 0,
@@ -633,6 +635,40 @@ export class SearchEngine {
633
635
  catch {
634
636
  /* DB may not be available */
635
637
  }
638
+ // ── 从 recipe_source_refs 桥接表批量读取已验证的 sourceRefs ──
639
+ try {
640
+ const ids = items.map((it) => it.id);
641
+ if (ids.length === 0) {
642
+ return;
643
+ }
644
+ const placeholders = ids.map(() => '?').join(',');
645
+ const refsRows = this.db
646
+ .prepare(`SELECT recipe_id, source_path, status, new_path
647
+ FROM recipe_source_refs
648
+ WHERE recipe_id IN (${placeholders}) AND status != 'stale'`)
649
+ .all(...ids);
650
+ this.logger.debug('recipe_source_refs query', {
651
+ idCount: ids.length,
652
+ rowCount: refsRows.length,
653
+ });
654
+ const refsMap = new Map();
655
+ for (const row of refsRows) {
656
+ const refPath = row.status === 'renamed' && row.new_path ? row.new_path : row.source_path;
657
+ if (!refsMap.has(row.recipe_id)) {
658
+ refsMap.set(row.recipe_id, []);
659
+ }
660
+ refsMap.get(row.recipe_id)?.push(refPath);
661
+ }
662
+ for (const item of items) {
663
+ const refs = refsMap.get(item.id);
664
+ if (refs && refs.length > 0) {
665
+ item.sourceRefs = refs;
666
+ }
667
+ }
668
+ }
669
+ catch {
670
+ /* recipe_source_refs table may not exist */
671
+ }
636
672
  }
637
673
  /**
638
674
  * 刷新索引(增量模式)
@@ -764,10 +800,23 @@ export class SearchEngine {
764
800
  catch {
765
801
  /* ignore */
766
802
  }
803
+ // 提取 description 和 contentText 供 FieldWeightedScorer 字段级评分使用
804
+ let contentText = '';
805
+ try {
806
+ const content = JSON.parse(r.content || '{}');
807
+ contentText = [content.pattern, content.rationale, content.markdown]
808
+ .filter(Boolean)
809
+ .join(' ');
810
+ }
811
+ catch {
812
+ /* ignore */
813
+ }
767
814
  return {
768
815
  type: 'knowledge',
769
816
  title: r.title,
770
817
  trigger: r.trigger || '',
818
+ description: r.description || '',
819
+ contentText,
771
820
  status: r.lifecycle,
772
821
  knowledgeType: r.knowledgeType,
773
822
  kind: r.kind || 'pattern',
@@ -20,6 +20,25 @@ export interface BM25SearchResult {
20
20
  score: number;
21
21
  meta: Record<string, unknown>;
22
22
  }
23
+ /**
24
+ * Generic Scorer interface — implemented by both BM25Scorer and FieldWeightedScorer.
25
+ *
26
+ * SearchEngine talks to the scorer through this interface rather than a concrete class, so the scorer can be swapped at runtime.
27
+ */
28
+ export interface Scorer {
29
+ totalDocs: number;
30
+ avgLength: number;
31
+ docFreq: Record<string, number>;
32
+ documents: ({
33
+ id: string;
34
+ } | null)[]; // a slot may be null; FieldWeightedScorer.removeDocument leaves null there as a tombstone
35
+ addDocument(id: string, text: string, meta: Record<string, unknown>): void;
36
+ removeDocument(id: string): boolean;
37
+ updateDocument(id: string, text: string, meta: Record<string, unknown>): void;
38
+ hasDocument(id: string): boolean;
39
+ search(query: string, limit?: number): BM25SearchResult[];
40
+ clear(): void;
41
+ }
23
42
  /** Meta structure produced by _buildDocMeta */
24
43
  export interface BM25DocMeta {
25
44
  type: string;
@@ -56,7 +75,6 @@ export interface SearchResultItem {
56
75
  headers?: string;
57
76
  moduleName?: string;
58
77
  knowledgeType?: string;
59
- bm25Score?: number;
60
78
  qualityScore?: number;
61
79
  usageCount?: number;
62
80
  authorityScore?: number;
@@ -191,7 +209,7 @@ export interface SearchEngineOptions {
191
209
  crossEncoderReranker?: SearchCrossEncoder | null;
192
210
  signalBus?: SignalBus | null;
193
211
  cacheMaxAge?: number;
194
- fusionBm25Weight?: number;
212
+ fusionRecallWeight?: number;
195
213
  fusionSemanticWeight?: number;
196
214
  [key: string]: unknown;
197
215
  }
@@ -210,12 +228,14 @@ export interface SlimSearchResult {
210
228
  actionHint?: string;
211
229
  /** 知识类型 (code-standard/code-pattern/...) — Bridge 场景需要 */
212
230
  knowledgeType?: string;
231
+ /** 已验证的项目来源文件路径(可信度证据链) */
232
+ sourceRefs?: string[];
213
233
  }
214
234
  /**
215
235
  * 统一投影函数 — 将 SearchResultItem 投影为 SlimSearchResult。
216
236
  *
217
237
  * 合并了 mcp/search.ts#_slimSearchItem() 和 TaskKnowledgeBridge#_projectItem() 的逻辑:
218
- * - 去除内部信号 (bm25Score, coarseScore, rankerScore, contextScore, content, code...)
238
+ * - 去除内部信号 (recallScore, coarseScore, rankerScore, contextScore, content, code...)
219
239
  * - description 截断 120 字符
220
240
  * - 生成 actionHint (whenClause → doClause)
221
241
  *
@@ -10,7 +10,7 @@
10
10
  * 统一投影函数 — 将 SearchResultItem 投影为 SlimSearchResult。
11
11
  *
12
12
  * 合并了 mcp/search.ts#_slimSearchItem() 和 TaskKnowledgeBridge#_projectItem() 的逻辑:
13
- * - 去除内部信号 (bm25Score, coarseScore, rankerScore, contextScore, content, code...)
13
+ * - 去除内部信号 (recallScore, coarseScore, rankerScore, contextScore, content, code...)
14
14
  * - description 截断 120 字符
15
15
  * - 生成 actionHint (whenClause → doClause)
16
16
  *
@@ -23,6 +23,10 @@ export function slimSearchResult(item) {
23
23
  const actionHint = doText || whenText
24
24
  ? `${whenText ? `${whenText} → ` : ''}${doText}`.replace(/ → $/, '')
25
25
  : undefined;
26
+ const rawRefs = item.sourceRefs;
27
+ const sourceRefs = Array.isArray(rawRefs) && rawRefs.length > 0
28
+ ? rawRefs.filter((s) => typeof s === 'string' && s.length > 0)
29
+ : undefined;
26
30
  return {
27
31
  id: item.id,
28
32
  title: item.title || '',
@@ -33,6 +37,7 @@ export function slimSearchResult(item) {
33
37
  description: (item.description || '').slice(0, 120),
34
38
  actionHint,
35
39
  knowledgeType: item.knowledgeType || undefined,
40
+ sourceRefs,
36
41
  };
37
42
  }
38
43
  /** items → byKind 分组(统一实现) */
@@ -10,6 +10,8 @@ export type SearchScenario = 'lint' | 'generate' | 'search' | 'learning';
10
10
  export interface ExtractedIntent {
11
11
  /** Multi-query set: Q1 raw + Q2 tech terms + Q3 file context */
12
12
  queries: string[];
13
+ /** Cross-language synonym queries (run in keyword mode to bypass semantic normalization) */
14
+ keywordQueries: string[];
13
15
  /** Inferred language from activeFile or args */
14
16
  language: string | null;
15
17
  /** Inferred module path from activeFile */
@@ -37,8 +39,14 @@ export declare function extract(userQuery: string, activeFile?: string, language
37
39
  /**
38
40
  * Build multi-query set from user query + active file.
39
41
  * Q1: raw query, Q2: extracted tech terms, Q3: file context.
42
+ * Q1 is enriched with cross-language synonyms to bridge EN↔CJK matching.
40
43
  */
41
44
  export declare function buildQueries(userQuery: string, activeFile?: string, termOpts?: TechTermOptions): string[];
45
+ /**
46
+ * Build keyword-mode queries for cross-language synonym matching.
47
+ * Uses keyword mode to preserve raw FWS scores without CoarseRanker semantic normalization.
48
+ */
49
+ export declare function buildKeywordQueries(userQuery: string): string[];
42
50
  /**
43
51
  * Extract tech terms from query using universal patterns + dynamic project prefixes.
44
52
  */