autosnippet 3.3.0 → 3.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard/dist/assets/icons-BJ2mUBi8.js +1 -0
- package/dashboard/dist/assets/index-B659K9t5.js +128 -0
- package/dashboard/dist/assets/index-NCm40PMD.css +1 -0
- package/dashboard/dist/index.html +3 -3
- package/dist/bin/cli.d.ts +1 -0
- package/dist/bin/cli.js +284 -142
- package/dist/lib/agent/context/ExplorationTracker.d.ts +2 -0
- package/dist/lib/agent/context/ExplorationTracker.js +21 -3
- package/dist/lib/agent/core/ToolExecutionPipeline.d.ts +3 -1
- package/dist/lib/agent/core/ToolExecutionPipeline.js +8 -1
- package/dist/lib/agent/forge/DynamicComposer.d.ts +58 -0
- package/dist/lib/agent/forge/DynamicComposer.js +99 -0
- package/dist/lib/agent/forge/SandboxRunner.d.ts +60 -0
- package/dist/lib/agent/forge/SandboxRunner.js +251 -0
- package/dist/lib/agent/forge/TemporaryToolRegistry.d.ts +76 -0
- package/dist/lib/agent/forge/TemporaryToolRegistry.js +154 -0
- package/dist/lib/agent/forge/ToolForge.d.ts +92 -0
- package/dist/lib/agent/forge/ToolForge.js +239 -0
- package/dist/lib/agent/forge/ToolRequirementAnalyzer.d.ts +44 -0
- package/dist/lib/agent/forge/ToolRequirementAnalyzer.js +119 -0
- package/dist/lib/agent/tools/ToolRegistry.d.ts +2 -0
- package/dist/lib/agent/tools/ToolRegistry.js +4 -0
- package/dist/lib/agent/tools/composite.js +0 -1
- package/dist/lib/agent/tools/index.d.ts +2 -50
- package/dist/lib/agent/tools/index.js +2 -3
- package/dist/lib/agent/tools/lifecycle.d.ts +1 -58
- package/dist/lib/agent/tools/lifecycle.js +2 -75
- package/dist/lib/cli/KnowledgeSyncService.d.ts +26 -0
- package/dist/lib/cli/KnowledgeSyncService.js +33 -1
- package/dist/lib/cli/deploy/FileManifest.d.ts +0 -21
- package/dist/lib/cli/deploy/FileManifest.js +0 -11
- package/dist/lib/domain/knowledge/KnowledgeEntry.d.ts +10 -0
- package/dist/lib/domain/knowledge/KnowledgeEntry.js +2 -0
- package/dist/lib/domain/knowledge/Lifecycle.d.ts +19 -2
- package/dist/lib/domain/knowledge/Lifecycle.js +32 -6
- package/dist/lib/domain/knowledge/UnifiedValidator.d.ts +1 -5
- package/dist/lib/domain/knowledge/UnifiedValidator.js +7 -44
- package/dist/lib/domain/knowledge/values/Stats.d.ts +29 -0
- package/dist/lib/domain/knowledge/values/Stats.js +41 -0
- package/dist/lib/external/mcp/McpServer.d.ts +19 -38
- package/dist/lib/external/mcp/McpServer.js +145 -117
- package/dist/lib/external/mcp/autoApproveInjector.js +0 -2
- package/dist/lib/external/mcp/handlers/bootstrap/MissionBriefingBuilder.d.ts +26 -1
- package/dist/lib/external/mcp/handlers/bootstrap/MissionBriefingBuilder.js +41 -0
- package/dist/lib/external/mcp/handlers/bootstrap/pipeline/orchestrator.js +49 -0
- package/dist/lib/external/mcp/handlers/bootstrap/shared/bootstrap-phases.d.ts +3 -0
- package/dist/lib/external/mcp/handlers/bootstrap/shared/bootstrap-phases.js +27 -0
- package/dist/lib/external/mcp/handlers/bootstrap/skills.js +1 -1
- package/dist/lib/external/mcp/handlers/bootstrap-external.js +1 -0
- package/dist/lib/external/mcp/handlers/bootstrap-internal.js +2 -0
- package/dist/lib/external/mcp/handlers/browse.d.ts +1 -0
- package/dist/lib/external/mcp/handlers/browse.js +2 -1
- package/dist/lib/external/mcp/handlers/consolidated.d.ts +117 -6
- package/dist/lib/external/mcp/handlers/consolidated.js +251 -71
- package/dist/lib/external/mcp/handlers/guard.d.ts +150 -0
- package/dist/lib/external/mcp/handlers/guard.js +239 -5
- package/dist/lib/external/mcp/handlers/knowledge.d.ts +0 -29
- package/dist/lib/external/mcp/handlers/knowledge.js +1 -76
- package/dist/lib/external/mcp/handlers/panorama.d.ts +36 -0
- package/dist/lib/external/mcp/handlers/panorama.js +156 -0
- package/dist/lib/external/mcp/handlers/system.d.ts +2 -54
- package/dist/lib/external/mcp/handlers/system.js +3 -113
- package/dist/lib/external/mcp/handlers/task.d.ts +13 -24
- package/dist/lib/external/mcp/handlers/task.js +218 -557
- package/dist/lib/external/mcp/handlers/types.d.ts +91 -8
- package/dist/lib/external/mcp/handlers/types.js +18 -1
- package/dist/lib/external/mcp/handlers/wiki-external.d.ts +18 -1
- package/dist/lib/external/mcp/handlers/wiki-external.js +16 -1
- package/dist/lib/external/mcp/tools.d.ts +18 -24
- package/dist/lib/external/mcp/tools.js +132 -159
- package/dist/lib/http/HttpServer.js +52 -0
- package/dist/lib/http/middleware/validate.js +7 -3
- package/dist/lib/http/routes/audit.d.ts +8 -0
- package/dist/lib/http/routes/audit.js +51 -0
- package/dist/lib/http/routes/guardReport.d.ts +10 -0
- package/dist/lib/http/routes/guardReport.js +143 -0
- package/dist/lib/http/routes/knowledge.js +32 -1
- package/dist/lib/http/routes/panorama.d.ts +11 -0
- package/dist/lib/http/routes/panorama.js +322 -0
- package/dist/lib/http/routes/signals.d.ts +10 -0
- package/dist/lib/http/routes/signals.js +104 -0
- package/dist/lib/http/routes/task.d.ts +2 -3
- package/dist/lib/http/routes/task.js +17 -347
- package/dist/lib/http/routes/violations.js +1 -1
- package/dist/lib/infrastructure/audit/AuditLogger.d.ts +6 -1
- package/dist/lib/infrastructure/audit/AuditLogger.js +14 -1
- package/dist/lib/infrastructure/database/drizzle/schema.d.ts +202 -504
- package/dist/lib/infrastructure/database/drizzle/schema.js +38 -69
- package/dist/lib/infrastructure/database/migrations/004_evolution_proposals.d.ts +8 -0
- package/dist/lib/infrastructure/database/migrations/004_evolution_proposals.js +43 -0
- package/dist/lib/infrastructure/database/migrations/005_recipe_source_refs.d.ts +9 -0
- package/dist/lib/infrastructure/database/migrations/005_recipe_source_refs.js +24 -0
- package/dist/lib/infrastructure/logging/Logger.d.ts +2 -0
- package/dist/lib/infrastructure/logging/Logger.js +34 -7
- package/dist/lib/infrastructure/monitoring/ErrorTracker.js +3 -1
- package/dist/lib/infrastructure/monitoring/PerformanceMonitor.d.ts +2 -2
- package/dist/lib/infrastructure/monitoring/PerformanceMonitor.js +12 -10
- package/dist/lib/infrastructure/notification/LarkNotifier.d.ts +24 -0
- package/dist/lib/infrastructure/notification/LarkNotifier.js +97 -0
- package/dist/lib/infrastructure/report/ReportStore.d.ts +45 -0
- package/dist/lib/infrastructure/report/ReportStore.js +133 -0
- package/dist/lib/infrastructure/signal/SignalAggregator.d.ts +18 -0
- package/dist/lib/infrastructure/signal/SignalAggregator.js +84 -0
- package/dist/lib/infrastructure/signal/SignalBridge.d.ts +13 -0
- package/dist/lib/infrastructure/signal/SignalBridge.js +20 -0
- package/dist/lib/infrastructure/signal/SignalBus.d.ts +63 -0
- package/dist/lib/infrastructure/signal/SignalBus.js +106 -0
- package/dist/lib/infrastructure/signal/SignalTraceWriter.d.ts +36 -0
- package/dist/lib/infrastructure/signal/SignalTraceWriter.js +130 -0
- package/dist/lib/infrastructure/vector/HnswVectorAdapter.js +18 -2
- package/dist/lib/injection/ServiceContainer.js +8 -0
- package/dist/lib/injection/ServiceMap.d.ts +16 -10
- package/dist/lib/injection/modules/AgentModule.d.ts +1 -1
- package/dist/lib/injection/modules/AgentModule.js +7 -1
- package/dist/lib/injection/modules/AppModule.d.ts +1 -1
- package/dist/lib/injection/modules/AppModule.js +4 -13
- package/dist/lib/injection/modules/GuardModule.js +27 -2
- package/dist/lib/injection/modules/InfraModule.d.ts +0 -1
- package/dist/lib/injection/modules/InfraModule.js +9 -7
- package/dist/lib/injection/modules/KnowledgeModule.d.ts +5 -0
- package/dist/lib/injection/modules/KnowledgeModule.js +131 -0
- package/dist/lib/injection/modules/PanoramaModule.d.ts +18 -0
- package/dist/lib/injection/modules/PanoramaModule.js +76 -0
- package/dist/lib/injection/modules/SignalModule.d.ts +10 -0
- package/dist/lib/injection/modules/SignalModule.js +84 -0
- package/dist/lib/repository/knowledge/KnowledgeRepository.impl.d.ts +1 -0
- package/dist/lib/repository/knowledge/KnowledgeRepository.impl.js +6 -0
- package/dist/lib/service/bootstrap/BootstrapTaskManager.d.ts +3 -1
- package/dist/lib/service/bootstrap/BootstrapTaskManager.js +20 -1
- package/dist/lib/service/bootstrap/UiStartupTasks.d.ts +45 -0
- package/dist/lib/service/bootstrap/UiStartupTasks.js +101 -0
- package/dist/lib/service/delivery/AgentInstructionsGenerator.js +4 -5
- package/dist/lib/service/delivery/CursorDeliveryPipeline.d.ts +3 -1
- package/dist/lib/service/delivery/CursorDeliveryPipeline.js +13 -10
- package/dist/lib/service/delivery/RulesGenerator.js +3 -2
- package/dist/lib/service/evolution/ConsolidationAdvisor.d.ts +114 -0
- package/dist/lib/service/evolution/ConsolidationAdvisor.js +542 -0
- package/dist/lib/service/evolution/ContradictionDetector.d.ts +54 -0
- package/dist/lib/service/evolution/ContradictionDetector.js +253 -0
- package/dist/lib/service/evolution/DecayDetector.d.ts +71 -0
- package/dist/lib/service/evolution/DecayDetector.js +244 -0
- package/dist/lib/service/evolution/EnhancementSuggester.d.ts +38 -0
- package/dist/lib/service/evolution/EnhancementSuggester.js +220 -0
- package/dist/lib/service/evolution/KnowledgeMetabolism.d.ts +82 -0
- package/dist/lib/service/evolution/KnowledgeMetabolism.js +167 -0
- package/dist/lib/service/evolution/RedundancyAnalyzer.d.ts +53 -0
- package/dist/lib/service/evolution/RedundancyAnalyzer.js +210 -0
- package/dist/lib/service/evolution/StagingManager.d.ts +57 -0
- package/dist/lib/service/evolution/StagingManager.js +201 -0
- package/dist/lib/service/guard/ComplianceReporter.d.ts +42 -2
- package/dist/lib/service/guard/ComplianceReporter.js +43 -5
- package/dist/lib/service/guard/CoverageAnalyzer.d.ts +54 -0
- package/dist/lib/service/guard/CoverageAnalyzer.js +149 -0
- package/dist/lib/service/guard/GuardCheckEngine.d.ts +42 -0
- package/dist/lib/service/guard/GuardCheckEngine.js +465 -14
- package/dist/lib/service/guard/GuardFeedbackLoop.d.ts +3 -0
- package/dist/lib/service/guard/GuardFeedbackLoop.js +9 -0
- package/dist/lib/service/guard/ReverseGuard.d.ts +73 -0
- package/dist/lib/service/guard/ReverseGuard.js +256 -0
- package/dist/lib/service/guard/RuleLearner.d.ts +12 -0
- package/dist/lib/service/guard/RuleLearner.js +38 -0
- package/dist/lib/service/guard/UncertaintyCollector.d.ts +83 -0
- package/dist/lib/service/guard/UncertaintyCollector.js +149 -0
- package/dist/lib/service/guard/ViolationsStore.d.ts +1 -0
- package/dist/lib/service/guard/ViolationsStore.js +33 -3
- package/dist/lib/service/knowledge/ConfidenceRouter.d.ts +13 -0
- package/dist/lib/service/knowledge/ConfidenceRouter.js +14 -0
- package/dist/lib/service/knowledge/KnowledgeService.js +22 -4
- package/dist/lib/service/knowledge/SourceRefReconciler.d.ts +68 -0
- package/dist/lib/service/knowledge/SourceRefReconciler.js +309 -0
- package/dist/lib/service/panorama/CouplingAnalyzer.d.ts +27 -0
- package/dist/lib/service/panorama/CouplingAnalyzer.js +192 -0
- package/dist/lib/service/panorama/DimensionAnalyzer.d.ts +28 -0
- package/dist/lib/service/panorama/DimensionAnalyzer.js +320 -0
- package/dist/lib/service/panorama/LayerInferrer.d.ts +19 -0
- package/dist/lib/service/panorama/LayerInferrer.js +182 -0
- package/dist/lib/service/panorama/ModuleDiscoverer.d.ts +24 -0
- package/dist/lib/service/panorama/ModuleDiscoverer.js +185 -0
- package/dist/lib/service/panorama/PanoramaAggregator.d.ts +29 -0
- package/dist/lib/service/panorama/PanoramaAggregator.js +228 -0
- package/dist/lib/service/panorama/PanoramaScanner.d.ts +52 -0
- package/dist/lib/service/panorama/PanoramaScanner.js +188 -0
- package/dist/lib/service/panorama/PanoramaService.d.ts +125 -0
- package/dist/lib/service/panorama/PanoramaService.js +363 -0
- package/dist/lib/service/panorama/PanoramaTypes.d.ts +134 -0
- package/dist/lib/service/panorama/PanoramaTypes.js +6 -0
- package/dist/lib/service/panorama/RoleRefiner.d.ts +48 -0
- package/dist/lib/service/panorama/RoleRefiner.js +535 -0
- package/dist/lib/service/search/BM25Scorer.d.ts +2 -2
- package/dist/lib/service/search/CoarseRanker.d.ts +7 -6
- package/dist/lib/service/search/CoarseRanker.js +11 -10
- package/dist/lib/service/search/FieldWeightedScorer.d.ts +81 -0
- package/dist/lib/service/search/FieldWeightedScorer.js +318 -0
- package/dist/lib/service/search/MultiSignalRanker.d.ts +3 -2
- package/dist/lib/service/search/MultiSignalRanker.js +17 -1
- package/dist/lib/service/search/SearchEngine.d.ts +9 -7
- package/dist/lib/service/search/SearchEngine.js +67 -10
- package/dist/lib/service/search/SearchTypes.d.ts +25 -3
- package/dist/lib/service/search/SearchTypes.js +6 -1
- package/dist/lib/service/signal/HitRecorder.d.ts +68 -0
- package/dist/lib/service/signal/HitRecorder.js +173 -0
- package/dist/lib/service/skills/SignalCollector.d.ts +3 -1
- package/dist/lib/service/skills/SignalCollector.js +31 -1
- package/dist/lib/service/task/IntentExtractor.d.ts +66 -0
- package/dist/lib/service/task/IntentExtractor.js +256 -0
- package/dist/lib/service/task/PrimeSearchPipeline.d.ts +54 -0
- package/dist/lib/service/task/PrimeSearchPipeline.js +113 -0
- package/dist/lib/service/vector/VectorService.d.ts +3 -0
- package/dist/lib/service/vector/VectorService.js +38 -4
- package/dist/lib/shared/schemas/mcp-tools.d.ts +41 -96
- package/dist/lib/shared/schemas/mcp-tools.js +59 -119
- package/dist/scripts/analyze-signals.d.ts +20 -0
- package/dist/scripts/analyze-signals.js +155 -0
- package/dist/scripts/diagnose-mcp.js +1 -1
- package/package.json +1 -1
- package/skills/autosnippet-create/SKILL.md +98 -89
- package/skills/autosnippet-devdocs/SKILL.md +55 -57
- package/templates/claude-code/hooks/autosnippet-session.sh +10 -15
- package/templates/cursor-hooks/hooks/session-start.sh +1 -1
- package/templates/guard-ci.yml +2 -2
- package/templates/instructions/agent-static.md +2 -1
- package/templates/instructions/conventions.md +5 -6
- package/templates/recipes-setup/README.md +1 -2
- package/templates/recipes-setup/_template.md +39 -39
- package/dashboard/dist/assets/icons-BofcEZ3f.js +0 -1
- package/dashboard/dist/assets/index-D0whuycy.css +0 -1
- package/dashboard/dist/assets/index-SiN1GChm.js +0 -128
- package/dist/lib/domain/task/Task.d.ts +0 -140
- package/dist/lib/domain/task/Task.js +0 -254
- package/dist/lib/domain/task/TaskDependency.d.ts +0 -23
- package/dist/lib/domain/task/TaskDependency.js +0 -34
- package/dist/lib/domain/task/TaskIdGenerator.d.ts +0 -40
- package/dist/lib/domain/task/TaskIdGenerator.js +0 -75
- package/dist/lib/domain/task/index.d.ts +0 -4
- package/dist/lib/domain/task/index.js +0 -4
- package/dist/lib/infrastructure/database/migrations/002_add_tasks.d.ts +0 -11
- package/dist/lib/infrastructure/database/migrations/002_add_tasks.js +0 -86
- package/dist/lib/repository/task/TaskRepository.impl.d.ts +0 -171
- package/dist/lib/repository/task/TaskRepository.impl.js +0 -347
- package/dist/lib/service/task/TaskGraphService.d.ts +0 -222
- package/dist/lib/service/task/TaskGraphService.js +0 -597
- package/dist/lib/service/task/TaskKnowledgeBridge.d.ts +0 -95
- package/dist/lib/service/task/TaskKnowledgeBridge.js +0 -298
- package/dist/lib/service/task/TaskReadyEngine.d.ts +0 -84
- package/dist/lib/service/task/TaskReadyEngine.js +0 -115
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FieldWeightedScorer — 加权字段匹配评分器
|
|
3
|
+
*
|
|
4
|
+
* 替代 BM25Scorer 作为结构化知识库的默认搜索评分引擎。
|
|
5
|
+
*
|
|
6
|
+
* 设计动机:
|
|
7
|
+
* - BM25 将所有字段拼接为文本做统计评分,tokenize 去重导致 TF 恒为 1,BM25F boost 失效
|
|
8
|
+
* - 对于 ~50–500 条结构化知识条目,BM25 的大规模语料假设不成立
|
|
9
|
+
* - FieldWeightedScorer 对每个字段独立打分并加权合并,精确匹配 > token 重叠 > IDF 加权
|
|
10
|
+
*
|
|
11
|
+
* 字段权重:
|
|
12
|
+
* trigger (5.0) > title (3.0) > tags (2.0) > description (1.5) > content (1.0) > facets (0.5)
|
|
13
|
+
*
|
|
14
|
+
* @module FieldWeightedScorer
|
|
15
|
+
*/
|
|
16
|
+
import type { BM25SearchResult, Scorer } from './SearchTypes.js';
|
|
17
|
+
/** Internal representation of one indexed document: raw field values, per-field token lists, and the caller-supplied meta. */
|
|
18
|
+
interface FieldWeightedDocument {
|
|
19
|
+
id: string;
|
|
20
|
+
fields: {
|
|
21
|
+
trigger: string;
|
|
22
|
+
title: string;
|
|
23
|
+
description: string;
|
|
24
|
+
tags: string[];
|
|
25
|
+
language: string;
|
|
26
|
+
category: string;
|
|
27
|
+
knowledgeType: string;
|
|
28
|
+
};
|
|
29
|
+
tokenizedFields: {
|
|
30
|
+
trigger: string[];
|
|
31
|
+
title: string[];
|
|
32
|
+
description: string[];
|
|
33
|
+
content: string[];
|
|
34
|
+
allUnique: Set<string>;
|
|
35
|
+
};
|
|
36
|
+
meta: Record<string, unknown>;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* FieldWeightedScorer — weighted field-matching scorer
|
|
40
|
+
*
|
|
41
|
+
* Interface-compatible with BM25Scorer (implements the Scorer interface), so it can be used as a drop-in replacement.
|
|
42
|
+
*/
|
|
43
|
+
export declare class FieldWeightedScorer implements Scorer {
|
|
44
|
+
avgLength: number;
|
|
45
|
+
docFreq: Record<string, number>;
|
|
46
|
+
documents: (FieldWeightedDocument | null)[];
|
|
47
|
+
totalDocs: number;
|
|
48
|
+
_idIndex: Map<string, number>;
|
|
49
|
+
_totalLength: number;
|
|
50
|
+
constructor();
|
|
51
|
+
/** Add a document to the index; structured fields are read from meta, and an already-indexed id is replaced */
|
|
52
|
+
addDocument(id: string, text: string, meta?: Record<string, unknown>): void;
|
|
53
|
+
/**
|
|
54
|
+
* Remove a document (tombstone + lazy compaction)
|
|
55
|
+
* @returns whether the document was removed
|
|
56
|
+
*/
|
|
57
|
+
removeDocument(id: string): boolean;
|
|
58
|
+
/** Update a document (remove + add) */
|
|
59
|
+
updateDocument(id: string, text: string, meta?: Record<string, unknown>): void;
|
|
60
|
+
/** Check whether a document with this id is indexed */
|
|
61
|
+
hasDocument(id: string): boolean;
|
|
62
|
+
/** Clear the whole index */
|
|
63
|
+
clear(): void;
|
|
64
|
+
/** Compact the documents array, dropping tombstone holes */
|
|
65
|
+
_compact(): void;
|
|
66
|
+
/** Search: score every document field by field with weights and return results in descending score order */
|
|
67
|
+
search(query: string, limit?: number): BM25SearchResult[];
|
|
68
|
+
/** String-level match score (used for trigger / title) */
|
|
69
|
+
_stringMatchScore(query: string, field: string): number;
|
|
70
|
+
/** Token-set overlap ratio (fraction of query tokens found in the field) */
|
|
71
|
+
_tokenOverlap(queryTokens: string[], fieldTokens: string[]): number;
|
|
72
|
+
/** IDF-weighted token overlap (used for long text fields) */
|
|
73
|
+
_idfWeightedOverlap(queryTokens: string[], fieldTokens: string[]): number;
|
|
74
|
+
/** Tag match score */
|
|
75
|
+
_tagScore(queryTokens: string[], tags: string[]): number;
|
|
76
|
+
/** Facet match score (language / category / knowledgeType) */
|
|
77
|
+
_facetScore(queryTokens: string[], fields: FieldWeightedDocument['fields']): number;
|
|
78
|
+
/** Compute smoothed IDF (positive whenever the index is non-empty) */
|
|
79
|
+
_idf(token: string): number;
|
|
80
|
+
}
|
|
81
|
+
export {};
|
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FieldWeightedScorer — 加权字段匹配评分器
|
|
3
|
+
*
|
|
4
|
+
* 替代 BM25Scorer 作为结构化知识库的默认搜索评分引擎。
|
|
5
|
+
*
|
|
6
|
+
* 设计动机:
|
|
7
|
+
* - BM25 将所有字段拼接为文本做统计评分,tokenize 去重导致 TF 恒为 1,BM25F boost 失效
|
|
8
|
+
* - 对于 ~50–500 条结构化知识条目,BM25 的大规模语料假设不成立
|
|
9
|
+
* - FieldWeightedScorer 对每个字段独立打分并加权合并,精确匹配 > token 重叠 > IDF 加权
|
|
10
|
+
*
|
|
11
|
+
* 字段权重:
|
|
12
|
+
* trigger (5.0) > title (3.0) > tags (2.0) > description (1.5) > content (1.0) > facets (0.5)
|
|
13
|
+
*
|
|
14
|
+
* @module FieldWeightedScorer
|
|
15
|
+
*/
|
|
16
|
+
import { tokenize } from './tokenizer.js';
|
|
17
|
+
// ── Field weight constants (tunable) ──
const TRIGGER_WEIGHT = 5.0;
const TITLE_WEIGHT = 3.0;
const TAG_WEIGHT = 2.0;
const DESCRIPTION_WEIGHT = 1.5;
const CONTENT_WEIGHT = 1.0;
const FACET_WEIGHT = 0.5;

/**
 * Normalise a loosely-typed meta value into the string stored for a field.
 * Strings pass through unchanged, truthy numbers/bigints are stringified, and
 * everything else (null, undefined, objects, booleans, 0, NaN) becomes ''.
 * This keeps later `.toLowerCase()` calls from throwing on non-string meta.
 *
 * @param {unknown} value raw value read from a document's meta
 * @returns {string} the text to index for that field
 */
function toFieldText(value) {
    if (typeof value === 'string') {
        return value;
    }
    if ((typeof value === 'number' || typeof value === 'bigint') && value) {
        return String(value);
    }
    return '';
}

/**
 * FieldWeightedScorer — weighted field-matching scorer.
 *
 * Scores each document field independently and combines the results with the
 * weight constants above. Exposes the same methods as BM25Scorer
 * (addDocument / removeDocument / updateDocument / hasDocument / clear / search).
 */
export class FieldWeightedScorer {
    avgLength;
    docFreq;
    documents;
    totalDocs;
    _idIndex;
    _totalLength;

    constructor() {
        this.documents = [];
        this.totalDocs = 0;
        // Null-prototype dictionary: tokens such as "constructor" or "toString"
        // must not resolve to inherited Object.prototype members.
        this.docFreq = Object.create(null);
        this._idIndex = new Map();
        this._totalLength = 0;
        this.avgLength = 0;
    }

    /**
     * Add a document to the index. An already-indexed id is replaced.
     *
     * @param {string} id document identifier
     * @param {string} text concatenated text; only used as the content fallback
     *   when `meta.contentText` is empty
     * @param {Record<string, unknown>} [meta] structured fields (trigger, title,
     *   description, tags, language, category, knowledgeType, contentText);
     *   the object itself is returned with search hits
     * @returns {void}
     */
    addDocument(id, text, meta = {}) {
        if (this._idIndex.has(id)) {
            this.removeDocument(id);
        }
        // An explicit null must behave like "no meta" (the default only covers undefined).
        const safeMeta = meta ?? {};

        // Extract structured fields from meta.
        const trigger = toFieldText(safeMeta.trigger);
        const title = toFieldText(safeMeta.title);
        const description = toFieldText(safeMeta.description);
        // Drop non-string and empty tags: an empty tag would substring-match every query token.
        const tags = Array.isArray(safeMeta.tags)
            ? safeMeta.tags.filter((tag) => typeof tag === 'string' && tag !== '')
            : [];
        const language = toFieldText(safeMeta.language);
        const category = toFieldText(safeMeta.category);
        const knowledgeType = toFieldText(safeMeta.knowledgeType);
        const contentText = toFieldText(safeMeta.contentText);

        // Tokenize every field independently.
        const triggerTokens = tokenize(trigger);
        const titleTokens = tokenize(title);
        const descTokens = tokenize(description);
        // contentText wins; fall back to the concatenated text when it is empty.
        const contentTokens = tokenize(contentText || toFieldText(text));

        // Union of all tokens of this document, used for document-frequency counting.
        const allUnique = new Set([
            ...triggerTokens,
            ...titleTokens,
            ...descTokens,
            ...contentTokens,
        ]);
        for (const tag of tags) {
            for (const t of tokenize(tag)) {
                allUnique.add(t);
            }
        }

        const doc = {
            id,
            fields: { trigger, title, description, tags, language, category, knowledgeType },
            tokenizedFields: {
                trigger: triggerTokens,
                title: titleTokens,
                description: descTokens,
                content: contentTokens,
                allUnique,
            },
            meta: safeMeta,
        };
        const idx = this.documents.length;
        this.documents.push(doc);
        this._idIndex.set(id, idx);
        for (const token of allUnique) {
            this.docFreq[token] = (this.docFreq[token] || 0) + 1;
        }
        this.totalDocs = this._idIndex.size;
        this._totalLength += allUnique.size;
        this.avgLength = this.totalDocs > 0 ? this._totalLength / this.totalDocs : 0;
    }

    /**
     * Remove a document. The slot is tombstoned (set to null) and the array is
     * compacted lazily once it is large and sparse enough.
     *
     * @param {string} id document identifier
     * @returns {boolean} true when a document was removed, false when the id is unknown
     */
    removeDocument(id) {
        const idx = this._idIndex.get(id);
        if (idx === undefined) {
            return false;
        }
        const doc = this.documents[idx];
        if (!doc) {
            return false;
        }
        for (const token of doc.tokenizedFields.allUnique) {
            if (this.docFreq[token]) {
                this.docFreq[token]--;
                if (this.docFreq[token] <= 0) {
                    delete this.docFreq[token];
                }
            }
        }
        this._totalLength -= doc.tokenizedFields.allUnique.size;
        this.documents[idx] = null;
        this._idIndex.delete(id);
        this.totalDocs = this._idIndex.size;
        this.avgLength = this.totalDocs > 0 ? this._totalLength / this.totalDocs : 0;
        // Compact only when the array has more than 100 slots and over 30% are tombstones.
        const nullCount = this.documents.length - this.totalDocs;
        if (this.documents.length > 100 && nullCount / this.documents.length > 0.3) {
            this._compact();
        }
        return true;
    }

    /**
     * Update a document (remove + add).
     *
     * @param {string} id document identifier
     * @param {string} text concatenated text (content fallback)
     * @param {Record<string, unknown>} [meta] structured fields
     * @returns {void}
     */
    updateDocument(id, text, meta = {}) {
        this.removeDocument(id);
        this.addDocument(id, text, meta);
    }

    /**
     * Check whether a document with this id is indexed.
     *
     * @param {string} id document identifier
     * @returns {boolean}
     */
    hasDocument(id) {
        return this._idIndex.has(id);
    }

    /** Clear the whole index. */
    clear() {
        this.documents = [];
        this.docFreq = Object.create(null);
        this.totalDocs = 0;
        this._totalLength = 0;
        this.avgLength = 0;
        this._idIndex.clear();
    }

    /** Compact the documents array: drop tombstones and rebuild the id → index map. */
    _compact() {
        const alive = this.documents.filter((d) => d !== null);
        this.documents = alive;
        this._idIndex.clear();
        for (let i = 0; i < alive.length; i++) {
            this._idIndex.set(alive[i].id, i);
        }
    }

    /**
     * Score every live document against the query and return the best hits.
     *
     * @param {string} query free-text query
     * @param {number} [limit=20] maximum number of hits returned
     * @returns {{id: string, score: number, meta: Record<string, unknown>}[]}
     *   hits with a score above 0, sorted by descending score
     */
    search(query, limit = 20) {
        if (typeof query !== 'string') {
            return [];
        }
        const queryTokens = tokenize(query);
        if (queryTokens.length === 0) {
            return [];
        }
        const scores = [];
        for (const doc of this.documents) {
            if (!doc) {
                continue;
            }
            const { fields, tokenizedFields } = doc;
            let totalScore = 0;
            // 1. Trigger — highest weight; best of string match and token overlap.
            totalScore +=
                TRIGGER_WEIGHT *
                    Math.max(this._stringMatchScore(query, fields.trigger), this._tokenOverlap(queryTokens, tokenizedFields.trigger));
            // 2. Title — best of string match and token overlap.
            totalScore +=
                TITLE_WEIGHT *
                    Math.max(this._stringMatchScore(query, fields.title), this._tokenOverlap(queryTokens, tokenizedFields.title));
            // 3. Tags.
            totalScore += TAG_WEIGHT * this._tagScore(queryTokens, fields.tags);
            // 4. Description — IDF-weighted token overlap.
            totalScore +=
                DESCRIPTION_WEIGHT * this._idfWeightedOverlap(queryTokens, tokenizedFields.description);
            // 5. Content — IDF-weighted token overlap.
            totalScore +=
                CONTENT_WEIGHT * this._idfWeightedOverlap(queryTokens, tokenizedFields.content);
            // 6. Facets — language / category / knowledgeType.
            totalScore += FACET_WEIGHT * this._facetScore(queryTokens, fields);
            if (totalScore > 0) {
                scores.push({ id: doc.id, score: totalScore, meta: doc.meta });
            }
        }
        scores.sort((a, b) => b.score - a.score);
        return scores.slice(0, limit);
    }

    // ── Internal scoring helpers ──

    /**
     * String-level match score (used for trigger / title), case-insensitive.
     *
     * @param {string} query raw query string
     * @param {string} field raw field value
     * @returns {number} 1.0 exact, 0.7 field starts with query, 0.5 field contains
     *   query, 0.3 query contains a field longer than 3 chars, otherwise 0
     */
    _stringMatchScore(query, field) {
        if (!field) {
            return 0;
        }
        const q = String(query).toLowerCase();
        const f = String(field).toLowerCase();
        if (f === q) {
            return 1.0;
        }
        if (f.startsWith(q)) {
            return 0.7;
        }
        if (f.includes(q)) {
            return 0.5;
        }
        if (q.includes(f) && f.length > 3) {
            return 0.3;
        }
        return 0;
    }

    /**
     * Fraction of query tokens that occur in the field.
     *
     * @param {string[]} queryTokens tokenized query
     * @param {string[]} fieldTokens tokenized field
     * @returns {number} value in [0, 1]
     */
    _tokenOverlap(queryTokens, fieldTokens) {
        if (queryTokens.length === 0) {
            return 0;
        }
        const fieldSet = new Set(fieldTokens);
        let matched = 0;
        for (const qt of queryTokens) {
            if (fieldSet.has(qt)) {
                matched++;
            }
        }
        return matched / queryTokens.length;
    }

    /**
     * IDF-weighted token overlap (used for long text fields): the share of the
     * query's total IDF mass carried by tokens that occur in the field.
     *
     * @param {string[]} queryTokens tokenized query
     * @param {string[]} fieldTokens tokenized field
     * @returns {number} value in [0, 1]
     */
    _idfWeightedOverlap(queryTokens, fieldTokens) {
        if (queryTokens.length === 0) {
            return 0;
        }
        const fieldSet = new Set(fieldTokens);
        let matchedIdf = 0;
        let totalIdf = 0;
        for (const qt of queryTokens) {
            const idf = this._idf(qt);
            totalIdf += idf;
            if (fieldSet.has(qt)) {
                matchedIdf += idf;
            }
        }
        return totalIdf > 0 ? matchedIdf / totalIdf : 0;
    }

    /**
     * Tag match score. Per tag: 1.0 when the lower-cased tag equals a query
     * token, 0.5 when tag and a query token contain one another, 0.3 when one of
     * the tag's own tokens is a query token. The sum is divided by the number of
     * query tokens and capped at 1.0.
     *
     * @param {string[]} queryTokens tokenized query
     * @param {string[]} tags document tags
     * @returns {number} value in [0, 1]
     */
    _tagScore(queryTokens, tags) {
        if (tags.length === 0 || queryTokens.length === 0) {
            return 0;
        }
        let score = 0;
        const qtSet = new Set(queryTokens);
        for (const tag of tags) {
            // Skip unusable tags; '' in particular is a substring of every query token.
            if (typeof tag !== 'string' || tag === '') {
                continue;
            }
            const lowTag = tag.toLowerCase();
            // Exact token match.
            if (qtSet.has(lowTag)) {
                score += 1.0;
                continue;
            }
            // Partial match: query token contains the tag or the tag contains a query token.
            if (queryTokens.some((qt) => lowTag.includes(qt) || qt.includes(lowTag))) {
                score += 0.5;
                continue;
            }
            // Last resort: tokenize the tag and look for any shared token.
            if (tokenize(tag).some((tt) => qtSet.has(tt))) {
                score += 0.3;
            }
        }
        return Math.min(score / queryTokens.length, 1.0);
    }

    /**
     * Facet match score: fraction of the non-empty facets (language, category,
     * knowledgeType) that match a query token, either as a whole or via one of
     * the facet's own tokens.
     *
     * @param {string[]} queryTokens tokenized query
     * @param {{language: string, category: string, knowledgeType: string}} fields document fields
     * @returns {number} value in [0, 1]
     */
    _facetScore(queryTokens, fields) {
        const facets = [fields.language, fields.category, fields.knowledgeType].filter(Boolean);
        if (facets.length === 0) {
            return 0;
        }
        let matched = 0;
        const qtSet = new Set(queryTokens);
        for (const facet of facets) {
            const lower = facet.toLowerCase();
            if (qtSet.has(lower)) {
                matched++;
                continue;
            }
            for (const ft of tokenize(facet)) {
                if (qtSet.has(ft)) {
                    matched++;
                    break;
                }
            }
        }
        return matched / facets.length;
    }

    /**
     * Smoothed inverse document frequency: log2(1 + totalDocs / (df + 1)).
     * Positive whenever the index is non-empty; 0 for an empty index.
     *
     * @param {string} token token to look up
     * @returns {number}
     */
    _idf(token) {
        const df = this.docFreq[token] || 0;
        return Math.log2(1 + this.totalDocs / (df + 1));
    }
}
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* 不同场景使用不同权重配置(向后兼容旧配置中的 'seasonality' 键)
|
|
5
5
|
*/
|
|
6
6
|
interface SignalCandidate {
|
|
7
|
-
|
|
7
|
+
recallScore?: number;
|
|
8
8
|
score?: number;
|
|
9
9
|
title?: string;
|
|
10
10
|
trigger?: string;
|
|
@@ -74,6 +74,7 @@ export declare class MultiSignalRanker {
|
|
|
74
74
|
#private;
|
|
75
75
|
constructor(options?: {
|
|
76
76
|
scenarioWeights?: Record<string, Record<string, number>>;
|
|
77
|
+
signalBus?: import('../../infrastructure/signal/SignalBus.js').SignalBus;
|
|
77
78
|
});
|
|
78
79
|
/**
|
|
79
80
|
* 对候选列表进行多信号加权排序
|
|
@@ -83,7 +84,7 @@ export declare class MultiSignalRanker {
|
|
|
83
84
|
rank(candidates: SignalCandidate[], context?: SignalContext): {
|
|
84
85
|
rankerScore: number;
|
|
85
86
|
signals: Record<string, number>;
|
|
86
|
-
|
|
87
|
+
recallScore?: number;
|
|
87
88
|
score?: number;
|
|
88
89
|
title?: string;
|
|
89
90
|
trigger?: string;
|
|
@@ -54,7 +54,7 @@ const SCENARIO_WEIGHTS = {
|
|
|
54
54
|
/** 相关性信号 — BM25 + 标题匹配 + 内容匹配 */
|
|
55
55
|
export class RelevanceSignal {
|
|
56
56
|
compute(candidate, context) {
|
|
57
|
-
let score = candidate.
|
|
57
|
+
let score = candidate.recallScore || candidate.score || 0;
|
|
58
58
|
const query = (context.query || '').toLowerCase();
|
|
59
59
|
if (!query) {
|
|
60
60
|
return Math.min(score, 1.0);
|
|
@@ -209,6 +209,8 @@ export class VectorSignal {
|
|
|
209
209
|
export class MultiSignalRanker {
|
|
210
210
|
#signals;
|
|
211
211
|
#scenarioWeights;
|
|
212
|
+
#realtimeWeights = new Map();
|
|
213
|
+
#recentlyUsed = new Set();
|
|
212
214
|
constructor(options = {}) {
|
|
213
215
|
this.#signals = {
|
|
214
216
|
relevance: new RelevanceSignal(),
|
|
@@ -230,6 +232,20 @@ export class MultiSignalRanker {
|
|
|
230
232
|
}
|
|
231
233
|
}
|
|
232
234
|
this.#scenarioWeights = { ...SCENARIO_WEIGHTS, ...remapped };
|
|
235
|
+
// Phase 2: 订阅实时信号更新权重
|
|
236
|
+
if (options.signalBus) {
|
|
237
|
+
options.signalBus.subscribe('quality|usage', (signal) => {
|
|
238
|
+
this.#onSignal(signal);
|
|
239
|
+
});
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
#onSignal(signal) {
|
|
243
|
+
if (signal.type === 'quality' && signal.target) {
|
|
244
|
+
this.#realtimeWeights.set(signal.target, signal.value);
|
|
245
|
+
}
|
|
246
|
+
if (signal.type === 'usage' && signal.target) {
|
|
247
|
+
this.#recentlyUsed.add(signal.target);
|
|
248
|
+
}
|
|
233
249
|
}
|
|
234
250
|
/**
|
|
235
251
|
* 对候选列表进行多信号加权排序
|
|
@@ -5,17 +5,17 @@
|
|
|
5
5
|
* 从 V1 SearchServiceV2 迁移,适配 V2 架构
|
|
6
6
|
*/
|
|
7
7
|
import Logger from '../../infrastructure/logging/Logger.js';
|
|
8
|
-
import { BM25Scorer } from './BM25Scorer.js';
|
|
9
8
|
import { CoarseRanker } from './CoarseRanker.js';
|
|
10
9
|
import { MultiSignalRanker } from './MultiSignalRanker.js';
|
|
11
|
-
import type { DbRow, RankingContext, SearchAiProvider, SearchCrossEncoder, SearchDb, SearchEngineOptions, SearchHybridRetriever, SearchOptions, SearchResponse, SearchResultItem, SearchVectorService, SearchVectorStore } from './SearchTypes.js';
|
|
10
|
+
import type { DbRow, RankingContext, Scorer, SearchAiProvider, SearchCrossEncoder, SearchDb, SearchEngineOptions, SearchHybridRetriever, SearchOptions, SearchResponse, SearchResultItem, SearchVectorService, SearchVectorStore } from './SearchTypes.js';
|
|
12
11
|
export { BM25Scorer } from './BM25Scorer.js';
|
|
13
|
-
export
|
|
12
|
+
export { FieldWeightedScorer } from './FieldWeightedScorer.js';
|
|
13
|
+
export type { BM25DocMeta, BM25SearchResult, DbRow, RankingContext, RrfHit, Scorer, SearchAiProvider, SearchCrossEncoder, SearchDb, SearchEngineOptions, SearchHybridRetriever, SearchOptions, SearchResponse, SearchResultItem, SearchVectorService, SearchVectorStore, SlimSearchResult, VectorHit, } from './SearchTypes.js';
|
|
14
14
|
export { groupByKind, slimSearchResult } from './SearchTypes.js';
|
|
15
15
|
export { tokenize } from './tokenizer.js';
|
|
16
16
|
/**
|
|
17
17
|
* SearchEngine - 完整搜索服务
|
|
18
|
-
*
|
|
18
|
+
* 整合召回评分 + 关键词 + 可选 AI 增强
|
|
19
19
|
*/
|
|
20
20
|
export declare class SearchEngine {
|
|
21
21
|
_cache: Map<string, {
|
|
@@ -25,16 +25,17 @@ export declare class SearchEngine {
|
|
|
25
25
|
_cacheMaxAge: number;
|
|
26
26
|
_coarseRanker: CoarseRanker;
|
|
27
27
|
_crossEncoder: SearchCrossEncoder | null;
|
|
28
|
-
|
|
28
|
+
_fusionRecallWeight: number;
|
|
29
29
|
_fusionSemanticWeight: number;
|
|
30
30
|
_indexed: boolean;
|
|
31
31
|
_lastIndexTime: string | null;
|
|
32
32
|
_multiSignalRanker: MultiSignalRanker;
|
|
33
|
+
_signalBus: import('../../infrastructure/signal/SignalBus.js').SignalBus | null;
|
|
33
34
|
aiProvider: SearchAiProvider | null;
|
|
34
35
|
db: SearchDb;
|
|
35
36
|
hybridRetriever: SearchHybridRetriever | null;
|
|
36
37
|
logger: ReturnType<typeof Logger.getInstance>;
|
|
37
|
-
scorer:
|
|
38
|
+
scorer: Scorer;
|
|
38
39
|
vectorService: SearchVectorService | null;
|
|
39
40
|
vectorStore: SearchVectorStore | null;
|
|
40
41
|
constructor(db: SearchDb & {
|
|
@@ -75,7 +76,6 @@ export declare class SearchEngine {
|
|
|
75
76
|
headers?: string;
|
|
76
77
|
moduleName?: string;
|
|
77
78
|
knowledgeType?: string;
|
|
78
|
-
bm25Score?: number;
|
|
79
79
|
qualityScore?: number;
|
|
80
80
|
usageCount?: number;
|
|
81
81
|
authorityScore?: number;
|
|
@@ -150,6 +150,8 @@ export declare class SearchEngine {
|
|
|
150
150
|
type: string;
|
|
151
151
|
title: string | undefined;
|
|
152
152
|
trigger: string;
|
|
153
|
+
description: string;
|
|
154
|
+
contentText: string;
|
|
153
155
|
status: string | undefined;
|
|
154
156
|
knowledgeType: string | undefined;
|
|
155
157
|
kind: string;
|
|
@@ -5,28 +5,30 @@
|
|
|
5
5
|
* 从 V1 SearchServiceV2 迁移,适配 V2 架构
|
|
6
6
|
*/
|
|
7
7
|
import Logger from '../../infrastructure/logging/Logger.js';
|
|
8
|
-
import { BM25Scorer } from './BM25Scorer.js';
|
|
9
8
|
import { CoarseRanker } from './CoarseRanker.js';
|
|
10
9
|
import { contextBoost } from './contextBoost.js';
|
|
10
|
+
import { FieldWeightedScorer } from './FieldWeightedScorer.js';
|
|
11
11
|
import { MultiSignalRanker } from './MultiSignalRanker.js';
|
|
12
12
|
// ── Re-exports for backward compatibility ──
|
|
13
13
|
export { BM25Scorer } from './BM25Scorer.js';
|
|
14
|
+
export { FieldWeightedScorer } from './FieldWeightedScorer.js';
|
|
14
15
|
export { groupByKind, slimSearchResult } from './SearchTypes.js';
|
|
15
16
|
export { tokenize } from './tokenizer.js';
|
|
16
17
|
/**
|
|
17
18
|
* SearchEngine - 完整搜索服务
|
|
18
|
-
*
|
|
19
|
+
* 整合召回评分 + 关键词 + 可选 AI 增强
|
|
19
20
|
*/
|
|
20
21
|
export class SearchEngine {
|
|
21
22
|
_cache;
|
|
22
23
|
_cacheMaxAge;
|
|
23
24
|
_coarseRanker;
|
|
24
25
|
_crossEncoder;
|
|
25
|
-
|
|
26
|
+
_fusionRecallWeight;
|
|
26
27
|
_fusionSemanticWeight;
|
|
27
28
|
_indexed;
|
|
28
29
|
_lastIndexTime = null;
|
|
29
30
|
_multiSignalRanker;
|
|
31
|
+
_signalBus;
|
|
30
32
|
aiProvider;
|
|
31
33
|
db;
|
|
32
34
|
hybridRetriever;
|
|
@@ -41,16 +43,17 @@ export class SearchEngine {
|
|
|
41
43
|
this.vectorStore = options.vectorStore || null;
|
|
42
44
|
this.vectorService = options.vectorService || null;
|
|
43
45
|
this.hybridRetriever = options.hybridRetriever || null;
|
|
44
|
-
this.scorer = new
|
|
46
|
+
this.scorer = new FieldWeightedScorer();
|
|
45
47
|
this._coarseRanker = new CoarseRanker(options);
|
|
46
48
|
this._multiSignalRanker = new MultiSignalRanker(options);
|
|
47
49
|
this._crossEncoder = options.crossEncoderReranker || null;
|
|
48
50
|
this._indexed = false;
|
|
49
51
|
this._cache = new Map();
|
|
50
52
|
this._cacheMaxAge = options.cacheMaxAge || 300_000; // 5min
|
|
51
|
-
// auto 模式
|
|
52
|
-
this.
|
|
53
|
+
// auto 模式 召回+semantic 融合权重(可配置)
|
|
54
|
+
this._fusionRecallWeight = options.fusionRecallWeight ?? 0.6;
|
|
53
55
|
this._fusionSemanticWeight = options.fusionSemanticWeight ?? 0.4;
|
|
56
|
+
this._signalBus = options.signalBus || null;
|
|
54
57
|
}
|
|
55
58
|
/** 构建搜索索引 - 从数据库加载所有可搜索实体 */
|
|
56
59
|
buildIndex() {
|
|
@@ -212,12 +215,19 @@ export class SearchEngine {
|
|
|
212
215
|
response.byKind = { rule: [], pattern: [], fact: [] };
|
|
213
216
|
for (const r of results) {
|
|
214
217
|
const kind = r.kind || 'pattern';
|
|
215
|
-
|
|
218
|
+
const bucket = response.byKind[kind] ?? response.byKind.pattern;
|
|
219
|
+
bucket.push(r);
|
|
216
220
|
}
|
|
217
221
|
}
|
|
218
222
|
if (cacheKey) {
|
|
219
223
|
this._setCache(cacheKey, response);
|
|
220
224
|
}
|
|
225
|
+
// ── Signal emission ──
|
|
226
|
+
if (this._signalBus && response.total > 0) {
|
|
227
|
+
this._signalBus.send('search', 'SearchEngine', Math.min(response.total / limit, 1), {
|
|
228
|
+
metadata: { query, mode: actualMode, total: response.total },
|
|
229
|
+
});
|
|
230
|
+
}
|
|
221
231
|
return response;
|
|
222
232
|
}
|
|
223
233
|
// ── Ranking Pipeline ────────────────────────────────────────────
|
|
@@ -246,8 +256,8 @@ export class SearchEngine {
|
|
|
246
256
|
}
|
|
247
257
|
return ranked.map((r) => ({
|
|
248
258
|
...r,
|
|
249
|
-
recallScore: r.
|
|
250
|
-
score: r.contextScore || r.rankerScore || r.coarseScore || r.
|
|
259
|
+
recallScore: r.recallScore || 0,
|
|
260
|
+
score: r.contextScore || r.rankerScore || r.coarseScore || r.recallScore || 0,
|
|
251
261
|
}));
|
|
252
262
|
}
|
|
253
263
|
/**
|
|
@@ -278,7 +288,7 @@ export class SearchEngine {
|
|
|
278
288
|
return {
|
|
279
289
|
...item,
|
|
280
290
|
code: codeText || item.code || '',
|
|
281
|
-
|
|
291
|
+
recallScore: item.score || 0,
|
|
282
292
|
qualityScore: item.qualityScore || (item.status === 'active' ? 70 : 40),
|
|
283
293
|
usageCount: item.usageCount || 0,
|
|
284
294
|
authorityScore: item.authorityScore || 0,
|
|
@@ -625,6 +635,40 @@ export class SearchEngine {
|
|
|
625
635
|
catch {
|
|
626
636
|
/* DB may not be available */
|
|
627
637
|
}
|
|
638
|
+
// ── 从 recipe_source_refs 桥接表批量读取已验证的 sourceRefs ──
|
|
639
|
+
try {
|
|
640
|
+
const ids = items.map((it) => it.id);
|
|
641
|
+
if (ids.length === 0) {
|
|
642
|
+
return;
|
|
643
|
+
}
|
|
644
|
+
const placeholders = ids.map(() => '?').join(',');
|
|
645
|
+
const refsRows = this.db
|
|
646
|
+
.prepare(`SELECT recipe_id, source_path, status, new_path
|
|
647
|
+
FROM recipe_source_refs
|
|
648
|
+
WHERE recipe_id IN (${placeholders}) AND status != 'stale'`)
|
|
649
|
+
.all(...ids);
|
|
650
|
+
this.logger.debug('recipe_source_refs query', {
|
|
651
|
+
idCount: ids.length,
|
|
652
|
+
rowCount: refsRows.length,
|
|
653
|
+
});
|
|
654
|
+
const refsMap = new Map();
|
|
655
|
+
for (const row of refsRows) {
|
|
656
|
+
const refPath = row.status === 'renamed' && row.new_path ? row.new_path : row.source_path;
|
|
657
|
+
if (!refsMap.has(row.recipe_id)) {
|
|
658
|
+
refsMap.set(row.recipe_id, []);
|
|
659
|
+
}
|
|
660
|
+
refsMap.get(row.recipe_id)?.push(refPath);
|
|
661
|
+
}
|
|
662
|
+
for (const item of items) {
|
|
663
|
+
const refs = refsMap.get(item.id);
|
|
664
|
+
if (refs && refs.length > 0) {
|
|
665
|
+
item.sourceRefs = refs;
|
|
666
|
+
}
|
|
667
|
+
}
|
|
668
|
+
}
|
|
669
|
+
catch {
|
|
670
|
+
/* recipe_source_refs table may not exist */
|
|
671
|
+
}
|
|
628
672
|
}
|
|
629
673
|
/**
|
|
630
674
|
* 刷新索引(增量模式)
|
|
@@ -756,10 +800,23 @@ export class SearchEngine {
|
|
|
756
800
|
catch {
|
|
757
801
|
/* ignore */
|
|
758
802
|
}
|
|
803
|
+
// 提取 description 和 contentText 供 FieldWeightedScorer 字段级评分使用
|
|
804
|
+
let contentText = '';
|
|
805
|
+
try {
|
|
806
|
+
const content = JSON.parse(r.content || '{}');
|
|
807
|
+
contentText = [content.pattern, content.rationale, content.markdown]
|
|
808
|
+
.filter(Boolean)
|
|
809
|
+
.join(' ');
|
|
810
|
+
}
|
|
811
|
+
catch {
|
|
812
|
+
/* ignore */
|
|
813
|
+
}
|
|
759
814
|
return {
|
|
760
815
|
type: 'knowledge',
|
|
761
816
|
title: r.title,
|
|
762
817
|
trigger: r.trigger || '',
|
|
818
|
+
description: r.description || '',
|
|
819
|
+
contentText,
|
|
763
820
|
status: r.lifecycle,
|
|
764
821
|
knowledgeType: r.knowledgeType,
|
|
765
822
|
kind: r.kind || 'pattern',
|