@astro-minimax/ai 0.9.0 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +108 -18
- package/dist/cache/global-cache.d.ts +6 -2
- package/dist/cache/global-cache.d.ts.map +1 -1
- package/dist/cache/global-cache.js +24 -9
- package/dist/cache/index.d.ts +7 -6
- package/dist/cache/index.d.ts.map +1 -1
- package/dist/cache/index.js +12 -4
- package/dist/cache/injection-cache.d.ts +36 -0
- package/dist/cache/injection-cache.d.ts.map +1 -0
- package/dist/cache/injection-cache.js +90 -0
- package/dist/cache/kv-adapter.d.ts.map +1 -1
- package/dist/cache/kv-adapter.js +2 -1
- package/dist/cache/memory-adapter.d.ts.map +1 -1
- package/dist/cache/memory-adapter.js +2 -1
- package/dist/cache/response-cache.d.ts +10 -5
- package/dist/cache/response-cache.d.ts.map +1 -1
- package/dist/cache/response-cache.js +18 -6
- package/dist/components/AIChatContainer.d.ts +2 -2
- package/dist/components/AIChatContainer.d.ts.map +1 -1
- package/dist/components/AIChatContainer.js +8 -920
- package/dist/components/ChatInput.d.ts +15 -0
- package/dist/components/ChatInput.d.ts.map +1 -0
- package/dist/components/ChatInput.js +72 -0
- package/dist/components/ChatPanel.d.ts +1 -1
- package/dist/components/ChatPanel.d.ts.map +1 -1
- package/dist/components/ChatPanel.js +210 -672
- package/dist/components/CodeBlock.d.ts +31 -0
- package/dist/components/CodeBlock.d.ts.map +1 -0
- package/dist/components/CodeBlock.js +143 -0
- package/dist/components/MarkmapBlock.d.ts +4 -0
- package/dist/components/MarkmapBlock.d.ts.map +1 -0
- package/dist/components/MarkmapBlock.js +180 -0
- package/dist/components/MermaidBlock.d.ts +4 -0
- package/dist/components/MermaidBlock.d.ts.map +1 -0
- package/dist/components/MermaidBlock.js +193 -0
- package/dist/components/MessageBubble.d.ts +21 -0
- package/dist/components/MessageBubble.d.ts.map +1 -0
- package/dist/components/MessageBubble.js +233 -0
- package/dist/components/ReasoningBlock.d.ts +6 -0
- package/dist/components/ReasoningBlock.d.ts.map +1 -0
- package/dist/components/ReasoningBlock.js +11 -0
- package/dist/components/RichText.d.ts +41 -0
- package/dist/components/RichText.d.ts.map +1 -0
- package/dist/components/RichText.js +202 -0
- package/dist/components/VizShared.d.ts +57 -0
- package/dist/components/VizShared.d.ts.map +1 -0
- package/dist/components/VizShared.js +233 -0
- package/dist/components/tool-auto-continue.d.ts +5 -0
- package/dist/components/tool-auto-continue.d.ts.map +1 -0
- package/dist/components/tool-auto-continue.js +33 -0
- package/dist/constants.d.ts +61 -0
- package/dist/constants.d.ts.map +1 -0
- package/dist/constants.js +72 -0
- package/dist/data/index.d.ts +4 -3
- package/dist/data/index.d.ts.map +1 -1
- package/dist/data/index.js +4 -10
- package/dist/data/knowledge-types.d.ts +8 -0
- package/dist/data/knowledge-types.d.ts.map +1 -0
- package/dist/data/knowledge-types.js +14 -0
- package/dist/data/metadata-loader.d.ts +4 -28
- package/dist/data/metadata-loader.d.ts.map +1 -1
- package/dist/data/metadata-loader.js +11 -34
- package/dist/data/types.d.ts +17 -2
- package/dist/data/types.d.ts.map +1 -1
- package/dist/extensions/index.d.ts +5 -0
- package/dist/extensions/index.d.ts.map +1 -0
- package/dist/extensions/index.js +24 -0
- package/dist/extensions/injector.d.ts +14 -0
- package/dist/extensions/injector.d.ts.map +1 -0
- package/dist/extensions/injector.js +146 -0
- package/dist/extensions/loader.d.ts +5 -0
- package/dist/extensions/loader.d.ts.map +1 -0
- package/dist/extensions/loader.js +45 -0
- package/dist/extensions/registry.d.ts +4 -0
- package/dist/extensions/registry.d.ts.map +1 -0
- package/dist/extensions/registry.js +144 -0
- package/dist/extensions/types.d.ts +126 -0
- package/dist/extensions/types.d.ts.map +1 -0
- package/dist/extensions/types.js +0 -0
- package/dist/fact-registry/prompt-injector.d.ts +1 -1
- package/dist/fact-registry/prompt-injector.d.ts.map +1 -1
- package/dist/fact-registry/prompt-injector.js +2 -1
- package/dist/index.d.ts +3 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -2
- package/dist/intelligence/citation-guard.d.ts +2 -13
- package/dist/intelligence/citation-guard.d.ts.map +1 -1
- package/dist/intelligence/citation-guard.js +52 -23
- package/dist/intelligence/evidence-analysis.d.ts +24 -16
- package/dist/intelligence/evidence-analysis.d.ts.map +1 -1
- package/dist/intelligence/evidence-analysis.js +118 -20
- package/dist/intelligence/evidence-budget.d.ts +13 -0
- package/dist/intelligence/evidence-budget.d.ts.map +1 -0
- package/dist/intelligence/evidence-budget.js +49 -0
- package/dist/intelligence/index.d.ts +10 -4
- package/dist/intelligence/index.d.ts.map +1 -1
- package/dist/intelligence/index.js +27 -3
- package/dist/intelligence/keyword-extract.d.ts +1 -1
- package/dist/intelligence/keyword-extract.d.ts.map +1 -1
- package/dist/intelligence/keyword-extract.js +5 -9
- package/dist/intelligence/request-interpretation.d.ts +40 -0
- package/dist/intelligence/request-interpretation.d.ts.map +1 -0
- package/dist/intelligence/request-interpretation.js +71 -0
- package/dist/intelligence/response-templates.d.ts +1 -0
- package/dist/intelligence/response-templates.d.ts.map +1 -1
- package/dist/intelligence/response-templates.js +13 -0
- package/dist/prompt/dynamic-layer.d.ts +1 -5
- package/dist/prompt/dynamic-layer.d.ts.map +1 -1
- package/dist/prompt/dynamic-layer.js +145 -9
- package/dist/prompt/prompt-builder.d.ts +1 -1
- package/dist/prompt/prompt-builder.d.ts.map +1 -1
- package/dist/prompt/prompt-builder.js +5 -1
- package/dist/prompt/semi-static-layer.d.ts +1 -1
- package/dist/prompt/semi-static-layer.d.ts.map +1 -1
- package/dist/prompt/semi-static-layer.js +22 -12
- package/dist/prompt/static-layer.d.ts.map +1 -1
- package/dist/prompt/static-layer.js +37 -4
- package/dist/prompt/types.d.ts +9 -4
- package/dist/prompt/types.d.ts.map +1 -1
- package/dist/provider-manager/base.d.ts +5 -1
- package/dist/provider-manager/base.d.ts.map +1 -1
- package/dist/provider-manager/base.js +22 -2
- package/dist/provider-manager/config.d.ts.map +1 -1
- package/dist/provider-manager/config.js +3 -2
- package/dist/provider-manager/index.d.ts +1 -1
- package/dist/provider-manager/index.d.ts.map +1 -1
- package/dist/provider-manager/index.js +1 -2
- package/dist/provider-manager/manager.d.ts +10 -1
- package/dist/provider-manager/manager.d.ts.map +1 -1
- package/dist/provider-manager/manager.js +26 -10
- package/dist/provider-manager/openai.d.ts +2 -2
- package/dist/provider-manager/openai.d.ts.map +1 -1
- package/dist/provider-manager/openai.js +19 -4
- package/dist/provider-manager/types.d.ts +18 -38
- package/dist/provider-manager/types.d.ts.map +1 -1
- package/dist/provider-manager/workers.d.ts +2 -2
- package/dist/provider-manager/workers.d.ts.map +1 -1
- package/dist/provider-manager/workers.js +15 -4
- package/dist/query/followup.d.ts +7 -0
- package/dist/query/followup.d.ts.map +1 -0
- package/dist/query/followup.js +46 -0
- package/dist/query/intent.d.ts +6 -0
- package/dist/query/intent.d.ts.map +1 -0
- package/dist/query/intent.js +137 -0
- package/dist/query/types.d.ts +8 -0
- package/dist/query/types.d.ts.map +1 -0
- package/dist/query/types.js +0 -0
- package/dist/search/hybrid-search.d.ts +111 -0
- package/dist/search/hybrid-search.d.ts.map +1 -0
- package/dist/search/hybrid-search.js +326 -0
- package/dist/search/index.d.ts +11 -9
- package/dist/search/index.d.ts.map +1 -1
- package/dist/search/index.js +46 -10
- package/dist/search/scoring.d.ts +18 -0
- package/dist/search/scoring.d.ts.map +1 -0
- package/dist/search/{search-utils.js → scoring.js} +14 -27
- package/dist/search/search-api.d.ts +16 -1
- package/dist/search/search-api.d.ts.map +1 -1
- package/dist/search/search-api.js +118 -15
- package/dist/search/search-index.d.ts +2 -2
- package/dist/search/search-index.d.ts.map +1 -1
- package/dist/search/search-index.js +4 -2
- package/dist/search/session-cache.d.ts +4 -10
- package/dist/search/session-cache.d.ts.map +1 -1
- package/dist/search/session-cache.js +12 -45
- package/dist/search/types.d.ts +28 -0
- package/dist/search/types.d.ts.map +1 -1
- package/dist/search/vector-reranker.d.ts +3 -3
- package/dist/search/vector-reranker.d.ts.map +1 -1
- package/dist/search/vector-reranker.js +14 -2
- package/dist/server/chat-handler.d.ts +86 -1
- package/dist/server/chat-handler.d.ts.map +1 -1
- package/dist/server/chat-handler.js +835 -401
- package/dist/server/chat-message-utils.d.ts +6 -0
- package/dist/server/chat-message-utils.d.ts.map +1 -0
- package/dist/server/chat-message-utils.js +40 -0
- package/dist/server/chat-utils.d.ts +30 -0
- package/dist/server/chat-utils.d.ts.map +1 -0
- package/dist/server/chat-utils.js +88 -0
- package/dist/server/dev-server.js +238 -101
- package/dist/server/env-config.d.ts +22 -0
- package/dist/server/env-config.d.ts.map +1 -0
- package/dist/server/env-config.js +25 -0
- package/dist/server/errors.d.ts +1 -0
- package/dist/server/errors.d.ts.map +1 -1
- package/dist/server/errors.js +14 -7
- package/dist/server/index.d.ts +2 -4
- package/dist/server/index.d.ts.map +1 -1
- package/dist/server/index.js +4 -25
- package/dist/server/metadata-init.d.ts +10 -5
- package/dist/server/metadata-init.d.ts.map +1 -1
- package/dist/server/metadata-init.js +78 -34
- package/dist/server/notify.d.ts +12 -11
- package/dist/server/notify.d.ts.map +1 -1
- package/dist/server/notify.js +46 -48
- package/dist/server/prompt-runtime.d.ts +60 -0
- package/dist/server/prompt-runtime.d.ts.map +1 -0
- package/dist/server/prompt-runtime.js +284 -0
- package/dist/server/stream-helpers.d.ts +30 -16
- package/dist/server/stream-helpers.d.ts.map +1 -1
- package/dist/server/stream-helpers.js +152 -15
- package/dist/server/types.d.ts +47 -12
- package/dist/server/types.d.ts.map +1 -1
- package/dist/structured-output/generator.d.ts +6 -0
- package/dist/structured-output/generator.d.ts.map +1 -0
- package/dist/structured-output/generator.js +164 -0
- package/dist/structured-output/index.d.ts +4 -0
- package/dist/structured-output/index.d.ts.map +1 -0
- package/dist/structured-output/index.js +6 -0
- package/dist/structured-output/schemas/evidence.d.ts +88 -0
- package/dist/structured-output/schemas/evidence.d.ts.map +1 -0
- package/dist/structured-output/schemas/evidence.js +65 -0
- package/dist/structured-output/types.d.ts +69 -0
- package/dist/structured-output/types.d.ts.map +1 -0
- package/dist/structured-output/types.js +0 -0
- package/dist/tools/action-tools.d.ts +63 -0
- package/dist/tools/action-tools.d.ts.map +1 -0
- package/dist/tools/action-tools.js +158 -0
- package/dist/tools/index.d.ts +2 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +30 -0
- package/dist/utils/i18n.d.ts +1 -1
- package/dist/utils/i18n.d.ts.map +1 -1
- package/dist/utils/i18n.js +1 -1
- package/dist/utils/logger.d.ts +11 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +36 -0
- package/dist/utils/text.d.ts +11 -0
- package/dist/utils/text.d.ts.map +1 -0
- package/dist/utils/text.js +87 -0
- package/dist/utils/url.d.ts +19 -0
- package/dist/utils/url.d.ts.map +1 -0
- package/dist/utils/url.js +13 -0
- package/package.json +46 -12
- package/dist/intelligence/intent-detect.d.ts +0 -40
- package/dist/intelligence/intent-detect.d.ts.map +0 -1
- package/dist/intelligence/intent-detect.js +0 -93
- package/dist/providers/index.d.ts +0 -2
- package/dist/providers/index.d.ts.map +0 -1
- package/dist/providers/index.js +0 -5
- package/dist/search/search-utils.d.ts +0 -47
- package/dist/search/search-utils.d.ts.map +0 -1
- package/dist/stream/index.d.ts +0 -3
- package/dist/stream/index.d.ts.map +0 -1
- package/dist/stream/index.js +0 -8
- package/dist/stream/mock-stream.d.ts +0 -12
- package/dist/stream/mock-stream.d.ts.map +0 -1
- package/dist/stream/mock-stream.js +0 -26
- package/dist/stream/response.d.ts +0 -10
- package/dist/stream/response.d.ts.map +0 -1
- package/dist/stream/response.js +0 -21
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RRF 混合检索模块
|
|
3
|
+
*
|
|
4
|
+
* 实现 Reciprocal Rank Fusion 算法,融合关键词检索和向量检索结果。
|
|
5
|
+
*
|
|
6
|
+
* RRF 公式: score(d) = Σ 1/(k + rank(d))
|
|
7
|
+
* 默认 k=60,源自 Cormack, Clarke, Buettcher 2009 论文。
|
|
8
|
+
*/
|
|
9
|
+
import type { ArticleContext } from "./types.js";
|
|
10
|
+
/**
 * One chunk of an article, as consumed by the chunk-level search helpers
 * declared in this file.
 */
export interface ArticleChunk {
|
|
11
|
+
/** Chunk identifier; expandChunkMatchesWithNeighbors de-duplicates on this value. */
id: string;
|
|
12
|
+
postId: string;
|
|
13
|
+
/** Heading text of the chunk; the implementation treats an empty value as "no heading". */
heading: string;
|
|
14
|
+
/** Body text of the chunk. */
content: string;
|
|
15
|
+
/** Ordering value; used as the tie-breaker when neighbor-expanded matches have equal scores. */
position: number;
|
|
16
|
+
tokenCount: number;
|
|
17
|
+
/** Header values keyed by name; the relevance scorer reads the `H1` and `H2` keys. */
headers: Record<string, string>;
|
|
18
|
+
}
|
|
19
|
+
/** An ArticleContext that may additionally carry its list of chunks. */
export interface ArticleWithChunks extends ArticleContext {
|
|
20
|
+
/** Chunks of this article; the chunk search functions skip articles where this is missing or empty. */
chunks?: ArticleChunk[];
|
|
21
|
+
}
|
|
22
|
+
/** Options accepted by hybridSearch. */
export interface RRFConfig {
|
|
23
|
+
/** RRF constant k; defaults to 60. */
|
|
24
|
+
k?: number;
|
|
25
|
+
/** BM25/TF-IDF weight; documented default 0.5. NOTE(review): the hybridSearch implementation in this package version never reads this field - confirm whether weighting is planned. */
|
|
26
|
+
bm25Weight?: number;
|
|
27
|
+
/** Vector weight; documented default 0.5. NOTE(review): likewise not read by the hybridSearch implementation - confirm. */
|
|
28
|
+
vectorWeight?: number;
|
|
29
|
+
/** Number of results to return; defaults to 10. */
|
|
30
|
+
topK?: number;
|
|
31
|
+
}
|
|
32
|
+
/** An article returned by hybridSearch, annotated with its fused score and per-source ranks. */
export interface HybridSearchResult extends ArticleContext {
|
|
33
|
+
/** Fused Reciprocal Rank Fusion score of the article. */
rrfScore: number;
|
|
34
|
+
/** 1-based position of the article in the BM25 result list. */
bm25Rank?: number;
|
|
35
|
+
/** 1-based position of the article in the vector result list; absent when the article was not in that list. */
vectorRank?: number;
|
|
36
|
+
/** NOTE(review): not populated by hybridSearch in this package version - presumably filled in by a caller; confirm. */
matchedChunks?: ArticleChunk[];
|
|
37
|
+
}
|
|
38
|
+
/** A scored chunk together with the article it belongs to. */
export interface ChunkMatchResult {
|
|
39
|
+
/** Article that owns the chunk. */
article: ArticleWithChunks;
|
|
40
|
+
/** The matched chunk. */
chunk: ArticleChunk;
|
|
41
|
+
/** Relevance score; higher values sort first. */
score: number;
|
|
42
|
+
}
|
|
43
|
+
/** Optional inputs for computeChunkRelevance. */
export interface ChunkRelevanceOptions {
|
|
44
|
+
/** Untokenized query text; enables the exact-phrase bonus. */
rawQuery?: string;
|
|
45
|
+
/** Literal anchor strings to look for; when empty or missing, anchors are extracted from rawQuery instead. */
rawAnchors?: string[];
|
|
46
|
+
}
|
|
47
|
+
/**
 * Options for expandChunkMatchesWithNeighbors.
 * NOTE(review): the implementation's default object only applies when no config
 * is passed at all; a config that omits includePrevious/includeNext adds no neighbors.
 */
export interface NeighborChunkConfig {
|
|
48
|
+
/** Also include the chunk that precedes each match in its article. */
includePrevious?: boolean;
|
|
49
|
+
/** Also include the chunk that follows each match in its article. */
includeNext?: boolean;
|
|
50
|
+
/** When non-empty, a neighbor is only added if its content or heading contains one of these anchors. */
rawAnchors?: string[];
|
|
51
|
+
}
|
|
52
|
+
/**
|
|
53
|
+
* Reciprocal Rank Fusion algorithm.
|
|
54
|
+
*
|
|
55
|
+
* Fuses the rankings of several retrieval systems, relying only on ranks rather than on scores.
|
|
56
|
+
*
|
|
57
|
+
* @param rankings - ranked results from multiple retrieval systems
|
|
58
|
+
* @param k - RRF constant, defaults to 60
|
|
59
|
+
*/
|
|
60
|
+
export declare function reciprocalRankFusion(rankings: Array<Array<{
|
|
61
|
+
url: string;
|
|
62
|
+
}>>, k?: number): Map<string, number>;
|
|
63
|
+
/**
|
|
64
|
+
* Runs the BM25 + vector hybrid search.
|
|
65
|
+
*
|
|
66
|
+
* @param _query - query text (not read by the implementation in this package version)
|
|
67
|
+
* @param bm25Results - BM25/TF-IDF search results
|
|
68
|
+
* @param vectorResults - vector search results (optional, may be null)
|
|
69
|
+
* @param config - RRF configuration
|
|
70
|
+
*/
|
|
71
|
+
export declare function hybridSearch(_query: string, bm25Results: ArticleContext[], vectorResults: ArticleContext[] | null, config?: RRFConfig): HybridSearchResult[];
|
|
72
|
+
/**
|
|
73
|
+
* Searches for relevant content at chunk (paragraph) level.
|
|
74
|
+
*
|
|
75
|
+
* @param query - query text
|
|
76
|
+
* @param articles - articles that carry their chunks
|
|
77
|
+
* @param topK - number of results to return, defaults to 10
|
|
78
|
+
*/
|
|
79
|
+
export declare function searchChunks(query: string, articles: ArticleWithChunks[], topK?: number): ChunkMatchResult[];
|
|
80
|
+
/**
|
|
81
|
+
* Computes the relevance score of a chunk.
|
|
82
|
+
*
|
|
83
|
+
* Factors considered:
|
|
84
|
+
* 1. Heading match (weight 2.0)
|
|
85
|
+
* 2. Content keyword match
|
|
86
|
+
* 3. Header-level match (chunks with an H1/H2 header are weighted higher)
|
|
87
|
+
*/
|
|
88
|
+
export declare function computeChunkRelevance(queryTokens: string[], chunk: ArticleChunk, article?: Pick<ArticleContext, "title" | "categories" | "keyPoints">, options?: ChunkRelevanceOptions): number;
|
|
89
|
+
/**
|
|
90
|
+
* Formats chunk match results into the text that gets injected.
|
|
91
|
+
*
|
|
92
|
+
* @param matches - chunk match results
|
|
93
|
+
* @param maxTokens - maximum number of tokens, defaults to 2000
|
|
94
|
+
*/
|
|
95
|
+
export declare function formatChunksForInjection(matches: ChunkMatchResult[], maxTokens?: number): string;
|
|
96
|
+
/** Options for selectRelevantChunks (callers pass a Partial of this). */
export interface ChunkInjectionConfig {
|
|
97
|
+
/** Maximum number of tokens to inject. */
|
|
98
|
+
maxTokens: number;
|
|
99
|
+
/** Minimum relevance score a chunk must reach. */
|
|
100
|
+
minChunkScore: number;
|
|
101
|
+
/** Maximum number of chunks injected per article. */
|
|
102
|
+
maxChunksPerArticle: number;
|
|
103
|
+
/** Literal anchor strings forwarded to the relevance scorer. */
rawAnchors?: string[];
|
|
104
|
+
/** When set, matches whose article id equals this value are ordered before all other matches. */
currentArticleId?: string;
|
|
105
|
+
}
|
|
106
|
+
/**
|
|
107
|
+
* Selects the most relevant chunks for injection.
|
|
108
|
+
*/
|
|
109
|
+
export declare function selectRelevantChunks(query: string, articles: ArticleWithChunks[], config?: Partial<ChunkInjectionConfig>): ChunkMatchResult[];
|
|
110
|
+
/**
 * Expands chunk matches with the chunks directly before and/or after them in
 * the same article, and returns the combined list sorted by score.
 *
 * @param matches - chunk matches to expand
 * @param config - which neighbors to include and optional anchor filter
 */
export declare function expandChunkMatchesWithNeighbors(matches: ChunkMatchResult[], config?: NeighborChunkConfig): ChunkMatchResult[];
|
|
111
|
+
//# sourceMappingURL=hybrid-search.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hybrid-search.d.ts","sourceRoot":"","sources":["../../src/search/hybrid-search.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAKjD,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACjC;AAED,MAAM,WAAW,iBAAkB,SAAQ,cAAc;IACvD,MAAM,CAAC,EAAE,YAAY,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,SAAS;IACxB,qBAAqB;IACrB,CAAC,CAAC,EAAE,MAAM,CAAC;IACX,4BAA4B;IAC5B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,kBAAkB;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,iBAAiB;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,kBAAmB,SAAQ,cAAc;IACxD,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,YAAY,EAAE,CAAC;CAChC;AAED,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,iBAAiB,CAAC;IAC3B,KAAK,EAAE,YAAY,CAAC;IACpB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,qBAAqB;IACpC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;CACvB;AAED,MAAM,WAAW,mBAAmB;IAClC,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;CACvB;AASD;;;;;;;GAOG;AACH,wBAAgB,oBAAoB,CAClC,QAAQ,EAAE,KAAK,CAAC,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,CAAC,CAAC,EACvC,CAAC,GAAE,MAAsB,GACxB,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAYrB;AAID;;;;;;;GAOG;AACH,wBAAgB,YAAY,CAC1B,MAAM,EAAE,MAAM,EACd,WAAW,EAAE,cAAc,EAAE,EAC7B,aAAa,EAAE,cAAc,EAAE,GAAG,IAAI,EACtC,MAAM,CAAC,EAAE,SAAS,GACjB,kBAAkB,EAAE,CA8CtB;AAID;;;;;;GAMG;AACH,wBAAgB,YAAY,CAC1B,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,iBAAiB,EAAE,EAC7B,IAAI,GAAE,MAAW,GAChB,gBAAgB,EAAE,CAkBpB;AAED;;;;;;;GAOG;AACH,wBAAgB,qBAAqB,CACnC,WAAW,EAAE,MAAM,EAAE,EACrB,KAAK,EAAE,YAAY,EACnB,OAAO,CAAC,EAAE,IAAI,CAAC,cAAc,EAAE,OAAO,GAAG,YAAY,GAAG,WAAW,CAAC,EACpE,OAAO,GAAE,qBAA0B,GAClC,MAAM,CAoGR;AA2FD;;;;;GAKG;AACH,wBAAgB,wBAAwB,CACtC,OAAO,EAAE,gBAAgB,EAAE,EAC3B,SAAS,GAAE,MAAa,GACvB,MAAM,CAiBR;AAkBD,MAAM,WAAW,oBAAoB;IACnC,mBAAmB;IACnB,SAAS,EAAE,MAA
M,CAAC;IAClB,cAAc;IACd,aAAa,EAAE,MAAM,CAAC;IACtB,kBAAkB;IAClB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAQD;;GAEG;AACH,wBAAgB,oBAAoB,CAClC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,iBAAiB,EAAE,EAC7B,MAAM,CAAC,EAAE,OAAO,CAAC,oBAAoB,CAAC,GACrC,gBAAgB,EAAE,CAmDpB;AAED,wBAAgB,+BAA+B,CAC7C,OAAO,EAAE,gBAAgB,EAAE,EAC3B,MAAM,GAAE,mBAAkE,GACzE,gBAAgB,EAAE,CAsDpB"}
|
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
import { tokenize, normalizeText, extractCodeAnchors } from "../utils/text.js";
|
|
2
|
+
// Default RRF constant k, used when the caller does not supply one.
const DEFAULT_RRF_K = 60;
|
|
3
|
+
// Default number of results hybridSearch returns when config.topK is not given.
const DEFAULT_TOP_K = 10;
|
|
4
|
+
/**
 * Reciprocal Rank Fusion: merges several ranked lists into one score table.
 *
 * Every document earns `1 / (k + rank)` (rank is 1-based) from each ranking it
 * appears in; the contributions are summed per `url`.
 *
 * @param {Array<Array<{ url: string }>>} rankings - Ranked lists, best hit first.
 * @param {number} [k=DEFAULT_RRF_K] - Constant added to every rank.
 * @returns {Map<string, number>} url -> fused score, iterated in descending score order.
 */
function reciprocalRankFusion(rankings, k = DEFAULT_RRF_K) {
  const scores = new Map();
  for (const ranking of rankings) {
    // A url that is repeated inside one ranking only counts once, at its best
    // (first) position; otherwise a duplicated entry would inflate its score.
    const seenInRanking = new Set();
    ranking.forEach((doc, index) => {
      const url = doc?.url;
      // Entries without a url cannot be fused with anything, so skip them
      // instead of accumulating a score under an `undefined` key.
      if (url == null || seenInRanking.has(url)) return;
      seenInRanking.add(url);
      const rank = index + 1;
      scores.set(url, (scores.get(url) ?? 0) + 1 / (k + rank));
    });
  }
  // Map preserves insertion order, so inserting sorted entries yields a
  // score-descending iteration order.
  return new Map([...scores.entries()].sort((a, b) => b[1] - a[1]));
}
|
|
15
|
+
/**
 * Fuses keyword (BM25/TF-IDF) results with optional vector results via RRF.
 *
 * Without vector results the keyword order is kept and each hit gets the RRF
 * score of its own rank. With vector results both lists are fused; only
 * articles present in `bm25Results` are returned (a url that appears solely in
 * `vectorResults` contributes no result).
 *
 * NOTE(review): `config.bm25Weight` / `config.vectorWeight` are declared on
 * RRFConfig but are not read here - both rankings are fused with equal weight.
 *
 * @param {string} _query - Query text; currently unused.
 * @param {Array<{ url: string }>} bm25Results - Keyword results, best first.
 * @param {Array<{ url: string }> | null} vectorResults - Vector results, best first, or null.
 * @param {{ k?: number, topK?: number }} [config] - RRF constant and result limit.
 * @returns {Array<object>} Articles extended with `rrfScore`, `bm25Rank` and `vectorRank`.
 */
function hybridSearch(_query, bm25Results, vectorResults, config) {
  const { k = DEFAULT_RRF_K, topK = DEFAULT_TOP_K } = config ?? {};
  // Normalize the limit once so both code paths treat zero, negative and
  // fractional values the same way.
  const limit = Math.max(0, Math.floor(topK));
  if (!bm25Results.length || limit === 0) return [];

  if (!vectorResults?.length) {
    return bm25Results.slice(0, limit).map((article, index) => ({
      ...article,
      rrfScore: 1 / (k + index + 1),
      bm25Rank: index + 1,
    }));
  }

  const toRanking = (results) => results.map((result) => ({ url: result.url }));
  const fusedScores = reciprocalRankFusion(
    [toRanking(bm25Results), toRanking(vectorResults)],
    k,
  );

  // Lookup tables built once instead of scanning bm25Results for every fused url.
  const bm25ArticleByUrl = new Map(bm25Results.map((article) => [article.url, article]));
  const bm25RankByUrl = new Map();
  bm25Results.forEach((article, index) => {
    // Keep the first (best) position if a url occurs more than once.
    if (!bm25RankByUrl.has(article.url)) bm25RankByUrl.set(article.url, index + 1);
  });
  const vectorRankByUrl = new Map(
    vectorResults.map((article, index) => [article.url, index + 1]),
  );

  const results = [];
  for (const [url, rrfScore] of fusedScores) {
    if (results.length >= limit) break;
    const article = bm25ArticleByUrl.get(url);
    if (!article) continue; // url only known from the vector ranking
    results.push({
      ...article,
      rrfScore,
      bm25Rank: bm25RankByUrl.get(url),
      vectorRank: vectorRankByUrl.get(url),
    });
  }
  return results;
}
|
|
48
|
+
/**
 * Scores every chunk of the given articles against the query and returns the
 * best matches.
 *
 * The query is tokenized once; chunks whose relevance score is not positive
 * are dropped. Articles without chunks are skipped.
 *
 * @param {string} query - Query text.
 * @param {Array<object>} articles - Articles that may carry a `chunks` array.
 * @param {number} [topK=DEFAULT_TOP_K] - Maximum number of matches to return.
 * @returns {Array<{ article: object, chunk: object, score: number }>} Matches, highest score first.
 */
function searchChunks(query, articles, topK = DEFAULT_TOP_K) {
  const queryTokens = tokenize(query);
  if (!queryTokens.length || !articles.length) return [];

  // Clamp the limit: a negative value passed straight to slice() would drop
  // items from the end instead of returning nothing.
  const limit = Math.max(0, Math.floor(topK));
  if (limit === 0) return [];

  const results = [];
  for (const article of articles) {
    for (const chunk of article.chunks ?? []) {
      const score = computeChunkRelevance(queryTokens, chunk, article);
      if (score > 0) {
        results.push({ article, chunk, score });
      }
    }
  }
  return results.sort((a, b) => b.score - a.score).slice(0, limit);
}
|
|
63
|
+
/**
 * Computes a relevance score for one chunk against a tokenized query.
 *
 * The score is built in this order:
 *  1. bonus for the raw query / quoted phrase appearing in the chunk,
 *  2. bonus for literal code anchors appearing in the chunk or article title,
 *  3. +2 per query token overlapping a heading token, +1.2 per query token
 *     overlapping an article-title token,
 *  4. up to +1.5 for the share of query tokens found in the content,
 *  5. x1.1 when the chunk has an H1 or H2 header,
 *  6. a penalty or boost depending on whether the longest query tokens occur
 *     in the article metadata, title or chunk heading.
 *
 * @param {string[]} queryTokens - Tokenized query.
 * @param {{ heading: string, content: string, headers?: Record<string, string> }} chunk - Chunk to score.
 * @param {{ title: string, categories?: string[], keyPoints?: string[] }} [article] - Owning article.
 * @param {{ rawQuery?: string, rawAnchors?: string[] }} [options] - Raw query and explicit anchors.
 * @returns {number} Relevance score; higher is more relevant.
 */
function computeChunkRelevance(queryTokens, chunk, article, options = {}) {
  const rawQuery = options.rawQuery?.trim() ?? "";
  const rawAnchors =
    options.rawAnchors && options.rawAnchors.length > 0
      ? options.rawAnchors
      : extractCodeAnchors(rawQuery);

  const headingTokens = tokenize(chunk.heading);
  const contentTokens = tokenize(chunk.content);
  const articleTitleTokens = article ? tokenize(article.title) : [];
  const headingText = normalizeText(chunk.heading);
  const titleText = article ? normalizeText(article.title) : "";
  const articleMetaText = article
    ? normalizeText(
        [article.title, ...(article.categories ?? []), ...(article.keyPoints ?? [])].join(" "),
      )
    : "";

  // The three longest query tokens (at least 2 chars) act as topic anchors.
  const anchorTokens = queryTokens
    .filter((token) => token.length >= 2)
    .sort((a, b) => b.length - a.length)
    .slice(0, 3);
  // Broad topic words (ai, rag, and the Chinese words for feature,
  // characteristic, design, architecture, module, chat, configuration) that
  // are excluded from the "strong" anchors.
  const genericTopicTokens = new Set([
    "ai",
    "rag",
    "\u529F\u80FD",
    "\u7279\u6027",
    "\u529F\u80FD\u7279\u6027",
    "\u8BBE\u8BA1",
    "\u67B6\u6784",
    "\u6A21\u5757",
    "\u804A\u5929",
    "\u914D\u7F6E",
  ]);
  const strongAnchorTokens = anchorTokens.filter((token) => !genericTopicTokens.has(token));

  // True when the token and any candidate contain one another.
  const overlapsAny = (candidates, token) =>
    candidates.some((candidate) => candidate.includes(token) || token.includes(candidate));

  let score = 0;
  score += scoreExactQueryMatches(rawQuery, chunk);
  score += scoreExactCodeAnchorMatches(rawAnchors, chunk, article);

  let contentMatches = 0;
  for (const token of queryTokens) {
    if (overlapsAny(headingTokens, token)) {
      score += 2;
    }
    if (overlapsAny(articleTitleTokens, token)) {
      score += 1.2;
    }
    if (overlapsAny(contentTokens, token)) {
      contentMatches++;
    }
  }
  // Guard the division: with no query tokens 0 / 0 would turn the whole score
  // (including the exact-match bonuses above) into NaN.
  if (queryTokens.length > 0) {
    score += (contentMatches / queryTokens.length) * 1.5;
  }

  if (chunk.headers?.H1 || chunk.headers?.H2) {
    score *= 1.1;
  }

  if (anchorTokens.length > 0) {
    const anchorHitCount = anchorTokens.filter((term) => articleMetaText.includes(term)).length;
    if (anchorHitCount === 0) {
      score *= 0.72;
    } else {
      score *= 1 + Math.min(anchorHitCount, 2) * 0.08;
    }
  }

  if (strongAnchorTokens.length > 0) {
    const strongTitleHits = strongAnchorTokens.filter((token) => titleText.includes(token)).length;
    const strongHeadingHits = strongAnchorTokens.filter((token) =>
      headingText.includes(token),
    ).length;
    if (strongTitleHits === 0 && strongHeadingHits === 0) {
      score *= 0.58;
    } else {
      score *= 1 + Math.min(strongTitleHits + strongHeadingHits, 2) * 0.12;
    }
  }
  return score;
}
|
|
142
|
+
/**
 * Sums a bonus for every anchor string found in the chunk or article title.
 *
 * Per anchor, the first matching rule wins:
 *   literal hit in content 5.5, heading 4.2, article title 2.5;
 *   otherwise normalized hit in content 3.5, heading 2.2, article title 1.4.
 * Anchors shorter than two characters, or empty after normalization, add nothing.
 *
 * @param {string[]} rawAnchors - Anchor strings taken from the query.
 * @param {{ heading: string, content: string }} chunk - Chunk being scored.
 * @param {{ title: string }} [article] - Owning article, if known.
 * @returns {number} Total bonus.
 */
function scoreExactCodeAnchorMatches(rawAnchors, chunk, article) {
  if (rawAnchors.length === 0) return 0;

  const contentNorm = normalizeText(chunk.content);
  const headingNorm = normalizeText(chunk.heading);
  const titleNorm = article ? normalizeText(article.title) : "";

  // Bonus contributed by a single anchor.
  const bonusFor = (anchor) => {
    if (anchor.length < 2) return 0;
    const anchorNorm = normalizeText(anchor);
    if (!anchorNorm) return 0;

    // Literal (case- and format-preserving) hits rank highest.
    if (chunk.content.includes(anchor)) return 5.5;
    if (chunk.heading.includes(anchor)) return 4.2;
    if (article?.title.includes(anchor)) return 2.5;

    // Fall back to comparing the normalized forms.
    if (contentNorm.includes(anchorNorm)) return 3.5;
    if (headingNorm.includes(anchorNorm)) return 2.2;
    if (titleNorm.includes(anchorNorm)) return 1.4;
    return 0;
  };

  let total = 0;
  for (const anchor of rawAnchors) {
    total += bonusFor(anchor);
  }
  return total;
}
|
|
174
|
+
/**
 * Bonus for the query phrase (or its longest quoted part) occurring in a chunk.
 *
 * Phrases whose normalized form is shorter than 12 characters are ignored.
 * Bonuses add up: literal content hit 6 (or normalized content hit 4.5),
 * +8 when a heading-less chunk starts with the phrase, +1.5 when the heading
 * contains the phrase.
 *
 * @param {string} query - Raw query text.
 * @param {{ heading: string, content: string }} chunk - Chunk being scored.
 * @returns {number} Total bonus, 0 when nothing matches.
 */
function scoreExactQueryMatches(query, chunk) {
  const phrase = extractLikelyQuotedText(query);
  if (!phrase) return 0;

  const phraseNorm = normalizeText(phrase);
  if (phraseNorm.length < 12) return 0;

  const { content, heading } = chunk;
  const contentNorm = normalizeText(content);
  const headingNorm = normalizeText(heading);

  let contentBonus = 0;
  if (content.includes(phrase)) {
    contentBonus = 6;
  } else if (contentNorm.includes(phraseNorm)) {
    contentBonus = 4.5;
  }
  const leadBonus = !heading && contentNorm.startsWith(phraseNorm) ? 8 : 0;
  const headingBonus = headingNorm.includes(phraseNorm) ? 1.5 : 0;

  return contentBonus + leadBonus + headingBonus;
}
|
|
196
|
+
/**
 * Returns the text the user most likely wants matched verbatim.
 *
 * If the query contains quoted passages (ASCII, curly, or CJK quotation marks
 * and brackets), the longest one is returned; otherwise the trimmed query.
 *
 * Single quotes that sit inside a word (`what's`, `user's`) are treated as
 * apostrophes, not as quote delimiters: an opening single quote must not be
 * preceded by an ASCII letter or digit, and a closing one must not be followed
 * by one.
 *
 * @param {string} query - Raw query text.
 * @returns {string} Longest quoted passage, the trimmed query, or "" for empty or non-string input.
 */
function extractLikelyQuotedText(query) {
  const trimmed = typeof query === "string" ? query.trim() : "";
  if (!trimmed) return "";

  const quotedPattern =
    /(?:["“”「」『』《》]|(?<![A-Za-z0-9])['‘’])(.+?)(?:["“”「」『』《》]|['‘’](?![A-Za-z0-9]))/g;

  // Keep the longest passage; on ties the earliest one wins.
  let longest = "";
  for (const match of trimmed.matchAll(quotedPattern)) {
    const inner = match[1].trim();
    if (inner.length > longest.length) {
      longest = inner;
    }
  }
  return longest || trimmed;
}
|
|
205
|
+
/**
 * Renders chunk matches into one text block, stopping at a token budget.
 *
 * Matches are rendered in the given order. As soon as one rendered chunk would
 * push the estimated token total past `maxTokens`, it and every later match
 * are left out. Rendered chunks are separated by a blank line.
 *
 * @param {Array<object>} matches - Chunk matches, most relevant first.
 * @param {number} [maxTokens=2000] - Estimated-token budget.
 * @returns {string} Joined text, or "" when nothing fits.
 */
function formatChunksForInjection(matches, maxTokens = 2e3) {
  if (!matches.length) return "";

  const sections = [];
  let spentTokens = 0;
  // `some` stops at the first match that does not fit into the budget.
  matches.some((match) => {
    const rendered = formatChunkForInjection(match);
    const cost = estimateTokens(rendered);
    const overBudget = spentTokens + cost > maxTokens;
    if (!overBudget) {
      sections.push(rendered);
      spentTokens += cost;
    }
    return overBudget;
  });
  return sections.join("\n\n");
}
|
|
218
|
+
/**
 * Renders one chunk match as "heading line, content excerpt, blank line, source line".
 *
 * The heading line is empty when the chunk has no heading. Content is cut to
 * at most 500 UTF-16 code units; if the cut would fall inside a surrogate pair
 * the dangling half is dropped so the output never ends the excerpt with a
 * lone surrogate.
 *
 * @param {{ article: { title: string, url: string }, chunk: { heading: string, content: string } }} match - Match to render.
 * @returns {string} Rendered text.
 */
function formatChunkForInjection(match) {
  const { article, chunk } = match;
  const MAX_CONTENT_CHARS = 500;

  let excerpt = chunk.content.slice(0, MAX_CONTENT_CHARS);
  if (chunk.content.length > MAX_CONTENT_CHARS) {
    const lastUnit = excerpt.charCodeAt(excerpt.length - 1);
    // 0xD800-0xDBFF are high surrogates: the first half of an astral character.
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      excerpt = excerpt.slice(0, -1);
    }
  }

  const heading = chunk.heading ? `\u3010${chunk.heading}\u3011` : "";
  const source = `\u6765\u6E90: [${article.title}](${article.url})`;
  return `${heading}
${excerpt}

${source}`;
}
|
|
227
|
+
/**
 * Rough token estimate for a piece of text.
 *
 * Characters in the ranges U+4E00-U+9FFF and U+3400-U+4DBF are counted at two
 * characters per token, everything else at four characters per token; each
 * part is rounded up separately.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count.
 */
function estimateTokens(text) {
  const withoutCjk = text.replace(/[\u4e00-\u9fff\u3400-\u4dbf]/g, "");
  const otherCount = withoutCjk.length;
  const cjkCount = text.length - otherCount;
  const cjkTokens = Math.ceil(cjkCount / 2);
  const otherTokens = Math.ceil(otherCount / 4);
  return cjkTokens + otherTokens;
}
|
|
232
|
+
// Defaults that selectRelevantChunks merges with the caller's config.
const DEFAULT_INJECTION_CONFIG = {
|
|
233
|
+
// Token budget (2000). NOTE(review): selectRelevantChunks does not read this value - confirm where it is enforced.
maxTokens: 2e3,
|
|
234
|
+
// Chunks scoring below this threshold are discarded.
minChunkScore: 0.3,
|
|
235
|
+
// At most this many chunks are kept per article.
maxChunksPerArticle: 3
|
|
236
|
+
};
|
|
237
|
+
/**
 * Picks the chunks that are most relevant to a query, for prompt injection.
 *
 * Every chunk is scored with computeChunkRelevance (raw query and anchors
 * included). Per article, chunks below `minChunkScore` are dropped and the
 * best `maxChunksPerArticle` are kept. The survivors are ranked globally;
 * when `currentArticleId` is set, chunks of that article are moved to the
 * front. At most 20 matches are returned.
 *
 * @param {string} query - Raw query text.
 * @param {Array<object>} articles - Articles that may carry a `chunks` array.
 * @param {object} [config] - Partial ChunkInjectionConfig; missing or undefined fields fall back to the defaults.
 * @returns {Array<{ article: object, chunk: object, score: number }>} Selected matches.
 */
function selectRelevantChunks(query, articles, config) {
  const MAX_SELECTED_CHUNKS = 20;

  // Ignore overrides that are explicitly `undefined`; spreading them as-is
  // would erase the default (e.g. `score >= undefined` rejects every chunk).
  const overrides = Object.fromEntries(
    Object.entries(config ?? {}).filter(([, value]) => value !== undefined),
  );
  const cfg = { ...DEFAULT_INJECTION_CONFIG, ...overrides };

  const queryTokens = tokenize(query);
  if (!queryTokens.length) return [];

  const allMatches = [];
  for (const article of articles) {
    if (!article.chunks?.length) continue;
    const articleMatches = [];
    for (const chunk of article.chunks) {
      const score = computeChunkRelevance(queryTokens, chunk, article, {
        rawQuery: query,
        rawAnchors: cfg.rawAnchors,
      });
      if (score >= cfg.minChunkScore) {
        articleMatches.push({ article, chunk, score });
      }
    }
    articleMatches.sort((a, b) => b.score - a.score);
    allMatches.push(...articleMatches.slice(0, cfg.maxChunksPerArticle));
  }

  const globallyRanked = allMatches.sort((a, b) => b.score - a.score);
  const selected = cfg.currentArticleId
    ? [
        // Chunks of the article currently being viewed go first.
        ...globallyRanked.filter((match) => match.article.id === cfg.currentArticleId),
        ...globallyRanked.filter((match) => match.article.id !== cfg.currentArticleId),
      ].slice(0, MAX_SELECTED_CHUNKS)
    : globallyRanked.slice(0, MAX_SELECTED_CHUNKS);

  log.debug(
    `selectRelevantChunks: queryTokens=${queryTokens.length}, articles=${articles.length}, matched=${allMatches.length}, selected=${selected.length}, maxPerArticle=${cfg.maxChunksPerArticle}, minScore=${cfg.minChunkScore}`
  );
  if (selected.length > 0) {
    log.debug(
      `selectRelevantChunks top: ${selected.slice(0, 5).map(
        (match) => `${match.article.title}#${match.chunk.id}:${match.score.toFixed(3)}`
      ).join(", ")}`
    );
  }
  return selected;
}
|
|
274
|
+
/**
 * Adds the chunks directly before and/or after each match in its article.
 *
 * Neighbors get a discounted copy of the match score (previous x0.85, next
 * x0.9). When `config.rawAnchors` is non-empty, a neighbor is only added if
 * its content or heading contains one of the anchors, literally or after
 * normalization; direct matches are always kept. Each chunk id appears at
 * most once - the first entry that claims an id wins. The result is sorted by
 * score (descending), then by chunk position.
 *
 * NOTE(review): the default config only applies when no config is passed; a
 * config object without includePrevious/includeNext adds no neighbors.
 * NOTE(review): de-duplication uses `chunk.id` alone, which presumes ids are
 * unique across articles - confirm.
 * NOTE(review): a neighbor added for an earlier match takes precedence over a
 * later direct match of the same chunk (which is then dropped) - confirm this
 * is intended.
 *
 * @param {Array<{ article: object, chunk: object, score: number }>} matches - Direct matches.
 * @param {{ includePrevious?: boolean, includeNext?: boolean, rawAnchors?: string[] }} [config] - Expansion options.
 * @returns {Array<{ article: object, chunk: object, score: number }>} Matches plus accepted neighbors.
 */
function expandChunkMatchesWithNeighbors(matches, config = { includePrevious: true, includeNext: true }) {
  const rawAnchors = config.rawAnchors ?? [];
  const expanded = [];
  const seenChunkIds = new Set();

  // Anchors are normalized once, on first use, instead of once per candidate.
  let normalizedAnchors = null;

  const neighborMentionsAnchor = (chunk) => {
    if (rawAnchors.length === 0) return true;
    if (normalizedAnchors === null) {
      normalizedAnchors = rawAnchors.map((anchor) => normalizeText(anchor));
    }
    // Chunk text is normalized at most once per candidate, and only when the
    // cheaper literal checks did not already succeed.
    let normalizedContent;
    let normalizedHeading;
    return rawAnchors.some((anchor, index) => {
      if (chunk.content.includes(anchor) || chunk.heading.includes(anchor)) return true;
      if (normalizedContent === undefined) normalizedContent = normalizeText(chunk.content);
      if (normalizedContent.includes(normalizedAnchors[index])) return true;
      if (normalizedHeading === undefined) normalizedHeading = normalizeText(chunk.heading);
      return normalizedHeading.includes(normalizedAnchors[index]);
    });
  };

  const add = (candidate, isDirectMatch) => {
    if (seenChunkIds.has(candidate.chunk.id)) return;
    if (!isDirectMatch && !neighborMentionsAnchor(candidate.chunk)) return;
    seenChunkIds.add(candidate.chunk.id);
    expanded.push(candidate);
  };

  for (const match of matches) {
    if (!match) continue;
    add(match, true);

    const siblings = match.article.chunks ?? [];
    const index = siblings.findIndex((chunk) => chunk.id === match.chunk.id);
    if (index === -1) continue;

    if (config.includePrevious && index > 0) {
      add({ article: match.article, chunk: siblings[index - 1], score: match.score * 0.85 }, false);
    }
    if (config.includeNext && index < siblings.length - 1) {
      add({ article: match.article, chunk: siblings[index + 1], score: match.score * 0.9 }, false);
    }
  }

  return expanded.sort(
    (a, b) => b.score - a.score || a.chunk.position - b.chunk.position
  );
}
|
|
316
|
+
import { createLogger } from "../utils/logger.js";
|
|
317
|
+
const log = createLogger("hybrid-search");
|
|
318
|
+
export {
|
|
319
|
+
computeChunkRelevance,
|
|
320
|
+
expandChunkMatchesWithNeighbors,
|
|
321
|
+
formatChunksForInjection,
|
|
322
|
+
hybridSearch,
|
|
323
|
+
reciprocalRankFusion,
|
|
324
|
+
searchChunks,
|
|
325
|
+
selectRelevantChunks
|
|
326
|
+
};
|
package/dist/search/index.d.ts
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
|
-
export { initArticleIndex, initProjectIndex, searchArticles, searchProjects, mergeResults } from
|
|
2
|
-
export { getIDFMapForIndex } from
|
|
3
|
-
export { loadVectorIndex, clearVectorIndex, hasVectorIndex, rerankWithVectors } from
|
|
4
|
-
export type { VectorIndex, VectorChunk } from
|
|
5
|
-
export { getSessionCacheKey, getCachedContext, setCachedContext, deleteCachedContext, setCacheAdapter, getCacheAdapter, cleanupCache, SESSION_CACHE_TTL_SECONDS, SESSION_CACHE_TTL_MS,
|
|
6
|
-
export { normalizeText, tokenize
|
|
7
|
-
export {
|
|
8
|
-
export
|
|
9
|
-
export type {
|
|
1
|
+
export { initArticleIndex, initProjectIndex, initArticleChunks, hasArticleChunks, getArticleChunks, searchArticles, searchProjects, searchArticleChunks, mergeResults, } from "./search-api.js";
|
|
2
|
+
export { getIDFMapForIndex } from "./search-index.js";
|
|
3
|
+
export { loadVectorIndex, clearVectorIndex, hasVectorIndex, rerankWithVectors, } from "./vector-reranker.js";
|
|
4
|
+
export type { VectorIndex, VectorChunk } from "./vector-reranker.js";
|
|
5
|
+
export { getSessionCacheKey, getCachedContext, setCachedContext, deleteCachedContext, setCacheAdapter, getCacheAdapter, cleanupCache, SESSION_CACHE_TTL_SECONDS, SESSION_CACHE_TTL_MS, } from "./session-cache.js";
|
|
6
|
+
export { normalizeText, tokenize } from "../utils/text.js";
|
|
7
|
+
export { scoreDocument, filterLowRelevance, pickAnchorTerms, } from "./scoring.js";
|
|
8
|
+
export { buildIDFMap, getIDFWeight } from "./idf.js";
|
|
9
|
+
export type { IDFMap } from "./idf.js";
|
|
10
|
+
export type { SearchDocument, ArticleContext, ArticleChunk, ProjectContext, CachedSearchContext, SearchResult, } from "./types.js";
|
|
11
|
+
export { reciprocalRankFusion, hybridSearch, searchChunks, computeChunkRelevance, selectRelevantChunks, expandChunkMatchesWithNeighbors, formatChunksForInjection, type RRFConfig, type HybridSearchResult, type ChunkMatchResult, type ChunkInjectionConfig, } from "./hybrid-search.js";
|
|
10
12
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/search/index.ts"],"names":[],"mappings":"AAAA,OAAO,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/search/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,gBAAgB,EAChB,gBAAgB,EAChB,iBAAiB,EACjB,gBAAgB,EAChB,gBAAgB,EAChB,cAAc,EACd,cAAc,EACd,mBAAmB,EACnB,YAAY,GACb,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACtD,OAAO,EACL,eAAe,EACf,gBAAgB,EAChB,cAAc,EACd,iBAAiB,GAClB,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AACrE,OAAO,EACL,kBAAkB,EAClB,gBAAgB,EAChB,gBAAgB,EAChB,mBAAmB,EACnB,eAAe,EACf,eAAe,EACf,YAAY,EACZ,yBAAyB,EACzB,oBAAoB,GACrB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC3D,OAAO,EACL,aAAa,EACb,kBAAkB,EAClB,eAAe,GAChB,MAAM,cAAc,CAAC;AACtB,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACrD,YAAY,EAAE,MAAM,EAAE,MAAM,UAAU,CAAC;AACvC,YAAY,EACV,cAAc,EACd,cAAc,EACd,YAAY,EACZ,cAAc,EACd,mBAAmB,EACnB,YAAY,GACb,MAAM,YAAY,CAAC;AACpB,OAAO,EACL,oBAAoB,EACpB,YAAY,EACZ,YAAY,EACZ,qBAAqB,EACrB,oBAAoB,EACpB,+BAA+B,EAC/B,wBAAwB,EACxB,KAAK,SAAS,EACd,KAAK,kBAAkB,EACvB,KAAK,gBAAgB,EACrB,KAAK,oBAAoB,GAC1B,MAAM,oBAAoB,CAAC"}
|
package/dist/search/index.js
CHANGED
|
@@ -1,6 +1,21 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
initArticleIndex,
|
|
3
|
+
initProjectIndex,
|
|
4
|
+
initArticleChunks,
|
|
5
|
+
hasArticleChunks,
|
|
6
|
+
getArticleChunks,
|
|
7
|
+
searchArticles,
|
|
8
|
+
searchProjects,
|
|
9
|
+
searchArticleChunks,
|
|
10
|
+
mergeResults
|
|
11
|
+
} from "./search-api.js";
|
|
2
12
|
import { getIDFMapForIndex } from "./search-index.js";
|
|
3
|
-
import {
|
|
13
|
+
import {
|
|
14
|
+
loadVectorIndex,
|
|
15
|
+
clearVectorIndex,
|
|
16
|
+
hasVectorIndex,
|
|
17
|
+
rerankWithVectors
|
|
18
|
+
} from "./vector-reranker.js";
|
|
4
19
|
import {
|
|
5
20
|
getSessionCacheKey,
|
|
6
21
|
getCachedContext,
|
|
@@ -10,39 +25,60 @@ import {
|
|
|
10
25
|
getCacheAdapter,
|
|
11
26
|
cleanupCache,
|
|
12
27
|
SESSION_CACHE_TTL_SECONDS,
|
|
13
|
-
SESSION_CACHE_TTL_MS
|
|
14
|
-
getCachedContextSync,
|
|
15
|
-
setCachedContextSync,
|
|
16
|
-
cleanupCacheLegacy
|
|
28
|
+
SESSION_CACHE_TTL_MS
|
|
17
29
|
} from "./session-cache.js";
|
|
18
|
-
import { normalizeText, tokenize
|
|
30
|
+
import { normalizeText, tokenize } from "../utils/text.js";
|
|
31
|
+
import {
|
|
32
|
+
scoreDocument,
|
|
33
|
+
filterLowRelevance,
|
|
34
|
+
pickAnchorTerms
|
|
35
|
+
} from "./scoring.js";
|
|
19
36
|
import { buildIDFMap, getIDFWeight } from "./idf.js";
|
|
37
|
+
import {
|
|
38
|
+
reciprocalRankFusion,
|
|
39
|
+
hybridSearch,
|
|
40
|
+
searchChunks,
|
|
41
|
+
computeChunkRelevance,
|
|
42
|
+
selectRelevantChunks,
|
|
43
|
+
expandChunkMatchesWithNeighbors,
|
|
44
|
+
formatChunksForInjection
|
|
45
|
+
} from "./hybrid-search.js";
|
|
20
46
|
export {
|
|
21
47
|
SESSION_CACHE_TTL_MS,
|
|
22
48
|
SESSION_CACHE_TTL_SECONDS,
|
|
23
49
|
buildIDFMap,
|
|
24
50
|
cleanupCache,
|
|
25
|
-
cleanupCacheLegacy,
|
|
26
51
|
clearVectorIndex,
|
|
52
|
+
computeChunkRelevance,
|
|
27
53
|
deleteCachedContext,
|
|
54
|
+
expandChunkMatchesWithNeighbors,
|
|
55
|
+
filterLowRelevance,
|
|
56
|
+
formatChunksForInjection,
|
|
57
|
+
getArticleChunks,
|
|
28
58
|
getCacheAdapter,
|
|
29
59
|
getCachedContext,
|
|
30
|
-
getCachedContextSync,
|
|
31
60
|
getIDFMapForIndex,
|
|
32
61
|
getIDFWeight,
|
|
33
62
|
getSessionCacheKey,
|
|
63
|
+
hasArticleChunks,
|
|
34
64
|
hasVectorIndex,
|
|
65
|
+
hybridSearch,
|
|
66
|
+
initArticleChunks,
|
|
35
67
|
initArticleIndex,
|
|
36
68
|
initProjectIndex,
|
|
37
69
|
loadVectorIndex,
|
|
38
70
|
mergeResults,
|
|
39
71
|
normalizeText,
|
|
72
|
+
pickAnchorTerms,
|
|
73
|
+
reciprocalRankFusion,
|
|
40
74
|
rerankWithVectors,
|
|
41
75
|
scoreDocument,
|
|
76
|
+
searchArticleChunks,
|
|
42
77
|
searchArticles,
|
|
78
|
+
searchChunks,
|
|
43
79
|
searchProjects,
|
|
80
|
+
selectRelevantChunks,
|
|
44
81
|
setCacheAdapter,
|
|
45
82
|
setCachedContext,
|
|
46
|
-
setCachedContextSync,
|
|
47
83
|
tokenize
|
|
48
84
|
};
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { IDFMap } from "./idf.js";
|
|
2
|
+
export declare function scoreDocument(tokens: string[], doc: {
|
|
3
|
+
title: string;
|
|
4
|
+
content: string;
|
|
5
|
+
excerpt: string;
|
|
6
|
+
keyPoints: string[];
|
|
7
|
+
categories: string[];
|
|
8
|
+
tags: string[];
|
|
9
|
+
}, idfMap?: IDFMap | null): number;
|
|
10
|
+
export declare function filterLowRelevance<T extends {
|
|
11
|
+
score: number;
|
|
12
|
+
}>(results: T[], relativeThreshold?: number, minAbsoluteScore?: number): T[];
|
|
13
|
+
export declare function pickAnchorTerms(query: string, candidates: Array<{
|
|
14
|
+
title: string;
|
|
15
|
+
keyPoints: string[];
|
|
16
|
+
categories: string[];
|
|
17
|
+
}>, maxTerms?: number, minTermLength?: number): string[];
|
|
18
|
+
//# sourceMappingURL=scoring.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"scoring.d.ts","sourceRoot":"","sources":["../../src/search/scoring.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,UAAU,CAAC;AAavC,wBAAgB,aAAa,CAC3B,MAAM,EAAE,MAAM,EAAE,EAChB,GAAG,EAAE;IACH,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,IAAI,EAAE,MAAM,EAAE,CAAC;CAChB,EACD,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,GACrB,MAAM,CAwBR;AAED,wBAAgB,kBAAkB,CAAC,CAAC,SAAS;IAAE,KAAK,EAAE,MAAM,CAAA;CAAE,EAC5D,OAAO,EAAE,CAAC,EAAE,EACZ,iBAAiB,SAAO,EACxB,gBAAgB,SAAI,GACnB,CAAC,EAAE,CAUL;AAED,wBAAgB,eAAe,CAC7B,KAAK,EAAE,MAAM,EACb,UAAU,EAAE,KAAK,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,UAAU,EAAE,MAAM,EAAE,CAAC;CACtB,CAAC,EACF,QAAQ,SAAI,EACZ,aAAa,SAAI,GAChB,MAAM,EAAE,CA2BV"}
|
|
@@ -1,22 +1,5 @@
|
|
|
1
1
|
import { getIDFWeight } from "./idf.js";
|
|
2
|
-
|
|
3
|
-
return text.toLowerCase().replace(/[^\u4e00-\u9fa5\w\s]/g, " ").replace(/\s+/g, " ").trim();
|
|
4
|
-
}
|
|
5
|
-
function tokenize(text) {
|
|
6
|
-
const normalized = normalizeText(text);
|
|
7
|
-
const parts = normalized.split(/\s+/).filter(Boolean);
|
|
8
|
-
return dedupeByContainment(parts);
|
|
9
|
-
}
|
|
10
|
-
function dedupeByContainment(terms) {
|
|
11
|
-
const unique = [...new Set(terms)];
|
|
12
|
-
const kept = [];
|
|
13
|
-
for (const term of unique.sort((a, b) => b.length - a.length)) {
|
|
14
|
-
if (!kept.some((existing) => existing.includes(term))) {
|
|
15
|
-
kept.push(term);
|
|
16
|
-
}
|
|
17
|
-
}
|
|
18
|
-
return kept;
|
|
19
|
-
}
|
|
2
|
+
import { normalizeText, tokenize } from "../utils/text.js";
|
|
20
3
|
const FIELD_WEIGHTS = {
|
|
21
4
|
title: 8,
|
|
22
5
|
keyPoints: 5,
|
|
@@ -47,11 +30,14 @@ function scoreDocument(tokens, doc, idfMap) {
|
|
|
47
30
|
return score;
|
|
48
31
|
}
|
|
49
32
|
function filterLowRelevance(results, relativeThreshold = 0.35, minAbsoluteScore = 2) {
|
|
50
|
-
if (results.length <=
|
|
33
|
+
if (results.length <= 2) return results;
|
|
51
34
|
const topScore = results[0]?.score ?? 0;
|
|
52
35
|
if (topScore <= 0) return results;
|
|
53
|
-
const threshold = Math.max(
|
|
54
|
-
|
|
36
|
+
const threshold = Math.max(
|
|
37
|
+
minAbsoluteScore,
|
|
38
|
+
topScore * (results.length > 8 ? relativeThreshold + 0.1 : relativeThreshold)
|
|
39
|
+
);
|
|
40
|
+
return results.filter((item, index) => index < 2 || item.score >= threshold);
|
|
55
41
|
}
|
|
56
42
|
function pickAnchorTerms(query, candidates, maxTerms = 2, minTermLength = 2) {
|
|
57
43
|
const terms = tokenize(query).filter((t) => t.length >= minTermLength);
|
|
@@ -59,8 +45,12 @@ function pickAnchorTerms(query, candidates, maxTerms = 2, minTermLength = 2) {
|
|
|
59
45
|
if (!candidates.length) return terms.slice(0, maxTerms);
|
|
60
46
|
const scored = terms.map((term) => {
|
|
61
47
|
let hitCount = 0;
|
|
62
|
-
for (const
|
|
63
|
-
const text = normalizeText(
|
|
48
|
+
for (const candidate of candidates) {
|
|
49
|
+
const text = normalizeText(
|
|
50
|
+
[candidate.title, ...candidate.keyPoints, ...candidate.categories].join(
|
|
51
|
+
" "
|
|
52
|
+
)
|
|
53
|
+
);
|
|
64
54
|
if (text.includes(term)) hitCount++;
|
|
65
55
|
}
|
|
66
56
|
if (hitCount <= 0) return { term, score: Number.NEGATIVE_INFINITY };
|
|
@@ -72,10 +62,7 @@ function pickAnchorTerms(query, candidates, maxTerms = 2, minTermLength = 2) {
|
|
|
72
62
|
return scored.filter((s) => Number.isFinite(s.score)).sort((a, b) => b.score - a.score).map((s) => s.term).slice(0, maxTerms);
|
|
73
63
|
}
|
|
74
64
|
export {
|
|
75
|
-
dedupeByContainment,
|
|
76
65
|
filterLowRelevance,
|
|
77
|
-
normalizeText,
|
|
78
66
|
pickAnchorTerms,
|
|
79
|
-
scoreDocument
|
|
80
|
-
tokenize
|
|
67
|
+
scoreDocument
|
|
81
68
|
};
|