@astro-minimax/ai 0.7.1 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/data/index.d.ts +1 -0
- package/dist/data/index.d.ts.map +1 -1
- package/dist/data/metadata-loader.d.ts +2 -2
- package/dist/data/metadata-loader.d.ts.map +1 -1
- package/dist/data/metadata-loader.js +15 -3
- package/dist/data/types.d.ts +2 -0
- package/dist/data/types.d.ts.map +1 -1
- package/dist/fact-registry/fact-matcher.d.ts +12 -0
- package/dist/fact-registry/fact-matcher.d.ts.map +1 -0
- package/dist/fact-registry/fact-matcher.js +94 -0
- package/dist/fact-registry/index.d.ts +5 -0
- package/dist/fact-registry/index.d.ts.map +1 -0
- package/dist/fact-registry/index.js +3 -0
- package/dist/fact-registry/prompt-injector.d.ts +7 -0
- package/dist/fact-registry/prompt-injector.d.ts.map +1 -0
- package/dist/fact-registry/prompt-injector.js +57 -0
- package/dist/fact-registry/registry.d.ts +10 -0
- package/dist/fact-registry/registry.d.ts.map +1 -0
- package/dist/fact-registry/registry.js +38 -0
- package/dist/fact-registry/types.d.ts +46 -0
- package/dist/fact-registry/types.d.ts.map +1 -0
- package/dist/fact-registry/types.js +5 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/prompt/dynamic-layer.d.ts.map +1 -1
- package/dist/prompt/dynamic-layer.js +6 -2
- package/dist/prompt/types.d.ts +2 -0
- package/dist/prompt/types.d.ts.map +1 -1
- package/dist/search/idf.d.ts +18 -0
- package/dist/search/idf.d.ts.map +1 -0
- package/dist/search/idf.js +31 -0
- package/dist/search/index.d.ts +5 -0
- package/dist/search/index.d.ts.map +1 -1
- package/dist/search/index.js +3 -0
- package/dist/search/search-api.d.ts.map +1 -1
- package/dist/search/search-api.js +10 -3
- package/dist/search/search-index.d.ts +7 -1
- package/dist/search/search-index.d.ts.map +1 -1
- package/dist/search/search-index.js +15 -2
- package/dist/search/search-utils.d.ts +7 -3
- package/dist/search/search-utils.d.ts.map +1 -1
- package/dist/search/search-utils.js +23 -15
- package/dist/search/vector-reranker.d.ts +38 -0
- package/dist/search/vector-reranker.d.ts.map +1 -0
- package/dist/search/vector-reranker.js +135 -0
- package/dist/server/chat-handler.d.ts.map +1 -1
- package/dist/server/chat-handler.js +8 -2
- package/dist/server/metadata-init.d.ts.map +1 -1
- package/dist/server/metadata-init.js +2 -0
- package/dist/server/types.d.ts +2 -0
- package/dist/server/types.d.ts.map +1 -1
- package/package.json +10 -3
- package/src/components/AIChatContainer.tsx +4 -3
- package/src/components/AIChatWidget.astro +1 -1
- package/src/components/ChatPanel.tsx +5 -4
- package/src/providers/mock.ts +240 -0
- package/src/server/types.ts +89 -0
- package/src/utils/i18n.ts +238 -0
package/dist/data/index.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
1
|
export { preloadMetadata, clearMetadataCache, getMetadata, getArticleSummary, getAllSummaries, getAuthorContext, getVoiceProfile, } from './metadata-loader.js';
|
|
2
2
|
export type { AISummariesFile, AuthorContextFile, VoiceProfile, LoadedMetadata, ArticleSummaryData, AuthorPost, } from './types.js';
|
|
3
|
+
export type { FactRegistryFile } from '../fact-registry/types.js';
|
|
3
4
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/data/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/data/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,kBAAkB,EAClB,WAAW,EACX,iBAAiB,EACjB,eAAe,EACf,gBAAgB,EAChB,eAAe,GAChB,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EACV,eAAe,EACf,iBAAiB,EACjB,YAAY,EACZ,cAAc,EACd,kBAAkB,EAClB,UAAU,GACX,MAAM,YAAY,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/data/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,kBAAkB,EAClB,WAAW,EACX,iBAAiB,EACjB,eAAe,EACf,gBAAgB,EAChB,eAAe,GAChB,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EACV,eAAe,EACf,iBAAiB,EACjB,YAAY,EACZ,cAAc,EACd,kBAAkB,EAClB,UAAU,GACX,MAAM,YAAY,CAAC;AACpB,YAAY,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC"}
|
|
@@ -5,11 +5,11 @@ import type { AuthorContextFile, VoiceProfile, LoadedMetadata, ArticleSummaryDat
|
|
|
5
5
|
*
|
|
6
6
|
* Example (in functions/lib/ai.ts):
|
|
7
7
|
* import summaries from '../../datas/ai-summaries.json' with { type: 'json' };
|
|
8
|
-
* preloadMetadata({ summaries, authorContext, voiceProfile });
|
|
8
|
+
* preloadMetadata({ summaries, authorContext, voiceProfile, factRegistry });
|
|
9
9
|
*/
|
|
10
10
|
export declare function preloadMetadata(data: Partial<LoadedMetadata>): void;
|
|
11
11
|
/**
|
|
12
|
-
* Clears the metadata cache (useful for testing).
|
|
12
|
+
* Clears the metadata cache and all associated sub-caches (useful for testing).
|
|
13
13
|
*/
|
|
14
14
|
export declare function clearMetadataCache(): void;
|
|
15
15
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"metadata-loader.d.ts","sourceRoot":"","sources":["../../src/data/metadata-loader.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAmB,iBAAiB,EAAE,YAAY,EAAE,cAAc,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"metadata-loader.d.ts","sourceRoot":"","sources":["../../src/data/metadata-loader.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAmB,iBAAiB,EAAE,YAAY,EAAE,cAAc,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAQvH;;;;;;;GAOG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,OAAO,CAAC,cAAc,CAAC,GAAG,IAAI,CAgBnE;AAED;;GAEG;AACH,wBAAgB,kBAAkB,IAAI,IAAI,CAIzC;AAED;;GAEG;AACH,wBAAgB,WAAW,IAAI,cAAc,CAE5C;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,kBAAkB,GAAG,SAAS,CAE9E;AAED;;GAEG;AACH,wBAAgB,eAAe,IAAI,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,GAAG,kBAAkB,CAAC,CAG9E;AAED;;GAEG;AACH,wBAAgB,gBAAgB,IAAI,iBAAiB,GAAG,IAAI,CAE3D;AAED;;GAEG;AACH,wBAAgB,eAAe,IAAI,YAAY,GAAG,IAAI,CAErD"}
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { loadFactRegistry as loadFactRegistryCache } from '../fact-registry/registry.js';
|
|
2
|
+
import { loadVectorIndex as loadVectorIndexCache } from '../search/vector-reranker.js';
|
|
1
3
|
// Lazy-loaded, memory-cached metadata
|
|
2
4
|
let cachedMetadata = null;
|
|
3
5
|
/**
|
|
@@ -6,26 +8,36 @@ let cachedMetadata = null;
|
|
|
6
8
|
*
|
|
7
9
|
* Example (in functions/lib/ai.ts):
|
|
8
10
|
* import summaries from '../../datas/ai-summaries.json' with { type: 'json' };
|
|
9
|
-
* preloadMetadata({ summaries, authorContext, voiceProfile });
|
|
11
|
+
* preloadMetadata({ summaries, authorContext, voiceProfile, factRegistry });
|
|
10
12
|
*/
|
|
11
13
|
export function preloadMetadata(data) {
  // Replace the cache wholesale: any field the caller omits is stored as null,
  // so a second call supersedes the first instead of merging with it.
  cachedMetadata = {
    summaries: data.summaries ?? null,
    authorContext: data.authorContext ?? null,
    voiceProfile: data.voiceProfile ?? null,
    factRegistry: data.factRegistry ?? null,
    vectorIndex: data.vectorIndex ?? null,
  };
  // Mirror both fields into their sub-caches unconditionally, null included.
  // Forwarding only truthy values would leave the registry / vector index from
  // an earlier preload active while getMetadata() reports null for the field.
  // Both loaders are already invoked with null by clearMetadataCache().
  loadFactRegistryCache(cachedMetadata.factRegistry);
  loadVectorIndexCache(cachedMetadata.vectorIndex);
}
|
|
18
28
|
/**
 * Resets this module to its not-yet-preloaded state (useful for testing):
 * drops the cached metadata and hands null to the fact-registry and
 * vector-index loaders.
 */
export function clearMetadataCache() {
  cachedMetadata = null;
  for (const resetSubCache of [loadFactRegistryCache, loadVectorIndexCache]) {
    resetSubCache(null);
  }
}
|
|
24
36
|
/**
 * Returns the metadata stored by preloadMetadata().
 * When nothing is cached, a fresh object with every field set to null is
 * returned instead, so callers never receive null itself.
 */
export function getMetadata() {
  if (cachedMetadata !== null && cachedMetadata !== undefined) {
    return cachedMetadata;
  }
  return {
    summaries: null,
    authorContext: null,
    voiceProfile: null,
    factRegistry: null,
    vectorIndex: null,
  };
}
|
|
30
42
|
/**
|
|
31
43
|
* Returns the AI-generated summary for an article by its slug.
|
package/dist/data/types.d.ts
CHANGED
|
@@ -47,5 +47,7 @@ export interface LoadedMetadata {
|
|
|
47
47
|
summaries: AISummariesFile | null;
|
|
48
48
|
authorContext: AuthorContextFile | null;
|
|
49
49
|
voiceProfile: VoiceProfile | null;
|
|
50
|
+
factRegistry: import('../fact-registry/types.js').FactRegistryFile | null;
|
|
51
|
+
vectorIndex: import('../search/vector-reranker.js').VectorIndex | null;
|
|
50
52
|
}
|
|
51
53
|
//# sourceMappingURL=types.d.ts.map
|
package/dist/data/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/data/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,kBAAkB;IACjC,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,kBAAkB,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE;QACJ,WAAW,EAAE,MAAM,CAAC;QACpB,KAAK,EAAE,MAAM,CAAC;QACd,cAAc,EAAE,MAAM,CAAC;KACxB,CAAC;IACF,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,mBAAmB,CAAC,CAAC;CAC/C;AAED,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,GAAG,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,aAAa,CAAC;IACvB,KAAK,EAAE,UAAU,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,YAAY;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED,MAAM,WAAW,cAAc;IAC7B,SAAS,EAAE,eAAe,GAAG,IAAI,CAAC;IAClC,aAAa,EAAE,iBAAiB,GAAG,IAAI,CAAC;IACxC,YAAY,EAAE,YAAY,GAAG,IAAI,CAAC;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/data/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,kBAAkB;IACjC,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,kBAAkB,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE;QACJ,WAAW,EAAE,MAAM,CAAC;QACpB,KAAK,EAAE,MAAM,CAAC;QACd,cAAc,EAAE,MAAM,CAAC;KACxB,CAAC;IACF,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,mBAAmB,CAAC,CAAC;CAC/C;AAED,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,GAAG,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,aAAa,CAAC;IACvB,KAAK,EAAE,UAAU,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,YAAY;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED,MAAM,WAAW,cAAc;IAC7B,SAAS,EAAE,eAAe,GAAG,IAAI,CAAC;IAClC,aAAa,EAAE,iBAAiB,GAAG,IAAI,CAAC;IACxC,YAAY,EAAE,YAAY,GAAG,IAAI,CAAC;IAClC,YAAY,EAAE,OAAO,2BAA2B,EAAE,gBAAgB,GAAG,IAAI,CAAC;IAC1E,WAAW,EAAE,OAAO,8BAA8B,EAAE,WAAW,GAAG,IAAI,CAAC;CACxE"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { Fact } from './types.js';
|
|
2
|
+
/**
|
|
3
|
+
* Selects facts most relevant to the user's query.
|
|
4
|
+
*
|
|
5
|
+
* Strategy:
|
|
6
|
+
* 1. Always include very-high-confidence core facts (confidence >= 0.95)
|
|
7
|
+
* 2. Add category-matched facts based on query keywords
|
|
8
|
+
* 3. Add tag-matched facts for more specific queries
|
|
9
|
+
* 4. Deduplicate and cap total count
|
|
10
|
+
*/
|
|
11
|
+
export declare function matchFactsToQuery(query: string, lang?: string, maxFacts?: number): Fact[];
|
|
12
|
+
//# sourceMappingURL=fact-matcher.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fact-matcher.d.ts","sourceRoot":"","sources":["../../src/fact-registry/fact-matcher.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAgB,MAAM,YAAY,CAAC;AAwDrD;;;;;;;;GAQG;AACH,wBAAgB,iBAAiB,CAC/B,KAAK,EAAE,MAAM,EACb,IAAI,CAAC,EAAE,MAAM,EACb,QAAQ,SAAK,GACZ,IAAI,EAAE,CA2CR"}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import { queryFacts } from './registry.js';
|
|
2
|
+
/**
 * Keyword table for category detection. Each fact category lists the Chinese
 * and English fragments that mark a user query as relevant to that category.
 */
const CATEGORY_KEYWORDS = {
  author: [
    '作者', '博主', '谁', '关于我', '自我介绍', '个人',
    'author', 'who', 'about me', 'introduce',
  ],
  blog: [
    '博客', '文章', '多少', '数量', '统计', '总共', '分类', '标签', '语言',
    'blog', 'post', 'how many', 'count', 'statistic', 'category', 'tag',
  ],
  content: [
    '写过', '提到', '讨论', '观点', '主题', '话题', '涵盖', '领域',
    'wrote', 'mention', 'discuss', 'topic', 'cover', 'area', 'opinion',
  ],
  project: [
    '项目', '开源', '仓库', '工具', '产品',
    'project', 'open source', 'repo', 'github', 'tool', 'product',
  ],
  tech: [
    '技术', '技术栈', '框架', '库', '编程语言', '前端', '后端',
    'tech', 'stack', 'framework', 'library', 'language', 'frontend', 'backend',
  ],
};
/**
 * Lists the categories relevant to a query, in table order.
 * A category qualifies when at least one of its keywords occurs as a plain
 * substring of the lower-cased query (no word-boundary check is applied).
 */
function detectRelevantCategories(query) {
  const haystack = query.toLowerCase();
  return Object.entries(CATEGORY_KEYWORDS)
    .filter(([, keywords]) => {
      for (const keyword of keywords) {
        if (haystack.includes(keyword)) {
          return true;
        }
      }
      return false;
    })
    .map(([category]) => category);
}
|
|
41
|
+
/**
 * Tokenizes a query into lower-cased candidate tags.
 * A token is either a letter followed by one or more of [A-Za-z0-9.+#-], or a
 * run of 2 to 6 characters in the U+4E00–U+9FA5 range. Returns [] when the
 * query contains no such token.
 */
function extractQueryTags(query) {
  const found = query.match(/[A-Za-z][A-Za-z0-9.+#-]{1,}|[\u4e00-\u9fa5]{2,6}/g);
  if (found === null || found === undefined) {
    return [];
  }
  return found.map((token) => token.toLowerCase());
}
|
|
48
|
+
/**
 * Selects the facts most relevant to the user's query.
 *
 * Three layers are fetched from the registry and merged in priority order:
 *   1. Core facts (confidence >= 0.95, at most 5). They are merged first so
 *      the final cap cannot push them out of the result.
 *   2. Category facts (confidence >= 0.7, at most 10), fetched only when the
 *      query matches at least one category keyword.
 *   3. Tag facts (confidence >= 0.6, at most 5), fetched only when the query
 *      yields at least one tag token.
 *
 * @param query    Raw user query text.
 * @param lang     Optional language filter, forwarded to every registry query.
 * @param maxFacts Upper bound on the number of facts returned (default 15).
 *                 Zero or negative values yield an empty list.
 * @returns Facts deduplicated by `id`, highest-priority layer first.
 */
export function matchFactsToQuery(query, lang, maxFacts = 15) {
  const categories = detectRelevantCategories(query);
  const queryTags = extractQueryTags(query);
  // Layer 1: always-present core facts (highest confidence)
  const coreFacts = queryFacts({
    minConfidence: 0.95,
    lang,
    limit: 5,
  });
  // Layer 2: category-matched facts
  const categoryFacts = categories.length > 0
    ? queryFacts({
      categories,
      minConfidence: 0.7,
      lang,
      limit: 10,
    })
    : [];
  // Layer 3: tag-matched facts (for specificity)
  const tagFacts = queryTags.length > 0
    ? queryFacts({
      tags: queryTags,
      minConfidence: 0.6,
      lang,
      limit: 5,
    })
    : [];
  // Merge in layer order. Core facts must come first: the category and tag
  // layers alone can already reach the default cap (10 + 5 = 15), so merging
  // core facts last would let slice() discard every one of them.
  const seen = new Set();
  const result = [];
  for (const fact of [...coreFacts, ...categoryFacts, ...tagFacts]) {
    if (!seen.has(fact.id)) {
      seen.add(fact.id);
      result.push(fact);
    }
  }
  // Clamp at 0: a negative end index would make slice() count from the end
  // and silently drop trailing facts instead of returning nothing.
  return result.slice(0, Math.max(0, maxFacts));
}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export { loadFactRegistry, clearFactRegistry, getFactRegistry, queryFacts, } from './registry.js';
|
|
2
|
+
export { matchFactsToQuery } from './fact-matcher.js';
|
|
3
|
+
export { buildFactSection } from './prompt-injector.js';
|
|
4
|
+
export type { Fact, FactCategory, FactSource, FactRegistryFile, FactRegistryStats, FactQueryOptions, } from './types.js';
|
|
5
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/fact-registry/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,gBAAgB,EAChB,iBAAiB,EACjB,eAAe,EACf,UAAU,GACX,MAAM,eAAe,CAAC;AACvB,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACtD,OAAO,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACxD,YAAY,EACV,IAAI,EACJ,YAAY,EACZ,UAAU,EACV,gBAAgB,EAChB,iBAAiB,EACjB,gBAAgB,GACjB,MAAM,YAAY,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { Fact } from './types.js';
|
|
2
|
+
/**
|
|
3
|
+
* Formats matched facts into a prompt section ready for injection.
|
|
4
|
+
* Groups facts by category with clear structure.
|
|
5
|
+
*/
|
|
6
|
+
export declare function buildFactSection(facts: Fact[], lang?: string): string;
|
|
7
|
+
//# sourceMappingURL=prompt-injector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"prompt-injector.d.ts","sourceRoot":"","sources":["../../src/fact-registry/prompt-injector.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAgB,MAAM,YAAY,CAAC;AAgCrD;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,IAAI,GAAE,MAAa,GAAG,MAAM,CA+B3E"}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/** Sub-heading text per fact category, keyed by output language. */
const CATEGORY_LABELS = {
  zh: {
    author: '关于作者',
    blog: '博客数据',
    content: '内容事实',
    project: '项目信息',
    tech: '技术相关',
  },
  en: {
    author: 'About the Author',
    blog: 'Blog Statistics',
    content: 'Content Facts',
    project: 'Project Info',
    tech: 'Tech Related',
  },
};
/** Section title and closing instruction, keyed by output language. */
const SECTION_TEXT = {
  zh: {
    title: '已验证事实(基于博客真实数据)',
    instruction: '以上事实来自博客的真实数据。回答时优先使用这些已验证的事实,不要编造与之矛盾的信息。如果某个问题的答案不在已验证事实中,请如实说明。',
  },
  en: {
    title: 'Verified Facts (based on real blog data)',
    instruction: 'The above facts are derived from real blog data. Prioritize these verified facts when answering. Do not fabricate information that contradicts them. If the answer is not among verified facts, state that honestly.',
  },
};
/**
 * Renders facts as a markdown prompt section.
 *
 * Layout: a `##` title, then one `###` sub-section per category (in order of
 * first appearance) with each statement as a list item, then a blockquote
 * carrying the usage instruction. A category without a label entry is shown
 * under its raw name. Any `lang` other than exactly 'zh' renders in English.
 * Returns '' when `facts` is empty.
 */
export function buildFactSection(facts, lang = 'zh') {
  if (!facts.length) {
    return '';
  }
  const locale = lang === 'zh' ? 'zh' : 'en';
  const headings = CATEGORY_LABELS[locale];
  const { title, instruction } = SECTION_TEXT[locale];
  // Bucket facts per category; Map keeps first-seen category order.
  const byCategory = new Map();
  for (const fact of facts) {
    if (!byCategory.has(fact.category)) {
      byCategory.set(fact.category, []);
    }
    byCategory.get(fact.category).push(fact);
  }
  const body = [...byCategory].flatMap(([category, members]) => [
    '',
    `### ${headings[category] ?? category}`,
    ...members.map((member) => `- ${member.statement}`),
  ]);
  return [`## ${title}`, ...body, '', `> ${instruction}`].join('\n');
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { Fact, FactRegistryFile, FactQueryOptions } from './types.js';
|
|
2
|
+
export declare function loadFactRegistry(data: FactRegistryFile | null): void;
|
|
3
|
+
export declare function clearFactRegistry(): void;
|
|
4
|
+
export declare function getFactRegistry(): FactRegistryFile | null;
|
|
5
|
+
/**
|
|
6
|
+
* Query facts with optional filters.
|
|
7
|
+
* Returns facts sorted by confidence (highest first).
|
|
8
|
+
*/
|
|
9
|
+
export declare function queryFacts(options?: FactQueryOptions): Fact[];
|
|
10
|
+
//# sourceMappingURL=registry.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"registry.d.ts","sourceRoot":"","sources":["../../src/fact-registry/registry.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAI3E,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,gBAAgB,GAAG,IAAI,GAAG,IAAI,CAEpE;AAED,wBAAgB,iBAAiB,IAAI,IAAI,CAExC;AAED,wBAAgB,eAAe,IAAI,gBAAgB,GAAG,IAAI,CAEzD;AAED;;;GAGG;AACH,wBAAgB,UAAU,CAAC,OAAO,GAAE,gBAAqB,GAAG,IAAI,EAAE,CAgCjE"}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
// Currently loaded fact registry; null while nothing is loaded.
let cachedRegistry = null;
/** Makes `data` the active registry. Passing null unloads the current one. */
export function loadFactRegistry(data) {
  cachedRegistry = data;
}
/** Unloads the active registry. */
export function clearFactRegistry() {
  cachedRegistry = null;
}
/** Returns the active registry, or null when none is loaded. */
export function getFactRegistry() {
  return cachedRegistry;
}
/**
 * Returns the facts of the active registry that pass every supplied filter,
 * ordered by descending confidence. The registry's own array is not mutated.
 *
 * Filters (each applied only when supplied):
 *   - categories:    fact.category must be one of them (empty list = no filter)
 *   - lang:          fact.lang must equal it or be 'all'
 *   - minConfidence: fact.confidence must be >= it
 *   - tags:          at least one fact tag must match, case-insensitively
 *   - limit:         keep only the first `limit` facts when it is > 0
 *
 * Returns [] when no registry is loaded or it holds no facts.
 */
export function queryFacts(options = {}) {
  if (cachedRegistry === null || cachedRegistry === undefined) {
    return [];
  }
  const source = cachedRegistry.facts;
  if (!source.length) {
    return [];
  }
  const wantedCategories = options.categories?.length
    ? new Set(options.categories)
    : null;
  const wantedTags = options.tags?.length
    ? new Set(options.tags.map((tag) => tag.toLowerCase()))
    : null;
  const selected = source.filter((fact) => {
    if (wantedCategories && !wantedCategories.has(fact.category)) {
      return false;
    }
    if (options.lang && fact.lang !== options.lang && fact.lang !== 'all') {
      return false;
    }
    if (options.minConfidence !== undefined && !(fact.confidence >= options.minConfidence)) {
      return false;
    }
    if (wantedTags && !fact.tags.some((tag) => wantedTags.has(tag.toLowerCase()))) {
      return false;
    }
    return true;
  });
  // filter() produced a new array, so sorting in place leaves the registry untouched.
  selected.sort((left, right) => right.confidence - left.confidence);
  return options.limit && options.limit > 0
    ? selected.slice(0, options.limit)
    : selected;
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fact Registry — structured, verifiable facts extracted from blog data.
|
|
3
|
+
* Injected into prompts to ground AI responses in real data and reduce hallucination.
|
|
4
|
+
*/
|
|
5
|
+
export type FactCategory = 'author' | 'blog' | 'content' | 'project' | 'tech';
|
|
6
|
+
/**
|
|
7
|
+
* How the fact was produced:
|
|
8
|
+
* - `explicit`: directly stated in blog content or configuration
|
|
9
|
+
* - `derived`: computed from blog data (counts, aggregations)
|
|
10
|
+
* - `aggregated`: synthesized from multiple posts/sources
|
|
11
|
+
*/
|
|
12
|
+
export type FactSource = 'explicit' | 'derived' | 'aggregated';
|
|
13
|
+
export interface Fact {
|
|
14
|
+
id: string;
|
|
15
|
+
category: FactCategory;
|
|
16
|
+
/** Human-readable statement in the target language */
|
|
17
|
+
statement: string;
|
|
18
|
+
/** Where this fact comes from (file, config, computation) */
|
|
19
|
+
evidence: string;
|
|
20
|
+
source: FactSource;
|
|
21
|
+
/** 0–1 reliability score; 1 = absolute certainty */
|
|
22
|
+
confidence: number;
|
|
23
|
+
/** Keywords for query matching */
|
|
24
|
+
tags: string[];
|
|
25
|
+
lang: string;
|
|
26
|
+
}
|
|
27
|
+
export interface FactRegistryFile {
|
|
28
|
+
$schema: string;
|
|
29
|
+
generatedAt: string;
|
|
30
|
+
version: number;
|
|
31
|
+
facts: Fact[];
|
|
32
|
+
stats: FactRegistryStats;
|
|
33
|
+
}
|
|
34
|
+
export interface FactRegistryStats {
|
|
35
|
+
total: number;
|
|
36
|
+
byCategory: Record<FactCategory, number>;
|
|
37
|
+
avgConfidence: number;
|
|
38
|
+
}
|
|
39
|
+
export interface FactQueryOptions {
|
|
40
|
+
categories?: FactCategory[];
|
|
41
|
+
tags?: string[];
|
|
42
|
+
minConfidence?: number;
|
|
43
|
+
lang?: string;
|
|
44
|
+
limit?: number;
|
|
45
|
+
}
|
|
46
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/fact-registry/types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,MAAM,YAAY,GAAG,QAAQ,GAAG,MAAM,GAAG,SAAS,GAAG,SAAS,GAAG,MAAM,CAAC;AAE9E;;;;;GAKG;AACH,MAAM,MAAM,UAAU,GAAG,UAAU,GAAG,SAAS,GAAG,YAAY,CAAC;AAE/D,MAAM,WAAW,IAAI;IACnB,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,YAAY,CAAC;IACvB,sDAAsD;IACtD,SAAS,EAAE,MAAM,CAAC;IAClB,6DAA6D;IAC7D,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,UAAU,CAAC;IACnB,oDAAoD;IACpD,UAAU,EAAE,MAAM,CAAC;IACnB,kCAAkC;IAClC,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,IAAI,EAAE,CAAC;IACd,KAAK,EAAE,iBAAiB,CAAC;CAC1B;AAED,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;IACzC,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,gBAAgB;IAC/B,UAAU,CAAC,EAAE,YAAY,EAAE,CAAC;IAC5B,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB"}
|
package/dist/index.d.ts
CHANGED
|
@@ -14,6 +14,7 @@ export * from './search/index.js';
|
|
|
14
14
|
export * from './intelligence/index.js';
|
|
15
15
|
export * from './prompt/index.js';
|
|
16
16
|
export * from './data/index.js';
|
|
17
|
+
export * from './fact-registry/index.js';
|
|
17
18
|
export * from './stream/index.js';
|
|
18
19
|
export * from './server/index.js';
|
|
19
20
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAGH,cAAc,sBAAsB,CAAC;AAGrC,cAAc,6BAA6B,CAAC;AAG5C,cAAc,uBAAuB,CAAC;AAGtC,cAAc,kBAAkB,CAAC;AAGjC,cAAc,mBAAmB,CAAC;AAGlC,cAAc,yBAAyB,CAAC;AAGxC,cAAc,mBAAmB,CAAC;AAGlC,cAAc,iBAAiB,CAAC;AAGhC,cAAc,mBAAmB,CAAC;AAGlC,cAAc,mBAAmB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAGH,cAAc,sBAAsB,CAAC;AAGrC,cAAc,6BAA6B,CAAC;AAG5C,cAAc,uBAAuB,CAAC;AAGtC,cAAc,kBAAkB,CAAC;AAGjC,cAAc,mBAAmB,CAAC;AAGlC,cAAc,yBAAyB,CAAC;AAGxC,cAAc,mBAAmB,CAAC;AAGlC,cAAc,iBAAiB,CAAC;AAGhC,cAAc,0BAA0B,CAAC;AAGzC,cAAc,mBAAmB,CAAC;AAGlC,cAAc,mBAAmB,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -22,6 +22,8 @@ export * from './intelligence/index.js';
|
|
|
22
22
|
export * from './prompt/index.js';
|
|
23
23
|
// Build-time metadata loading
|
|
24
24
|
export * from './data/index.js';
|
|
25
|
+
// Fact Registry: verified facts for hallucination reduction
|
|
26
|
+
export * from './fact-registry/index.js';
|
|
25
27
|
// Stream utilities
|
|
26
28
|
export * from './stream/index.js';
|
|
27
29
|
// Server-side API handlers (chat handler, metadata init)
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"dynamic-layer.d.ts","sourceRoot":"","sources":["../../src/prompt/dynamic-layer.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAwBrD;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,kBAAkB,GAAG,MAAM,
|
|
1
|
+
{"version":3,"file":"dynamic-layer.d.ts","sourceRoot":"","sources":["../../src/prompt/dynamic-layer.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAwBrD;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,kBAAkB,GAAG,MAAM,CA+CpE"}
|
|
@@ -24,10 +24,10 @@ const LABELS = {
|
|
|
24
24
|
* Built fresh on every chat request.
|
|
25
25
|
*/
|
|
26
26
|
export function buildDynamicLayer(config) {
|
|
27
|
-
const { userQuery, articles, projects, evidenceSection } = config;
|
|
27
|
+
const { userQuery, articles, projects, evidenceSection, factSection } = config;
|
|
28
28
|
const lang = getLang(config.lang);
|
|
29
29
|
const l = LABELS[lang];
|
|
30
|
-
if (!articles.length && !projects.length)
|
|
30
|
+
if (!articles.length && !projects.length && !factSection)
|
|
31
31
|
return '';
|
|
32
32
|
const lines = [];
|
|
33
33
|
lines.push(`## ${l.relatedContent}`);
|
|
@@ -54,6 +54,10 @@ export function buildDynamicLayer(config) {
|
|
|
54
54
|
}
|
|
55
55
|
lines.push('');
|
|
56
56
|
}
|
|
57
|
+
if (factSection) {
|
|
58
|
+
lines.push(factSection);
|
|
59
|
+
lines.push('');
|
|
60
|
+
}
|
|
57
61
|
if (evidenceSection) {
|
|
58
62
|
lines.push(evidenceSection);
|
|
59
63
|
}
|
package/dist/prompt/types.d.ts
CHANGED
|
@@ -17,6 +17,8 @@ export interface DynamicLayerConfig {
|
|
|
17
17
|
articles: ArticleContext[];
|
|
18
18
|
projects: ProjectContext[];
|
|
19
19
|
evidenceSection?: string;
|
|
20
|
+
/** Pre-built verified-facts prompt section from Fact Registry */
|
|
21
|
+
factSection?: string;
|
|
20
22
|
lang?: string;
|
|
21
23
|
}
|
|
22
24
|
export interface PromptBuildConfig {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/prompt/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACzE,OAAO,KAAK,EAAE,iBAAiB,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAExE,MAAM,WAAW,iBAAiB;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,qBAAqB;IACpC,aAAa,EAAE,iBAAiB,GAAG,IAAI,CAAC;IACxC,YAAY,EAAE,YAAY,GAAG,IAAI,CAAC;IAClC,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,kBAAkB;IACjC,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,cAAc,EAAE,CAAC;IAC3B,QAAQ,EAAE,cAAc,EAAE,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,iBAAiB,CAAC;IAC1B,UAAU,EAAE,qBAAqB,CAAC;IAClC,OAAO,EAAE,kBAAkB,CAAC;CAC7B"}
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/prompt/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACzE,OAAO,KAAK,EAAE,iBAAiB,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAExE,MAAM,WAAW,iBAAiB;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,qBAAqB;IACpC,aAAa,EAAE,iBAAiB,GAAG,IAAI,CAAC;IACxC,YAAY,EAAE,YAAY,GAAG,IAAI,CAAC;IAClC,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,kBAAkB;IACjC,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,cAAc,EAAE,CAAC;IAC3B,QAAQ,EAAE,cAAc,EAAE,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,iEAAiE;IACjE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,iBAAiB,CAAC;IAC1B,UAAU,EAAE,qBAAqB,CAAC;IAClC,OAAO,EAAE,kBAAkB,CAAC;CAC7B"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { IndexedDocument } from './types.js';
|
|
2
|
+
export interface IDFMap {
|
|
3
|
+
/** term → IDF score (log-scaled) */
|
|
4
|
+
weights: Map<string, number>;
|
|
5
|
+
/** Total document count used for IDF computation */
|
|
6
|
+
docCount: number;
|
|
7
|
+
}
|
|
8
|
+
/**
|
|
9
|
+
* Builds an IDF (Inverse Document Frequency) map from indexed documents.
|
|
10
|
+
* Terms appearing in many documents get lower scores; rare terms get higher scores.
|
|
11
|
+
*/
|
|
12
|
+
export declare function buildIDFMap(documents: IndexedDocument[]): IDFMap;
|
|
13
|
+
/**
|
|
14
|
+
* Returns the IDF weight for a token. Defaults to a high value for unknown
|
|
15
|
+
* tokens (they are very rare, so should score higher than average).
|
|
16
|
+
*/
|
|
17
|
+
export declare function getIDFWeight(idfMap: IDFMap | null, token: string): number;
|
|
18
|
+
//# sourceMappingURL=idf.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"idf.d.ts","sourceRoot":"","sources":["../../src/search/idf.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAElD,MAAM,WAAW,MAAM;IACrB,oCAAoC;IACpC,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC7B,oDAAoD;IACpD,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED;;;GAGG;AACH,wBAAgB,WAAW,CAAC,SAAS,EAAE,eAAe,EAAE,GAAG,MAAM,CAmBhE;AAED;;;GAGG;AACH,wBAAgB,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,CAGzE"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
 * Computes corpus-wide IDF (Inverse Document Frequency) weights.
 *
 * Every distinct token of a document contributes exactly one to that token's
 * document frequency (df). The stored weight is log(N / (df + 1)) + 1, where
 * N is the number of documents, so widespread terms score low and rare terms
 * score high while every weight stays positive.
 *
 * @param documents Indexed documents; only each document's `tokens` is read.
 * @returns The term → weight map together with the document count N. An
 *   empty input produces an empty map and a docCount of 0.
 */
export function buildIDFMap(documents) {
    const docCount = documents.length;
    const weights = new Map();
    if (docCount === 0) {
        return { weights, docCount };
    }
    // Document frequency: in how many documents does each term occur?
    const docFrequency = new Map();
    for (const { tokens } of documents) {
        // De-duplicate first so repeats inside one document count only once.
        new Set(tokens).forEach((token) => {
            docFrequency.set(token, (docFrequency.get(token) ?? 0) + 1);
        });
    }
    // Smoothed IDF; the "+ 1" outside the log keeps all weights above zero.
    docFrequency.forEach((count, term) => {
        weights.set(term, Math.log(docCount / (count + 1)) + 1);
    });
    return { weights, docCount };
}
|
|
23
|
+
/**
 * Looks up the IDF weight of a single token.
 *
 * @param idfMap IDF map to consult, or null when none is available.
 * @param token Token to look up.
 * @returns 1 when no map is supplied; the stored weight when the token is
 *   known; otherwise log(docCount + 1) + 1, a deliberately high fallback
 *   because a token absent from the map is treated as very rare.
 */
export function getIDFWeight(idfMap, token) {
    if (idfMap) {
        const storedWeight = idfMap.weights.get(token);
        // Only a missing entry triggers the fallback; a stored 0 is returned as-is.
        return storedWeight ?? Math.log(idfMap.docCount + 1) + 1;
    }
    return 1;
}
|
package/dist/search/index.d.ts
CHANGED
|
@@ -1,5 +1,10 @@
|
|
|
1
1
|
export { initArticleIndex, initProjectIndex, searchArticles, searchProjects, mergeResults } from './search-api.js';
|
|
2
|
+
export { getIDFMapForIndex } from './search-index.js';
|
|
3
|
+
export { loadVectorIndex, clearVectorIndex, hasVectorIndex, rerankWithVectors } from './vector-reranker.js';
|
|
4
|
+
export type { VectorIndex, VectorChunk } from './vector-reranker.js';
|
|
2
5
|
export { getSessionCacheKey, getCachedContext, setCachedContext, deleteCachedContext, setCacheAdapter, getCacheAdapter, cleanupCache, SESSION_CACHE_TTL_SECONDS, SESSION_CACHE_TTL_MS, getCachedContextSync, setCachedContextSync, cleanupCacheLegacy, } from './session-cache.js';
|
|
3
6
|
export { normalizeText, tokenize, scoreDocument } from './search-utils.js';
|
|
7
|
+
export { buildIDFMap, getIDFWeight } from './idf.js';
|
|
8
|
+
export type { IDFMap } from './idf.js';
|
|
4
9
|
export type { SearchDocument, ArticleContext, ProjectContext, CachedSearchContext, SearchResult } from './types.js';
|
|
5
10
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/search/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,cAAc,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AACnH,OAAO,EACL,kBAAkB,EAClB,gBAAgB,EAChB,gBAAgB,EAChB,mBAAmB,EACnB,eAAe,EACf,eAAe,EACf,YAAY,EACZ,yBAAyB,EACzB,oBAAoB,EACpB,oBAAoB,EACpB,oBAAoB,EACpB,kBAAkB,GACnB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAC3E,YAAY,EAAE,cAAc,EAAE,cAAc,EAAE,cAAc,EAAE,mBAAmB,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/search/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,cAAc,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AACnH,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACtD,OAAO,EAAE,eAAe,EAAE,gBAAgB,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AAC5G,YAAY,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AACrE,OAAO,EACL,kBAAkB,EAClB,gBAAgB,EAChB,gBAAgB,EAChB,mBAAmB,EACnB,eAAe,EACf,eAAe,EACf,YAAY,EACZ,yBAAyB,EACzB,oBAAoB,EACpB,oBAAoB,EACpB,oBAAoB,EACpB,kBAAkB,GACnB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAC3E,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACrD,YAAY,EAAE,MAAM,EAAE,MAAM,UAAU,CAAC;AACvC,YAAY,EAAE,cAAc,EAAE,cAAc,EAAE,cAAc,EAAE,mBAAmB,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC"}
|
package/dist/search/index.js
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
1
|
export { initArticleIndex, initProjectIndex, searchArticles, searchProjects, mergeResults } from './search-api.js';
|
|
2
|
+
export { getIDFMapForIndex } from './search-index.js';
|
|
3
|
+
export { loadVectorIndex, clearVectorIndex, hasVectorIndex, rerankWithVectors } from './vector-reranker.js';
|
|
2
4
|
export { getSessionCacheKey, getCachedContext, setCachedContext, deleteCachedContext, setCacheAdapter, getCacheAdapter, cleanupCache, SESSION_CACHE_TTL_SECONDS, SESSION_CACHE_TTL_MS, getCachedContextSync, setCachedContextSync, cleanupCacheLegacy, } from './session-cache.js';
|
|
3
5
|
export { normalizeText, tokenize, scoreDocument } from './search-utils.js';
|
|
6
|
+
export { buildIDFMap, getIDFWeight } from './idf.js';
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"search-api.d.ts","sourceRoot":"","sources":["../../src/search/search-api.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"search-api.d.ts","sourceRoot":"","sources":["../../src/search/search-api.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,cAAc,EAAiC,cAAc,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAYhH;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,SAAS,EAAE,cAAc,EAAE,GAAG,IAAI,CAElE;AAED,wBAAgB,gBAAgB,CAAC,SAAS,EAAE,cAAc,EAAE,GAAG,IAAI,CAElE;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAC5B,KAAK,EAAE,MAAM,EACb,OAAO,GAAE;IAAE,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAO,GAC9D,cAAc,EAAE,CA6ClB;AAED;;GAEG;AACH,wBAAgB,cAAc,CAC5B,KAAK,EAAE,MAAM,EACb,OAAO,GAAE;IAAE,OAAO,CAAC,EAAE,MAAM,CAAA;CAAO,GACjC,cAAc,EAAE,CAgBlB;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,CAAC,SAAS;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,EAAE,OAAO,EAAE,CAAC,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,GAAG,CAAC,EAAE,CAUzF"}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { scoreDocument, filterLowRelevance, tokenize, pickAnchorTerms, normalizeText } from './search-utils.js';
|
|
2
|
-
import { buildSearchIndex } from './search-index.js';
|
|
2
|
+
import { buildSearchIndex, getIDFMapForIndex } from './search-index.js';
|
|
3
|
+
import { hasVectorIndex, rerankWithVectors } from './vector-reranker.js';
|
|
3
4
|
// Lazy-initialized, cached indexes
|
|
4
5
|
let articleIndex = null;
|
|
5
6
|
let projectIndex = null;
|
|
@@ -38,7 +39,7 @@ export function searchArticles(query, options = {}) {
|
|
|
38
39
|
const isDeepHit = options.enableDeepContent &&
|
|
39
40
|
topScore >= DEEP_CONTENT_SCORE_THRESHOLD &&
|
|
40
41
|
topScore > secondScore * 1.5;
|
|
41
|
-
|
|
42
|
+
let articles = results.map((result, index) => {
|
|
42
43
|
const baseUrl = options.siteUrl ?? '';
|
|
43
44
|
const url = result.url.startsWith('http') ? result.url : `${baseUrl}${result.url}`;
|
|
44
45
|
const fullContent = isDeepHit && index === 0 && result.content
|
|
@@ -55,6 +56,11 @@ export function searchArticles(query, options = {}) {
|
|
|
55
56
|
score: result.score,
|
|
56
57
|
};
|
|
57
58
|
});
|
|
59
|
+
// Optional: rerank using TF-IDF vector cosine similarity
|
|
60
|
+
if (hasVectorIndex() && articles.length > 1) {
|
|
61
|
+
articles = rerankWithVectors(query, articles);
|
|
62
|
+
}
|
|
63
|
+
return articles;
|
|
58
64
|
}
|
|
59
65
|
/**
|
|
60
66
|
* Searches for projects related to the query.
|
|
@@ -92,8 +98,9 @@ export function mergeResults(primary, secondary) {
|
|
|
92
98
|
}
|
|
93
99
|
// ---- Internals ----
|
|
94
100
|
function scoreDocs(index, tokens, limit) {
|
|
101
|
+
const idfMap = getIDFMapForIndex();
|
|
95
102
|
return index
|
|
96
|
-
.map(doc => ({ ...doc, score: scoreDocument(tokens, doc) }))
|
|
103
|
+
.map(doc => ({ ...doc, score: scoreDocument(tokens, doc, idfMap) }))
|
|
97
104
|
.filter(doc => doc.score > 0)
|
|
98
105
|
.sort((a, b) => b.score - a.score)
|
|
99
106
|
.slice(0, limit);
|
|
@@ -1,6 +1,12 @@
|
|
|
1
|
+
import { type IDFMap } from './idf.js';
|
|
1
2
|
import type { SearchDocument, IndexedDocument } from './types.js';
|
|
2
3
|
/**
|
|
3
|
-
* Builds an in-memory inverted index from a list of documents
|
|
4
|
+
* Builds an in-memory inverted index from a list of documents
|
|
5
|
+
* and computes IDF weights across the corpus.
|
|
6
|
+
*
|
|
7
|
+
* IDF map is only updated when the document set is non-empty,
|
|
8
|
+
* preventing an empty index (e.g. projects) from wiping article IDF.
|
|
4
9
|
*/
|
|
5
10
|
export declare function buildSearchIndex(documents: SearchDocument[]): IndexedDocument[];
|
|
11
|
+
export declare function getIDFMapForIndex(): IDFMap | null;
|
|
6
12
|
//# sourceMappingURL=search-index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"search-index.d.ts","sourceRoot":"","sources":["../../src/search/search-index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"search-index.d.ts","sourceRoot":"","sources":["../../src/search/search-index.ts"],"names":[],"mappings":"AACA,OAAO,EAAe,KAAK,MAAM,EAAE,MAAM,UAAU,CAAC;AACpD,OAAO,KAAK,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAIlE;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAAC,SAAS,EAAE,cAAc,EAAE,GAAG,eAAe,EAAE,CAU/E;AAED,wBAAgB,iBAAiB,IAAI,MAAM,GAAG,IAAI,CAEjD"}
|