@astro-minimax/ai 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/dist/data/index.d.ts +1 -0
  2. package/dist/data/index.d.ts.map +1 -1
  3. package/dist/data/metadata-loader.d.ts +2 -2
  4. package/dist/data/metadata-loader.d.ts.map +1 -1
  5. package/dist/data/metadata-loader.js +15 -3
  6. package/dist/data/types.d.ts +2 -0
  7. package/dist/data/types.d.ts.map +1 -1
  8. package/dist/fact-registry/fact-matcher.d.ts +12 -0
  9. package/dist/fact-registry/fact-matcher.d.ts.map +1 -0
  10. package/dist/fact-registry/fact-matcher.js +94 -0
  11. package/dist/fact-registry/index.d.ts +5 -0
  12. package/dist/fact-registry/index.d.ts.map +1 -0
  13. package/dist/fact-registry/index.js +3 -0
  14. package/dist/fact-registry/prompt-injector.d.ts +7 -0
  15. package/dist/fact-registry/prompt-injector.d.ts.map +1 -0
  16. package/dist/fact-registry/prompt-injector.js +57 -0
  17. package/dist/fact-registry/registry.d.ts +10 -0
  18. package/dist/fact-registry/registry.d.ts.map +1 -0
  19. package/dist/fact-registry/registry.js +38 -0
  20. package/dist/fact-registry/types.d.ts +46 -0
  21. package/dist/fact-registry/types.d.ts.map +1 -0
  22. package/dist/fact-registry/types.js +5 -0
  23. package/dist/index.d.ts +1 -0
  24. package/dist/index.d.ts.map +1 -1
  25. package/dist/index.js +2 -0
  26. package/dist/prompt/dynamic-layer.d.ts.map +1 -1
  27. package/dist/prompt/dynamic-layer.js +6 -2
  28. package/dist/prompt/types.d.ts +2 -0
  29. package/dist/prompt/types.d.ts.map +1 -1
  30. package/dist/search/idf.d.ts +18 -0
  31. package/dist/search/idf.d.ts.map +1 -0
  32. package/dist/search/idf.js +31 -0
  33. package/dist/search/index.d.ts +5 -0
  34. package/dist/search/index.d.ts.map +1 -1
  35. package/dist/search/index.js +3 -0
  36. package/dist/search/search-api.d.ts.map +1 -1
  37. package/dist/search/search-api.js +10 -3
  38. package/dist/search/search-index.d.ts +7 -1
  39. package/dist/search/search-index.d.ts.map +1 -1
  40. package/dist/search/search-index.js +15 -2
  41. package/dist/search/search-utils.d.ts +7 -3
  42. package/dist/search/search-utils.d.ts.map +1 -1
  43. package/dist/search/search-utils.js +23 -15
  44. package/dist/search/vector-reranker.d.ts +38 -0
  45. package/dist/search/vector-reranker.d.ts.map +1 -0
  46. package/dist/search/vector-reranker.js +135 -0
  47. package/dist/server/chat-handler.d.ts.map +1 -1
  48. package/dist/server/chat-handler.js +8 -2
  49. package/dist/server/metadata-init.d.ts.map +1 -1
  50. package/dist/server/metadata-init.js +2 -0
  51. package/dist/server/types.d.ts +2 -0
  52. package/dist/server/types.d.ts.map +1 -1
  53. package/package.json +6 -2
  54. package/src/components/AIChatWidget.astro +1 -1
@@ -1,3 +1,4 @@
1
1
  export { preloadMetadata, clearMetadataCache, getMetadata, getArticleSummary, getAllSummaries, getAuthorContext, getVoiceProfile, } from './metadata-loader.js';
2
2
  export type { AISummariesFile, AuthorContextFile, VoiceProfile, LoadedMetadata, ArticleSummaryData, AuthorPost, } from './types.js';
3
+ export type { FactRegistryFile } from '../fact-registry/types.js';
3
4
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/data/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,kBAAkB,EAClB,WAAW,EACX,iBAAiB,EACjB,eAAe,EACf,gBAAgB,EAChB,eAAe,GAChB,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EACV,eAAe,EACf,iBAAiB,EACjB,YAAY,EACZ,cAAc,EACd,kBAAkB,EAClB,UAAU,GACX,MAAM,YAAY,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/data/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,kBAAkB,EAClB,WAAW,EACX,iBAAiB,EACjB,eAAe,EACf,gBAAgB,EAChB,eAAe,GAChB,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EACV,eAAe,EACf,iBAAiB,EACjB,YAAY,EACZ,cAAc,EACd,kBAAkB,EAClB,UAAU,GACX,MAAM,YAAY,CAAC;AACpB,YAAY,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC"}
@@ -5,11 +5,11 @@ import type { AuthorContextFile, VoiceProfile, LoadedMetadata, ArticleSummaryDat
5
5
  *
6
6
  * Example (in functions/lib/ai.ts):
7
7
  * import summaries from '../../datas/ai-summaries.json' with { type: 'json' };
8
- * preloadMetadata({ summaries, authorContext, voiceProfile });
8
+ * preloadMetadata({ summaries, authorContext, voiceProfile, factRegistry });
9
9
  */
10
10
  export declare function preloadMetadata(data: Partial<LoadedMetadata>): void;
11
11
  /**
12
- * Clears the metadata cache (useful for testing).
12
+ * Clears the metadata cache and all associated sub-caches (useful for testing).
13
13
  */
14
14
  export declare function clearMetadataCache(): void;
15
15
  /**
@@ -1 +1 @@
1
- {"version":3,"file":"metadata-loader.d.ts","sourceRoot":"","sources":["../../src/data/metadata-loader.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAmB,iBAAiB,EAAE,YAAY,EAAE,cAAc,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAKvH;;;;;;;GAOG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,OAAO,CAAC,cAAc,CAAC,GAAG,IAAI,CAMnE;AAED;;GAEG;AACH,wBAAgB,kBAAkB,IAAI,IAAI,CAEzC;AAED;;GAEG;AACH,wBAAgB,WAAW,IAAI,cAAc,CAE5C;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,kBAAkB,GAAG,SAAS,CAE9E;AAED;;GAEG;AACH,wBAAgB,eAAe,IAAI,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,GAAG,kBAAkB,CAAC,CAG9E;AAED;;GAEG;AACH,wBAAgB,gBAAgB,IAAI,iBAAiB,GAAG,IAAI,CAE3D;AAED;;GAEG;AACH,wBAAgB,eAAe,IAAI,YAAY,GAAG,IAAI,CAErD"}
1
+ {"version":3,"file":"metadata-loader.d.ts","sourceRoot":"","sources":["../../src/data/metadata-loader.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAmB,iBAAiB,EAAE,YAAY,EAAE,cAAc,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAQvH;;;;;;;GAOG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,OAAO,CAAC,cAAc,CAAC,GAAG,IAAI,CAgBnE;AAED;;GAEG;AACH,wBAAgB,kBAAkB,IAAI,IAAI,CAIzC;AAED;;GAEG;AACH,wBAAgB,WAAW,IAAI,cAAc,CAE5C;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,kBAAkB,GAAG,SAAS,CAE9E;AAED;;GAEG;AACH,wBAAgB,eAAe,IAAI,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,GAAG,kBAAkB,CAAC,CAG9E;AAED;;GAEG;AACH,wBAAgB,gBAAgB,IAAI,iBAAiB,GAAG,IAAI,CAE3D;AAED;;GAEG;AACH,wBAAgB,eAAe,IAAI,YAAY,GAAG,IAAI,CAErD"}
@@ -1,3 +1,5 @@
1
+ import { loadFactRegistry as loadFactRegistryCache } from '../fact-registry/registry.js';
2
+ import { loadVectorIndex as loadVectorIndexCache } from '../search/vector-reranker.js';
1
3
  // Lazy-loaded, memory-cached metadata
2
4
  let cachedMetadata = null;
3
5
  /**
@@ -6,26 +8,36 @@ let cachedMetadata = null;
6
8
  *
7
9
  * Example (in functions/lib/ai.ts):
8
10
  * import summaries from '../../datas/ai-summaries.json' with { type: 'json' };
9
- * preloadMetadata({ summaries, authorContext, voiceProfile });
11
+ * preloadMetadata({ summaries, authorContext, voiceProfile, factRegistry });
10
12
  */
11
13
  export function preloadMetadata(data) {
12
14
  cachedMetadata = {
13
15
  summaries: data.summaries ?? null,
14
16
  authorContext: data.authorContext ?? null,
15
17
  voiceProfile: data.voiceProfile ?? null,
18
+ factRegistry: data.factRegistry ?? null,
19
+ vectorIndex: data.vectorIndex ?? null,
16
20
  };
21
+ if (cachedMetadata.factRegistry) {
22
+ loadFactRegistryCache(cachedMetadata.factRegistry);
23
+ }
24
+ if (cachedMetadata.vectorIndex) {
25
+ loadVectorIndexCache(cachedMetadata.vectorIndex);
26
+ }
17
27
  }
18
28
  /**
19
- * Clears the metadata cache (useful for testing).
29
+ * Clears the metadata cache and all associated sub-caches (useful for testing).
20
30
  */
21
31
  export function clearMetadataCache() {
22
32
  cachedMetadata = null;
33
+ loadFactRegistryCache(null);
34
+ loadVectorIndexCache(null);
23
35
  }
24
36
  /**
25
37
  * Returns the cached metadata. Must call preloadMetadata() first.
26
38
  */
27
39
  export function getMetadata() {
28
- return cachedMetadata ?? { summaries: null, authorContext: null, voiceProfile: null };
40
+ return cachedMetadata ?? { summaries: null, authorContext: null, voiceProfile: null, factRegistry: null, vectorIndex: null };
29
41
  }
30
42
  /**
31
43
  * Returns the AI-generated summary for an article by its slug.
@@ -47,5 +47,7 @@ export interface LoadedMetadata {
47
47
  summaries: AISummariesFile | null;
48
48
  authorContext: AuthorContextFile | null;
49
49
  voiceProfile: VoiceProfile | null;
50
+ factRegistry: import('../fact-registry/types.js').FactRegistryFile | null;
51
+ vectorIndex: import('../search/vector-reranker.js').VectorIndex | null;
50
52
  }
51
53
  //# sourceMappingURL=types.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/data/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,kBAAkB;IACjC,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,kBAAkB,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE;QACJ,WAAW,EAAE,MAAM,CAAC;QACpB,KAAK,EAAE,MAAM,CAAC;QACd,cAAc,EAAE,MAAM,CAAC;KACxB,CAAC;IACF,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,mBAAmB,CAAC,CAAC;CAC/C;AAED,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,GAAG,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,aAAa,CAAC;IACvB,KAAK,EAAE,UAAU,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,YAAY;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED,MAAM,WAAW,cAAc;IAC7B,SAAS,EAAE,eAAe,GAAG,IAAI,CAAC;IAClC,aAAa,EAAE,iBAAiB,GAAG,IAAI,CAAC;IACxC,YAAY,EAAE,YAAY,GAAG,IAAI,CAAC;CACnC"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/data/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,kBAAkB;IACjC,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,kBAAkB,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE;QACJ,WAAW,EAAE,MAAM,CAAC;QACpB,KAAK,EAAE,MAAM,CAAC;QACd,cAAc,EAAE,MAAM,CAAC;KACxB,CAAC;IACF,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,mBAAmB,CAAC,CAAC;CAC/C;AAED,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,GAAG,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE,aAAa,CAAC;IACvB,KAAK,EAAE,UAAU,EAAE,CAAC;CACrB;AAED,MAAM,WAAW,YAAY;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED,MAAM,WAAW,cAAc;IAC7B,SAAS,EAAE,eAAe,GAAG,IAAI,CAAC;IAClC,aAAa,EAAE,iBAAiB,GAAG,IAAI,CAAC;IACxC,YAAY,EAAE,YAAY,GAAG,IAAI,CAAC;IAClC,YAAY,EAAE,OAAO,2BAA2B,EAAE,gBAAgB,GAAG,IAAI,CAAC;IAC1E,WAAW,EAAE,OAAO,8BAA8B,EAAE,WAAW,GAAG,IAAI,CAAC;CACxE"}
@@ -0,0 +1,12 @@
1
+ import type { Fact } from './types.js';
2
+ /**
3
+ * Selects facts most relevant to the user's query.
4
+ *
5
+ * Strategy:
6
+ * 1. Always include very-high-confidence core facts (confidence >= 0.95)
7
+ * 2. Add category-matched facts based on query keywords
8
+ * 3. Add tag-matched facts for more specific queries
9
+ * 4. Deduplicate and cap total count
10
+ */
11
+ export declare function matchFactsToQuery(query: string, lang?: string, maxFacts?: number): Fact[];
12
+ //# sourceMappingURL=fact-matcher.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fact-matcher.d.ts","sourceRoot":"","sources":["../../src/fact-registry/fact-matcher.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAgB,MAAM,YAAY,CAAC;AAwDrD;;;;;;;;GAQG;AACH,wBAAgB,iBAAiB,CAC/B,KAAK,EAAE,MAAM,EACb,IAAI,CAAC,EAAE,MAAM,EACb,QAAQ,SAAK,GACZ,IAAI,EAAE,CA2CR"}
@@ -0,0 +1,94 @@
1
+ import { queryFacts } from './registry.js';
2
+ /**
3
+ * Category detection keywords — when any keyword appears in the user query,
4
+ * the corresponding fact category is considered relevant.
5
+ */
6
+ const CATEGORY_KEYWORDS = {
7
+ author: [
8
+ '作者', '博主', '谁', '关于我', '自我介绍', '个人',
9
+ 'author', 'who', 'about me', 'introduce',
10
+ ],
11
+ blog: [
12
+ '博客', '文章', '多少', '数量', '统计', '总共', '分类', '标签', '语言',
13
+ 'blog', 'post', 'how many', 'count', 'statistic', 'category', 'tag',
14
+ ],
15
+ content: [
16
+ '写过', '提到', '讨论', '观点', '主题', '话题', '涵盖', '领域',
17
+ 'wrote', 'mention', 'discuss', 'topic', 'cover', 'area', 'opinion',
18
+ ],
19
+ project: [
20
+ '项目', '开源', '仓库', '工具', '产品',
21
+ 'project', 'open source', 'repo', 'github', 'tool', 'product',
22
+ ],
23
+ tech: [
24
+ '技术', '技术栈', '框架', '库', '编程语言', '前端', '后端',
25
+ 'tech', 'stack', 'framework', 'library', 'language', 'frontend', 'backend',
26
+ ],
27
+ };
28
+ /**
29
+ * Detect which fact categories are relevant to the user query.
30
+ */
31
+ function detectRelevantCategories(query) {
32
+ const q = query.toLowerCase();
33
+ const matched = [];
34
+ for (const [category, keywords] of Object.entries(CATEGORY_KEYWORDS)) {
35
+ if (keywords.some(kw => q.includes(kw))) {
36
+ matched.push(category);
37
+ }
38
+ }
39
+ return matched;
40
+ }
41
+ /**
42
+ * Extract potential matching tags from the query by splitting into tokens.
43
+ */
44
+ function extractQueryTags(query) {
45
+ const tokens = query.match(/[A-Za-z][A-Za-z0-9.+#-]{1,}|[\u4e00-\u9fa5]{2,6}/g);
46
+ return tokens?.map(t => t.toLowerCase()) ?? [];
47
+ }
48
+ /**
49
+ * Selects facts most relevant to the user's query.
50
+ *
51
+ * Strategy:
52
+ * 1. Always include very-high-confidence core facts (confidence >= 0.95)
53
+ * 2. Add category-matched facts based on query keywords
54
+ * 3. Add tag-matched facts for more specific queries
55
+ * 4. Deduplicate and cap total count
56
+ */
57
+ export function matchFactsToQuery(query, lang, maxFacts = 15) {
58
+ const categories = detectRelevantCategories(query);
59
+ const queryTags = extractQueryTags(query);
60
+ // Layer 1: always-present core facts (highest confidence)
61
+ const coreFacts = queryFacts({
62
+ minConfidence: 0.95,
63
+ lang,
64
+ limit: 5,
65
+ });
66
+ // Layer 2: category-matched facts
67
+ const categoryFacts = categories.length > 0
68
+ ? queryFacts({
69
+ categories,
70
+ minConfidence: 0.7,
71
+ lang,
72
+ limit: 10,
73
+ })
74
+ : [];
75
+ // Layer 3: tag-matched facts (for specificity)
76
+ const tagFacts = queryTags.length > 0
77
+ ? queryFacts({
78
+ tags: queryTags,
79
+ minConfidence: 0.6,
80
+ lang,
81
+ limit: 5,
82
+ })
83
+ : [];
84
+ // Merge with deduplication, preserving priority order
85
+ const seen = new Set();
86
+ const result = [];
87
+ for (const fact of [...categoryFacts, ...tagFacts, ...coreFacts]) {
88
+ if (!seen.has(fact.id)) {
89
+ seen.add(fact.id);
90
+ result.push(fact);
91
+ }
92
+ }
93
+ return result.slice(0, maxFacts);
94
+ }
@@ -0,0 +1,5 @@
1
+ export { loadFactRegistry, clearFactRegistry, getFactRegistry, queryFacts, } from './registry.js';
2
+ export { matchFactsToQuery } from './fact-matcher.js';
3
+ export { buildFactSection } from './prompt-injector.js';
4
+ export type { Fact, FactCategory, FactSource, FactRegistryFile, FactRegistryStats, FactQueryOptions, } from './types.js';
5
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/fact-registry/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,gBAAgB,EAChB,iBAAiB,EACjB,eAAe,EACf,UAAU,GACX,MAAM,eAAe,CAAC;AACvB,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACtD,OAAO,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACxD,YAAY,EACV,IAAI,EACJ,YAAY,EACZ,UAAU,EACV,gBAAgB,EAChB,iBAAiB,EACjB,gBAAgB,GACjB,MAAM,YAAY,CAAC"}
@@ -0,0 +1,3 @@
1
+ export { loadFactRegistry, clearFactRegistry, getFactRegistry, queryFacts, } from './registry.js';
2
+ export { matchFactsToQuery } from './fact-matcher.js';
3
+ export { buildFactSection } from './prompt-injector.js';
@@ -0,0 +1,7 @@
1
+ import type { Fact } from './types.js';
2
+ /**
3
+ * Formats matched facts into a prompt section ready for injection.
4
+ * Groups facts by category with clear structure.
5
+ */
6
+ export declare function buildFactSection(facts: Fact[], lang?: string): string;
7
+ //# sourceMappingURL=prompt-injector.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"prompt-injector.d.ts","sourceRoot":"","sources":["../../src/fact-registry/prompt-injector.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAgB,MAAM,YAAY,CAAC;AAgCrD;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,IAAI,GAAE,MAAa,GAAG,MAAM,CA+B3E"}
@@ -0,0 +1,57 @@
1
+ const CATEGORY_LABELS = {
2
+ zh: {
3
+ author: '关于作者',
4
+ blog: '博客数据',
5
+ content: '内容事实',
6
+ project: '项目信息',
7
+ tech: '技术相关',
8
+ },
9
+ en: {
10
+ author: 'About the Author',
11
+ blog: 'Blog Statistics',
12
+ content: 'Content Facts',
13
+ project: 'Project Info',
14
+ tech: 'Tech Related',
15
+ },
16
+ };
17
+ const SECTION_TEXT = {
18
+ zh: {
19
+ title: '已验证事实(基于博客真实数据)',
20
+ instruction: '以上事实来自博客的真实数据。回答时优先使用这些已验证的事实,不要编造与之矛盾的信息。如果某个问题的答案不在已验证事实中,请如实说明。',
21
+ },
22
+ en: {
23
+ title: 'Verified Facts (based on real blog data)',
24
+ instruction: 'The above facts are derived from real blog data. Prioritize these verified facts when answering. Do not fabricate information that contradicts them. If the answer is not among verified facts, state that honestly.',
25
+ },
26
+ };
27
+ /**
28
+ * Formats matched facts into a prompt section ready for injection.
29
+ * Groups facts by category with clear structure.
30
+ */
31
+ export function buildFactSection(facts, lang = 'zh') {
32
+ if (!facts.length)
33
+ return '';
34
+ const l = lang === 'zh' ? 'zh' : 'en';
35
+ const labels = CATEGORY_LABELS[l];
36
+ const text = SECTION_TEXT[l];
37
+ // Group by category
38
+ const grouped = new Map();
39
+ for (const fact of facts) {
40
+ const group = grouped.get(fact.category) ?? [];
41
+ group.push(fact);
42
+ grouped.set(fact.category, group);
43
+ }
44
+ const lines = [];
45
+ lines.push(`## ${text.title}`);
46
+ for (const [category, categoryFacts] of grouped) {
47
+ const label = labels[category] ?? category;
48
+ lines.push('');
49
+ lines.push(`### ${label}`);
50
+ for (const fact of categoryFacts) {
51
+ lines.push(`- ${fact.statement}`);
52
+ }
53
+ }
54
+ lines.push('');
55
+ lines.push(`> ${text.instruction}`);
56
+ return lines.join('\n');
57
+ }
@@ -0,0 +1,10 @@
1
+ import type { Fact, FactRegistryFile, FactQueryOptions } from './types.js';
2
+ export declare function loadFactRegistry(data: FactRegistryFile | null): void;
3
+ export declare function clearFactRegistry(): void;
4
+ export declare function getFactRegistry(): FactRegistryFile | null;
5
+ /**
6
+ * Query facts with optional filters.
7
+ * Returns facts sorted by confidence (highest first).
8
+ */
9
+ export declare function queryFacts(options?: FactQueryOptions): Fact[];
10
+ //# sourceMappingURL=registry.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"registry.d.ts","sourceRoot":"","sources":["../../src/fact-registry/registry.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAI3E,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,gBAAgB,GAAG,IAAI,GAAG,IAAI,CAEpE;AAED,wBAAgB,iBAAiB,IAAI,IAAI,CAExC;AAED,wBAAgB,eAAe,IAAI,gBAAgB,GAAG,IAAI,CAEzD;AAED;;;GAGG;AACH,wBAAgB,UAAU,CAAC,OAAO,GAAE,gBAAqB,GAAG,IAAI,EAAE,CAgCjE"}
@@ -0,0 +1,38 @@
1
+ let cachedRegistry = null;
2
+ export function loadFactRegistry(data) {
3
+ cachedRegistry = data;
4
+ }
5
+ export function clearFactRegistry() {
6
+ cachedRegistry = null;
7
+ }
8
+ export function getFactRegistry() {
9
+ return cachedRegistry;
10
+ }
11
+ /**
12
+ * Query facts with optional filters.
13
+ * Returns facts sorted by confidence (highest first).
14
+ */
15
+ export function queryFacts(options = {}) {
16
+ if (!cachedRegistry?.facts.length)
17
+ return [];
18
+ let facts = cachedRegistry.facts;
19
+ if (options.categories?.length) {
20
+ const cats = new Set(options.categories);
21
+ facts = facts.filter(f => cats.has(f.category));
22
+ }
23
+ if (options.lang) {
24
+ facts = facts.filter(f => f.lang === options.lang || f.lang === 'all');
25
+ }
26
+ if (options.minConfidence !== undefined) {
27
+ facts = facts.filter(f => f.confidence >= options.minConfidence);
28
+ }
29
+ if (options.tags?.length) {
30
+ const tagSet = new Set(options.tags.map(t => t.toLowerCase()));
31
+ facts = facts.filter(f => f.tags.some(t => tagSet.has(t.toLowerCase())));
32
+ }
33
+ facts = [...facts].sort((a, b) => b.confidence - a.confidence);
34
+ if (options.limit && options.limit > 0) {
35
+ facts = facts.slice(0, options.limit);
36
+ }
37
+ return facts;
38
+ }
@@ -0,0 +1,46 @@
1
+ /**
2
+ * Fact Registry — structured, verifiable facts extracted from blog data.
3
+ * Injected into prompts to ground AI responses in real data and reduce hallucination.
4
+ */
5
+ export type FactCategory = 'author' | 'blog' | 'content' | 'project' | 'tech';
6
+ /**
7
+ * How the fact was produced:
8
+ * - `explicit`: directly stated in blog content or configuration
9
+ * - `derived`: computed from blog data (counts, aggregations)
10
+ * - `aggregated`: synthesized from multiple posts/sources
11
+ */
12
+ export type FactSource = 'explicit' | 'derived' | 'aggregated';
13
+ export interface Fact {
14
+ id: string;
15
+ category: FactCategory;
16
+ /** Human-readable statement in the target language */
17
+ statement: string;
18
+ /** Where this fact comes from (file, config, computation) */
19
+ evidence: string;
20
+ source: FactSource;
21
+ /** 0–1 reliability score; 1 = absolute certainty */
22
+ confidence: number;
23
+ /** Keywords for query matching */
24
+ tags: string[];
25
+ lang: string;
26
+ }
27
+ export interface FactRegistryFile {
28
+ $schema: string;
29
+ generatedAt: string;
30
+ version: number;
31
+ facts: Fact[];
32
+ stats: FactRegistryStats;
33
+ }
34
+ export interface FactRegistryStats {
35
+ total: number;
36
+ byCategory: Record<FactCategory, number>;
37
+ avgConfidence: number;
38
+ }
39
+ export interface FactQueryOptions {
40
+ categories?: FactCategory[];
41
+ tags?: string[];
42
+ minConfidence?: number;
43
+ lang?: string;
44
+ limit?: number;
45
+ }
46
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/fact-registry/types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,MAAM,YAAY,GAAG,QAAQ,GAAG,MAAM,GAAG,SAAS,GAAG,SAAS,GAAG,MAAM,CAAC;AAE9E;;;;;GAKG;AACH,MAAM,MAAM,UAAU,GAAG,UAAU,GAAG,SAAS,GAAG,YAAY,CAAC;AAE/D,MAAM,WAAW,IAAI;IACnB,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,YAAY,CAAC;IACvB,sDAAsD;IACtD,SAAS,EAAE,MAAM,CAAC;IAClB,6DAA6D;IAC7D,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,UAAU,CAAC;IACnB,oDAAoD;IACpD,UAAU,EAAE,MAAM,CAAC;IACnB,kCAAkC;IAClC,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,IAAI,EAAE,CAAC;IACd,KAAK,EAAE,iBAAiB,CAAC;CAC1B;AAED,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;IACzC,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,gBAAgB;IAC/B,UAAU,CAAC,EAAE,YAAY,EAAE,CAAC;IAC5B,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB"}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Fact Registry — structured, verifiable facts extracted from blog data.
3
+ * Injected into prompts to ground AI responses in real data and reduce hallucination.
4
+ */
5
+ export {};
package/dist/index.d.ts CHANGED
@@ -14,6 +14,7 @@ export * from './search/index.js';
14
14
  export * from './intelligence/index.js';
15
15
  export * from './prompt/index.js';
16
16
  export * from './data/index.js';
17
+ export * from './fact-registry/index.js';
17
18
  export * from './stream/index.js';
18
19
  export * from './server/index.js';
19
20
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAGH,cAAc,sBAAsB,CAAC;AAGrC,cAAc,6BAA6B,CAAC;AAG5C,cAAc,uBAAuB,CAAC;AAGtC,cAAc,kBAAkB,CAAC;AAGjC,cAAc,mBAAmB,CAAC;AAGlC,cAAc,yBAAyB,CAAC;AAGxC,cAAc,mBAAmB,CAAC;AAGlC,cAAc,iBAAiB,CAAC;AAGhC,cAAc,mBAAmB,CAAC;AAGlC,cAAc,mBAAmB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAGH,cAAc,sBAAsB,CAAC;AAGrC,cAAc,6BAA6B,CAAC;AAG5C,cAAc,uBAAuB,CAAC;AAGtC,cAAc,kBAAkB,CAAC;AAGjC,cAAc,mBAAmB,CAAC;AAGlC,cAAc,yBAAyB,CAAC;AAGxC,cAAc,mBAAmB,CAAC;AAGlC,cAAc,iBAAiB,CAAC;AAGhC,cAAc,0BAA0B,CAAC;AAGzC,cAAc,mBAAmB,CAAC;AAGlC,cAAc,mBAAmB,CAAC"}
package/dist/index.js CHANGED
@@ -22,6 +22,8 @@ export * from './intelligence/index.js';
22
22
  export * from './prompt/index.js';
23
23
  // Build-time metadata loading
24
24
  export * from './data/index.js';
25
+ // Fact Registry: verified facts for hallucination reduction
26
+ export * from './fact-registry/index.js';
25
27
  // Stream utilities
26
28
  export * from './stream/index.js';
27
29
  // Server-side API handlers (chat handler, metadata init)
@@ -1 +1 @@
1
- {"version":3,"file":"dynamic-layer.d.ts","sourceRoot":"","sources":["../../src/prompt/dynamic-layer.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAwBrD;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,kBAAkB,GAAG,MAAM,CA0CpE"}
1
+ {"version":3,"file":"dynamic-layer.d.ts","sourceRoot":"","sources":["../../src/prompt/dynamic-layer.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAwBrD;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,kBAAkB,GAAG,MAAM,CA+CpE"}
@@ -24,10 +24,10 @@ const LABELS = {
24
24
  * Built fresh on every chat request.
25
25
  */
26
26
  export function buildDynamicLayer(config) {
27
- const { userQuery, articles, projects, evidenceSection } = config;
27
+ const { userQuery, articles, projects, evidenceSection, factSection } = config;
28
28
  const lang = getLang(config.lang);
29
29
  const l = LABELS[lang];
30
- if (!articles.length && !projects.length)
30
+ if (!articles.length && !projects.length && !factSection)
31
31
  return '';
32
32
  const lines = [];
33
33
  lines.push(`## ${l.relatedContent}`);
@@ -54,6 +54,10 @@ export function buildDynamicLayer(config) {
54
54
  }
55
55
  lines.push('');
56
56
  }
57
+ if (factSection) {
58
+ lines.push(factSection);
59
+ lines.push('');
60
+ }
57
61
  if (evidenceSection) {
58
62
  lines.push(evidenceSection);
59
63
  }
@@ -17,6 +17,8 @@ export interface DynamicLayerConfig {
17
17
  articles: ArticleContext[];
18
18
  projects: ProjectContext[];
19
19
  evidenceSection?: string;
20
+ /** Pre-built verified-facts prompt section from Fact Registry */
21
+ factSection?: string;
20
22
  lang?: string;
21
23
  }
22
24
  export interface PromptBuildConfig {
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/prompt/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACzE,OAAO,KAAK,EAAE,iBAAiB,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAExE,MAAM,WAAW,iBAAiB;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,qBAAqB;IACpC,aAAa,EAAE,iBAAiB,GAAG,IAAI,CAAC;IACxC,YAAY,EAAE,YAAY,GAAG,IAAI,CAAC;IAClC,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,kBAAkB;IACjC,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,cAAc,EAAE,CAAC;IAC3B,QAAQ,EAAE,cAAc,EAAE,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,iBAAiB,CAAC;IAC1B,UAAU,EAAE,qBAAqB,CAAC;IAClC,OAAO,EAAE,kBAAkB,CAAC;CAC7B"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/prompt/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACzE,OAAO,KAAK,EAAE,iBAAiB,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAC;AAExE,MAAM,WAAW,iBAAiB;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,qBAAqB;IACpC,aAAa,EAAE,iBAAiB,GAAG,IAAI,CAAC;IACxC,YAAY,EAAE,YAAY,GAAG,IAAI,CAAC;IAClC,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,kBAAkB;IACjC,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,cAAc,EAAE,CAAC;IAC3B,QAAQ,EAAE,cAAc,EAAE,CAAC;IAC3B,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,iEAAiE;IACjE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,iBAAiB,CAAC;IAC1B,UAAU,EAAE,qBAAqB,CAAC;IAClC,OAAO,EAAE,kBAAkB,CAAC;CAC7B"}
@@ -0,0 +1,18 @@
1
+ import type { IndexedDocument } from './types.js';
2
+ export interface IDFMap {
3
+ /** term → IDF score (log-scaled) */
4
+ weights: Map<string, number>;
5
+ /** Total document count used for IDF computation */
6
+ docCount: number;
7
+ }
8
+ /**
9
+ * Builds an IDF (Inverse Document Frequency) map from indexed documents.
10
+ * Terms appearing in many documents get lower scores; rare terms get higher scores.
11
+ */
12
+ export declare function buildIDFMap(documents: IndexedDocument[]): IDFMap;
13
+ /**
14
+ * Returns the IDF weight for a token. Defaults to a high value for unknown
15
+ * tokens (they are very rare, so should score higher than average).
16
+ */
17
+ export declare function getIDFWeight(idfMap: IDFMap | null, token: string): number;
18
+ //# sourceMappingURL=idf.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"idf.d.ts","sourceRoot":"","sources":["../../src/search/idf.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAElD,MAAM,WAAW,MAAM;IACrB,oCAAoC;IACpC,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC7B,oDAAoD;IACpD,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED;;;GAGG;AACH,wBAAgB,WAAW,CAAC,SAAS,EAAE,eAAe,EAAE,GAAG,MAAM,CAmBhE;AAED;;;GAGG;AACH,wBAAgB,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,CAGzE"}
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Builds an IDF (Inverse Document Frequency) map from indexed documents.
3
+ * Terms appearing in many documents get lower scores; rare terms get higher scores.
4
+ */
5
+ export function buildIDFMap(documents) {
6
+ const N = documents.length;
7
+ if (N === 0)
8
+ return { weights: new Map(), docCount: 0 };
9
+ const df = new Map();
10
+ for (const doc of documents) {
11
+ const uniqueTokens = new Set(doc.tokens);
12
+ for (const token of uniqueTokens) {
13
+ df.set(token, (df.get(token) || 0) + 1);
14
+ }
15
+ }
16
+ const weights = new Map();
17
+ for (const [term, count] of df) {
18
+ // Smooth IDF: log(N / (df + 1)) + 1 — ensures all terms have positive weight
19
+ weights.set(term, Math.log(N / (count + 1)) + 1);
20
+ }
21
+ return { weights, docCount: N };
22
+ }
23
+ /**
24
+ * Returns the IDF weight for a token. Defaults to a high value for unknown
25
+ * tokens (they are very rare, so should score higher than average).
26
+ */
27
+ export function getIDFWeight(idfMap, token) {
28
+ if (!idfMap)
29
+ return 1;
30
+ return idfMap.weights.get(token) ?? Math.log(idfMap.docCount + 1) + 1;
31
+ }
@@ -1,5 +1,10 @@
1
1
  export { initArticleIndex, initProjectIndex, searchArticles, searchProjects, mergeResults } from './search-api.js';
2
+ export { getIDFMapForIndex } from './search-index.js';
3
+ export { loadVectorIndex, clearVectorIndex, hasVectorIndex, rerankWithVectors } from './vector-reranker.js';
4
+ export type { VectorIndex, VectorChunk } from './vector-reranker.js';
2
5
  export { getSessionCacheKey, getCachedContext, setCachedContext, deleteCachedContext, setCacheAdapter, getCacheAdapter, cleanupCache, SESSION_CACHE_TTL_SECONDS, SESSION_CACHE_TTL_MS, getCachedContextSync, setCachedContextSync, cleanupCacheLegacy, } from './session-cache.js';
3
6
  export { normalizeText, tokenize, scoreDocument } from './search-utils.js';
7
+ export { buildIDFMap, getIDFWeight } from './idf.js';
8
+ export type { IDFMap } from './idf.js';
4
9
  export type { SearchDocument, ArticleContext, ProjectContext, CachedSearchContext, SearchResult } from './types.js';
5
10
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/search/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,cAAc,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AACnH,OAAO,EACL,kBAAkB,EAClB,gBAAgB,EAChB,gBAAgB,EAChB,mBAAmB,EACnB,eAAe,EACf,eAAe,EACf,YAAY,EACZ,yBAAyB,EACzB,oBAAoB,EACpB,oBAAoB,EACpB,oBAAoB,EACpB,kBAAkB,GACnB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAC3E,YAAY,EAAE,cAAc,EAAE,cAAc,EAAE,cAAc,EAAE,mBAAmB,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/search/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,cAAc,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AACnH,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACtD,OAAO,EAAE,eAAe,EAAE,gBAAgB,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AAC5G,YAAY,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AACrE,OAAO,EACL,kBAAkB,EAClB,gBAAgB,EAChB,gBAAgB,EAChB,mBAAmB,EACnB,eAAe,EACf,eAAe,EACf,YAAY,EACZ,yBAAyB,EACzB,oBAAoB,EACpB,oBAAoB,EACpB,oBAAoB,EACpB,kBAAkB,GACnB,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAC3E,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACrD,YAAY,EAAE,MAAM,EAAE,MAAM,UAAU,CAAC;AACvC,YAAY,EAAE,cAAc,EAAE,cAAc,EAAE,cAAc,EAAE,mBAAmB,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC"}
@@ -1,3 +1,6 @@
1
1
  export { initArticleIndex, initProjectIndex, searchArticles, searchProjects, mergeResults } from './search-api.js';
2
+ export { getIDFMapForIndex } from './search-index.js';
3
+ export { loadVectorIndex, clearVectorIndex, hasVectorIndex, rerankWithVectors } from './vector-reranker.js';
2
4
  export { getSessionCacheKey, getCachedContext, setCachedContext, deleteCachedContext, setCacheAdapter, getCacheAdapter, cleanupCache, SESSION_CACHE_TTL_SECONDS, SESSION_CACHE_TTL_MS, getCachedContextSync, setCachedContextSync, cleanupCacheLegacy, } from './session-cache.js';
3
5
  export { normalizeText, tokenize, scoreDocument } from './search-utils.js';
6
+ export { buildIDFMap, getIDFWeight } from './idf.js';
@@ -1 +1 @@
1
- {"version":3,"file":"search-api.d.ts","sourceRoot":"","sources":["../../src/search/search-api.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,cAAc,EAAiC,cAAc,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAYhH;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,SAAS,EAAE,cAAc,EAAE,GAAG,IAAI,CAElE;AAED,wBAAgB,gBAAgB,CAAC,SAAS,EAAE,cAAc,EAAE,GAAG,IAAI,CAElE;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAC5B,KAAK,EAAE,MAAM,EACb,OAAO,GAAE;IAAE,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAO,GAC9D,cAAc,EAAE,CAsClB;AAED;;GAEG;AACH,wBAAgB,cAAc,CAC5B,KAAK,EAAE,MAAM,EACb,OAAO,GAAE;IAAE,OAAO,CAAC,EAAE,MAAM,CAAA;CAAO,GACjC,cAAc,EAAE,CAgBlB;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,CAAC,SAAS;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,EAAE,OAAO,EAAE,CAAC,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,GAAG,CAAC,EAAE,CAUzF"}
1
+ {"version":3,"file":"search-api.d.ts","sourceRoot":"","sources":["../../src/search/search-api.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,cAAc,EAAiC,cAAc,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAYhH;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,SAAS,EAAE,cAAc,EAAE,GAAG,IAAI,CAElE;AAED,wBAAgB,gBAAgB,CAAC,SAAS,EAAE,cAAc,EAAE,GAAG,IAAI,CAElE;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAC5B,KAAK,EAAE,MAAM,EACb,OAAO,GAAE;IAAE,iBAAiB,CAAC,EAAE,OAAO,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAO,GAC9D,cAAc,EAAE,CA6ClB;AAED;;GAEG;AACH,wBAAgB,cAAc,CAC5B,KAAK,EAAE,MAAM,EACb,OAAO,GAAE;IAAE,OAAO,CAAC,EAAE,MAAM,CAAA;CAAO,GACjC,cAAc,EAAE,CAgBlB;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,CAAC,SAAS;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,EAAE,OAAO,EAAE,CAAC,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,GAAG,CAAC,EAAE,CAUzF"}
@@ -1,5 +1,6 @@
1
1
  import { scoreDocument, filterLowRelevance, tokenize, pickAnchorTerms, normalizeText } from './search-utils.js';
2
- import { buildSearchIndex } from './search-index.js';
2
+ import { buildSearchIndex, getIDFMapForIndex } from './search-index.js';
3
+ import { hasVectorIndex, rerankWithVectors } from './vector-reranker.js';
3
4
  // Lazy-initialized, cached indexes
4
5
  let articleIndex = null;
5
6
  let projectIndex = null;
@@ -38,7 +39,7 @@ export function searchArticles(query, options = {}) {
38
39
  const isDeepHit = options.enableDeepContent &&
39
40
  topScore >= DEEP_CONTENT_SCORE_THRESHOLD &&
40
41
  topScore > secondScore * 1.5;
41
- return results.map((result, index) => {
42
+ let articles = results.map((result, index) => {
42
43
  const baseUrl = options.siteUrl ?? '';
43
44
  const url = result.url.startsWith('http') ? result.url : `${baseUrl}${result.url}`;
44
45
  const fullContent = isDeepHit && index === 0 && result.content
@@ -55,6 +56,11 @@ export function searchArticles(query, options = {}) {
55
56
  score: result.score,
56
57
  };
57
58
  });
59
+ // Optional: rerank using TF-IDF vector cosine similarity
60
+ if (hasVectorIndex() && articles.length > 1) {
61
+ articles = rerankWithVectors(query, articles);
62
+ }
63
+ return articles;
58
64
  }
59
65
  /**
60
66
  * Searches for projects related to the query.
@@ -92,8 +98,9 @@ export function mergeResults(primary, secondary) {
92
98
  }
93
99
  // ---- Internals ----
94
100
  function scoreDocs(index, tokens, limit) {
101
+ const idfMap = getIDFMapForIndex();
95
102
  return index
96
- .map(doc => ({ ...doc, score: scoreDocument(tokens, doc) }))
103
+ .map(doc => ({ ...doc, score: scoreDocument(tokens, doc, idfMap) }))
97
104
  .filter(doc => doc.score > 0)
98
105
  .sort((a, b) => b.score - a.score)
99
106
  .slice(0, limit);
@@ -1,6 +1,12 @@
1
+ import { type IDFMap } from './idf.js';
1
2
  import type { SearchDocument, IndexedDocument } from './types.js';
2
3
  /**
3
- * Builds an in-memory inverted index from a list of documents.
4
+ * Builds an in-memory inverted index from a list of documents
5
+ * and computes IDF weights across the corpus.
6
+ *
7
+ * IDF map is only updated when the document set is non-empty,
8
+ * preventing an empty index (e.g. projects) from wiping article IDF.
4
9
  */
5
10
  export declare function buildSearchIndex(documents: SearchDocument[]): IndexedDocument[];
11
+ export declare function getIDFMapForIndex(): IDFMap | null;
6
12
  //# sourceMappingURL=search-index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"search-index.d.ts","sourceRoot":"","sources":["../../src/search/search-index.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAElE;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,SAAS,EAAE,cAAc,EAAE,GAAG,eAAe,EAAE,CAK/E"}
1
+ {"version":3,"file":"search-index.d.ts","sourceRoot":"","sources":["../../src/search/search-index.ts"],"names":[],"mappings":"AACA,OAAO,EAAe,KAAK,MAAM,EAAE,MAAM,UAAU,CAAC;AACpD,OAAO,KAAK,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAIlE;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAAC,SAAS,EAAE,cAAc,EAAE,GAAG,eAAe,EAAE,CAU/E;AAED,wBAAgB,iBAAiB,IAAI,MAAM,GAAG,IAAI,CAEjD"}
@@ -1,12 +1,25 @@
1
1
  import { normalizeText } from './search-utils.js';
2
+ import { buildIDFMap } from './idf.js';
3
+ let cachedIDFMap = null;
2
4
  /**
3
- * Builds an in-memory inverted index from a list of documents.
5
+ * Builds an in-memory inverted index from a list of documents
6
+ * and computes IDF weights across the corpus.
7
+ *
8
+ * IDF map is only updated when the document set is non-empty,
9
+ * preventing an empty index (e.g. projects) from wiping article IDF.
4
10
  */
5
11
  export function buildSearchIndex(documents) {
6
- return documents.map(doc => ({
12
+ const indexed = documents.map(doc => ({
7
13
  ...doc,
8
14
  tokens: buildDocumentTokens(doc),
9
15
  }));
16
+ if (indexed.length > 0) {
17
+ cachedIDFMap = buildIDFMap(indexed);
18
+ }
19
+ return indexed;
20
+ }
21
+ export function getIDFMapForIndex() {
22
+ return cachedIDFMap;
10
23
  }
11
24
  function buildDocumentTokens(doc) {
12
25
  const parts = [
@@ -1,6 +1,7 @@
1
1
  /**
2
2
  * Text normalization and tokenization utilities for search.
3
3
  */
4
+ import type { IDFMap } from './idf.js';
4
5
  /**
5
6
  * Normalizes text for search: lowercase, remove punctuation, normalize whitespace.
6
7
  */
@@ -14,8 +15,11 @@ export declare function tokenize(text: string): string[];
14
15
  */
15
16
  export declare function dedupeByContainment(terms: string[]): string[];
16
17
  /**
17
- * Computes a relevance score for a document against a set of query tokens.
18
- * Title matches score higher than content matches.
18
+ * Computes a relevance score for a document against query tokens.
19
+ *
20
+ * When an IDF map is provided, each token's contribution is weighted by its
21
+ * inverse document frequency — rare terms contribute more, common terms less.
22
+ * Falls back to uniform weighting when IDF is unavailable.
19
23
  */
20
24
  export declare function scoreDocument(tokens: string[], doc: {
21
25
  title: string;
@@ -24,7 +28,7 @@ export declare function scoreDocument(tokens: string[], doc: {
24
28
  keyPoints: string[];
25
29
  categories: string[];
26
30
  tags: string[];
27
- }): number;
31
+ }, idfMap?: IDFMap | null): number;
28
32
  /**
29
33
  * Filters out low-relevance results relative to the top score.
30
34
  */
@@ -1 +1 @@
1
- {"version":3,"file":"search-utils.d.ts","sourceRoot":"","sources":["../../src/search/search-utils.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;GAEG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAMlD;AAED;;GAEG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAK/C;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAS7D;AAED;;;GAGG;AACH,wBAAgB,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,EAAE,CAAC;IAAC,UAAU,EAAE,MAAM,EAAE,CAAC;IAAC,IAAI,EAAE,MAAM,EAAE,CAAA;CAAE,GAAG,MAAM,CA2B3K;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,CAAC,SAAS;IAAE,KAAK,EAAE,MAAM,CAAA;CAAE,EAC5D,OAAO,EAAE,CAAC,EAAE,EACZ,iBAAiB,SAAO,EACxB,gBAAgB,SAAI,GACnB,CAAC,EAAE,CAML;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAC7B,KAAK,EAAE,MAAM,EACb,UAAU,EAAE,KAAK,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,EAAE,CAAC;IAAC,UAAU,EAAE,MAAM,EAAE,CAAA;CAAE,CAAC,EAC/E,QAAQ,SAAI,EACZ,aAAa,SAAI,GAChB,MAAM,EAAE,CAuBV"}
1
+ {"version":3,"file":"search-utils.d.ts","sourceRoot":"","sources":["../../src/search/search-utils.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,UAAU,CAAC;AAGvC;;GAEG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAMlD;AAED;;GAEG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAI/C;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAS7D;AAYD;;;;;;GAMG;AACH,wBAAgB,aAAa,CAC3B,MAAM,EAAE,MAAM,EAAE,EAChB,GAAG,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,EAAE,CAAC;IAAC,UAAU,EAAE,MAAM,EAAE,CAAC;IAAC,IAAI,EAAE,MAAM,EAAE,CAAA;CAAE,EACnH,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,GACrB,MAAM,CAwBR;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,CAAC,SAAS;IAAE,KAAK,EAAE,MAAM,CAAA;CAAE,EAC5D,OAAO,EAAE,CAAC,EAAE,EACZ,iBAAiB,SAAO,EACxB,gBAAgB,SAAI,GACnB,CAAC,EAAE,CAML;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAC7B,KAAK,EAAE,MAAM,EACb,UAAU,EAAE,KAAK,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,EAAE,CAAC;IAAC,UAAU,EAAE,MAAM,EAAE,CAAA;CAAE,CAAC,EAC/E,QAAQ,SAAI,EACZ,aAAa,SAAI,GAChB,MAAM,EAAE,CAuBV"}
@@ -1,6 +1,7 @@
1
1
  /**
2
2
  * Text normalization and tokenization utilities for search.
3
3
  */
4
+ import { getIDFWeight } from './idf.js';
4
5
  /**
5
6
  * Normalizes text for search: lowercase, remove punctuation, normalize whitespace.
6
7
  */
@@ -16,7 +17,6 @@ export function normalizeText(text) {
16
17
  */
17
18
  export function tokenize(text) {
18
19
  const normalized = normalizeText(text);
19
- // Split on whitespace for multi-word queries
20
20
  const parts = normalized.split(/\s+/).filter(Boolean);
21
21
  return dedupeByContainment(parts);
22
22
  }
@@ -33,11 +33,23 @@ export function dedupeByContainment(terms) {
33
33
  }
34
34
  return kept;
35
35
  }
36
+ /** Positional weight multipliers for each document field */
37
+ const FIELD_WEIGHTS = {
38
+ title: 8,
39
+ keyPoints: 5,
40
+ categories: 4,
41
+ tags: 3,
42
+ excerpt: 3,
43
+ content: 1,
44
+ };
36
45
  /**
37
- * Computes a relevance score for a document against a set of query tokens.
38
- * Title matches score higher than content matches.
46
+ * Computes a relevance score for a document against query tokens.
47
+ *
48
+ * When an IDF map is provided, each token's contribution is weighted by its
49
+ * inverse document frequency — rare terms contribute more, common terms less.
50
+ * Falls back to uniform weighting when IDF is unavailable.
39
51
  */
40
- export function scoreDocument(tokens, doc) {
52
+ export function scoreDocument(tokens, doc, idfMap) {
41
53
  if (!tokens.length)
42
54
  return 0;
43
55
  let score = 0;
@@ -50,23 +62,19 @@ export function scoreDocument(tokens, doc) {
50
62
  for (const token of tokens) {
51
63
  if (!token)
52
64
  continue;
53
- // Title: highest weight
65
+ const idf = getIDFWeight(idfMap ?? null, token);
54
66
  if (title.includes(token))
55
- score += 8;
56
- // KeyPoints: high weight
67
+ score += FIELD_WEIGHTS.title * idf;
57
68
  if (keyPointsText.includes(token))
58
- score += 5;
59
- // Categories/tags: medium weight
69
+ score += FIELD_WEIGHTS.keyPoints * idf;
60
70
  if (categoriesText.includes(token))
61
- score += 4;
71
+ score += FIELD_WEIGHTS.categories * idf;
62
72
  if (tagsText.includes(token))
63
- score += 3;
64
- // Excerpt: medium weight
73
+ score += FIELD_WEIGHTS.tags * idf;
65
74
  if (excerpt.includes(token))
66
- score += 3;
67
- // Content sample: low weight
75
+ score += FIELD_WEIGHTS.excerpt * idf;
68
76
  if (contentSample.includes(token))
69
- score += 1;
77
+ score += FIELD_WEIGHTS.content * idf;
70
78
  }
71
79
  return score;
72
80
  }
@@ -0,0 +1,38 @@
1
+ /**
2
+ * Optional TF-IDF vector reranker.
3
+ *
4
+ * When a pre-built vector index is available (from `vectorize.ts`),
5
+ * uses cosine similarity to refine the ranking of search results.
6
+ * Gracefully degrades to a no-op when no vector index is loaded.
7
+ */
8
+ import type { ArticleContext } from './types.js';
9
+ export interface VectorChunk {
10
+ postId: string;
11
+ title: string;
12
+ lang: string;
13
+ chunkIndex: number;
14
+ text: string;
15
+ vector?: number[];
16
+ }
17
+ export interface VectorIndex {
18
+ version: number;
19
+ method: 'tfidf' | 'openai';
20
+ createdAt: string;
21
+ vocabulary?: string[];
22
+ chunks: VectorChunk[];
23
+ }
24
+ export declare function loadVectorIndex(data: VectorIndex | null): void;
25
+ export declare function clearVectorIndex(): void;
26
+ export declare function hasVectorIndex(): boolean;
27
+ /**
28
+ * Reranks article search results using vector cosine similarity.
29
+ *
30
+ * For each candidate article, finds the best-matching chunk in the vector
31
+ * index and uses the cosine similarity as a reranking signal.
32
+ *
33
+ * Final score = original_score * (1 - alpha) + vector_score * alpha
34
+ *
35
+ * @param alpha - Blend factor (0 = ignore vectors, 1 = vectors only). Default 0.3
36
+ */
37
+ export declare function rerankWithVectors<T extends Pick<ArticleContext, 'url' | 'score'>>(query: string, candidates: T[], alpha?: number): T[];
38
+ //# sourceMappingURL=vector-reranker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vector-reranker.d.ts","sourceRoot":"","sources":["../../src/search/vector-reranker.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAIjD,MAAM,WAAW,WAAW;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;CACnB;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,OAAO,GAAG,QAAQ,CAAC;IAC3B,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,MAAM,EAAE,WAAW,EAAE,CAAC;CACvB;AAOD,wBAAgB,eAAe,CAAC,IAAI,EAAE,WAAW,GAAG,IAAI,GAAG,IAAI,CAO9D;AAED,wBAAgB,gBAAgB,IAAI,IAAI,CAGvC;AAED,wBAAgB,cAAc,IAAI,OAAO,CAExC;AAID;;;;;;;;;GASG;AACH,wBAAgB,iBAAiB,CAAC,CAAC,SAAS,IAAI,CAAC,cAAc,EAAE,KAAK,GAAG,OAAO,CAAC,EAC/E,KAAK,EAAE,MAAM,EACb,UAAU,EAAE,CAAC,EAAE,EACf,KAAK,SAAM,GACV,CAAC,EAAE,CAkCL"}
@@ -0,0 +1,135 @@
1
+ /**
2
+ * Optional TF-IDF vector reranker.
3
+ *
4
+ * When a pre-built vector index is available (from `vectorize.ts`),
5
+ * uses cosine similarity to refine the ranking of search results.
6
+ * Gracefully degrades to a no-op when no vector index is loaded.
7
+ */
8
+ // ── State ────────────────────────────────────────────────────
9
+ let loadedIndex = null;
10
+ let idfCache = null;
11
+ export function loadVectorIndex(data) {
12
+ loadedIndex = data;
13
+ idfCache = null;
14
+ if (data?.vocabulary && data.chunks.length > 0) {
15
+ idfCache = buildIDFFromVocab(data.vocabulary, data.chunks);
16
+ }
17
+ }
18
+ export function clearVectorIndex() {
19
+ loadedIndex = null;
20
+ idfCache = null;
21
+ }
22
+ export function hasVectorIndex() {
23
+ return loadedIndex !== null && loadedIndex.chunks.length > 0;
24
+ }
25
+ // ── Core: Rerank ──────────────────────────────────────────────
26
+ /**
27
+ * Reranks article search results using vector cosine similarity.
28
+ *
29
+ * For each candidate article, finds the best-matching chunk in the vector
30
+ * index and uses the cosine similarity as a reranking signal.
31
+ *
32
+ * Final score = original_score * (1 - alpha) + vector_score * alpha
33
+ *
34
+ * @param alpha - Blend factor (0 = ignore vectors, 1 = vectors only). Default 0.3
35
+ */
36
+ export function rerankWithVectors(query, candidates, alpha = 0.3) {
37
+ if (!loadedIndex || !idfCache || !loadedIndex.vocabulary) {
38
+ return candidates;
39
+ }
40
+ const queryVector = computeQueryVector(query, loadedIndex.vocabulary, idfCache);
41
+ if (!queryVector)
42
+ return candidates;
43
+ // Map postId → best chunk cosine similarity
44
+ const articleScores = new Map();
45
+ for (const chunk of loadedIndex.chunks) {
46
+ if (!chunk.vector)
47
+ continue;
48
+ const sim = cosineSimilarity(queryVector, chunk.vector);
49
+ const current = articleScores.get(chunk.postId) ?? 0;
50
+ if (sim > current) {
51
+ articleScores.set(chunk.postId, sim);
52
+ }
53
+ }
54
+ if (articleScores.size === 0)
55
+ return candidates;
56
+ // Normalize original scores to 0-1
57
+ const maxOriginal = Math.max(...candidates.map(c => c.score ?? 0), 1);
58
+ const reranked = candidates.map(article => {
59
+ const slug = extractSlugFromUrl(article.url);
60
+ const vectorScore = articleScores.get(slug) ?? 0;
61
+ const originalNorm = (article.score ?? 0) / maxOriginal;
62
+ const blended = originalNorm * (1 - alpha) + vectorScore * alpha;
63
+ return { ...article, score: blended };
64
+ });
65
+ return reranked.sort((a, b) => (b.score ?? 0) - (a.score ?? 0));
66
+ }
67
+ // ── Math Utilities ────────────────────────────────────────────
68
+ function cosineSimilarity(a, b) {
69
+ if (a.length !== b.length)
70
+ return 0;
71
+ let dot = 0, magA = 0, magB = 0;
72
+ for (let i = 0; i < a.length; i++) {
73
+ dot += a[i] * b[i];
74
+ magA += a[i] * a[i];
75
+ magB += b[i] * b[i];
76
+ }
77
+ const mag = Math.sqrt(magA) * Math.sqrt(magB);
78
+ return mag === 0 ? 0 : dot / mag;
79
+ }
80
+ function computeQueryVector(query, vocabulary, idf) {
81
+ const tokens = tokenizeForVector(query);
82
+ if (!tokens.length)
83
+ return null;
84
+ const tf = new Map();
85
+ for (const t of tokens)
86
+ tf.set(t, (tf.get(t) || 0) + 1);
87
+ const maxTf = Math.max(...tf.values(), 1);
88
+ const vector = vocabulary.map(term => {
89
+ const termTf = (tf.get(term) || 0) / maxTf;
90
+ const termIdf = idf.get(term) || 0;
91
+ return termTf * termIdf;
92
+ });
93
+ // Check if vector is all zeros
94
+ if (vector.every(v => v === 0))
95
+ return null;
96
+ return vector;
97
+ }
98
+ function tokenizeForVector(text) {
99
+ const CJK = /[\u4e00-\u9fff\u3400-\u4dbf]/g;
100
+ const cjkChars = text.match(CJK) || [];
101
+ const latin = text
102
+ .replace(CJK, ' ')
103
+ .toLowerCase()
104
+ .split(/\W+/)
105
+ .filter(w => w.length > 2);
106
+ return [...cjkChars, ...latin];
107
+ }
108
+ function buildIDFFromVocab(vocabulary, chunks) {
109
+ const N = chunks.length;
110
+ const df = new Map();
111
+ for (const chunk of chunks) {
112
+ const tokens = new Set(tokenizeForVector(chunk.text));
113
+ for (const term of vocabulary) {
114
+ if (tokens.has(term)) {
115
+ df.set(term, (df.get(term) || 0) + 1);
116
+ }
117
+ }
118
+ }
119
+ const idf = new Map();
120
+ for (const term of vocabulary) {
121
+ const docCount = df.get(term) || 0;
122
+ idf.set(term, Math.log(N / (docCount + 1)) + 1);
123
+ }
124
+ return idf;
125
+ }
126
+ /**
127
+ * Extracts a slug/postId from a URL path like "/zh/posts/some-slug/" → "zh/some-slug"
128
+ */
129
+ function extractSlugFromUrl(url) {
130
+ const path = url.replace(/^https?:\/\/[^/]+/, '');
131
+ const match = path.match(/^\/([\w-]+)\/posts\/(.+?)\/?$/);
132
+ if (match)
133
+ return `${match[1]}/${match[2]}`;
134
+ return path.replace(/^\/|\/$/g, '');
135
+ }
@@ -1 +1 @@
1
- {"version":3,"file":"chat-handler.d.ts","sourceRoot":"","sources":["../../src/server/chat-handler.ts"],"names":[],"mappings":"AA+CA,OAAO,KAAK,EAAE,kBAAkB,EAAgC,MAAM,YAAY,CAAC;AA4HnF,wBAAsB,iBAAiB,CAAC,OAAO,EAAE,kBAAkB,GAAG,OAAO,CAAC,QAAQ,CAAC,CA0CtF"}
1
+ {"version":3,"file":"chat-handler.d.ts","sourceRoot":"","sources":["../../src/server/chat-handler.ts"],"names":[],"mappings":"AAiDA,OAAO,KAAK,EAAE,kBAAkB,EAAgC,MAAM,YAAY,CAAC;AA4HnF,wBAAsB,iBAAiB,CAAC,OAAO,EAAE,kBAAkB,GAAG,OAAO,CAAC,QAAQ,CAAC,CA0CtF"}
@@ -1,6 +1,6 @@
1
1
  import { createUIMessageStream, createUIMessageStreamResponse, streamText, convertToModelMessages, } from 'ai';
2
2
  import { t, getLang } from '../utils/i18n.js';
3
- import { getClientIP, checkRateLimit, rateLimitResponse, searchArticles, searchProjects, getSessionCacheKey, getCachedContext, setCachedContext, shouldReuseSearchContext, buildLocalSearchQuery, shouldRunKeywordExtraction, extractSearchKeywords, KEYWORD_EXTRACTION_TIMEOUT_MS, shouldSkipAnalysis, analyzeRetrievedEvidence, buildEvidenceSection, EVIDENCE_ANALYSIS_TIMEOUT_MS, getCitationGuardPreflight, buildSystemPrompt, getAuthorContext, getVoiceProfile, mergeResults, getProviderManager, createCacheAdapter, detectPublicQuestion, getGlobalSearchCache, shouldAppendCitations, formatCitationBlock, selectCitations, setGlobalSearchCache, getGlobalCacheTTL, getResponseCache, setResponseCache, getResponseCacheConfig, rankArticlesByIntent, } from '../index.js';
3
+ import { getClientIP, checkRateLimit, rateLimitResponse, searchArticles, searchProjects, getSessionCacheKey, getCachedContext, setCachedContext, shouldReuseSearchContext, buildLocalSearchQuery, shouldRunKeywordExtraction, extractSearchKeywords, KEYWORD_EXTRACTION_TIMEOUT_MS, shouldSkipAnalysis, analyzeRetrievedEvidence, buildEvidenceSection, EVIDENCE_ANALYSIS_TIMEOUT_MS, getCitationGuardPreflight, buildSystemPrompt, getAuthorContext, getVoiceProfile, mergeResults, getProviderManager, createCacheAdapter, detectPublicQuestion, getGlobalSearchCache, shouldAppendCitations, formatCitationBlock, selectCitations, setGlobalSearchCache, getGlobalCacheTTL, getResponseCache, setResponseCache, getResponseCacheConfig, rankArticlesByIntent, matchFactsToQuery, buildFactSection, } from '../index.js';
4
4
  import { createChatStatusData } from './types.js';
5
5
  import { errors, corsPreflightResponse } from './errors.js';
6
6
  import { notifyAiChat } from './notify.js';
@@ -187,10 +187,12 @@ async function runPipeline(args) {
187
187
  let responseText = '';
188
188
  if (adapter) {
189
189
  const articlePrompt = buildArticleContextPrompt(context);
190
+ const matchedFacts = matchFactsToQuery(cachedSearch.query, lang);
191
+ const factPromptSection = buildFactSection(matchedFacts, lang);
190
192
  const systemPrompt = buildSystemPrompt({
191
193
  static: { authorName: env.SITE_AUTHOR || '博主', siteUrl: env.SITE_URL || '', lang },
192
194
  semiStatic: { authorContext: getAuthorContext(), voiceProfile: getVoiceProfile() },
193
- dynamic: { userQuery: cachedSearch.query, articles: cachedSearch.articles, projects: cachedSearch.projects, evidenceSection: articlePrompt },
195
+ dynamic: { userQuery: cachedSearch.query, articles: cachedSearch.articles, projects: cachedSearch.projects, evidenceSection: articlePrompt, factSection: factPromptSection },
194
196
  });
195
197
  const llmResult = await streamLLMResponse({ writer: w, adapter, systemPrompt, messages, lang });
196
198
  responseText = llmResult.responseText;
@@ -326,6 +328,9 @@ async function runPipeline(args) {
326
328
  projects: relatedProjects,
327
329
  lang,
328
330
  });
331
+ // ── Fact Registry ───────────────────────────────────────────
332
+ const matchedFacts = matchFactsToQuery(latestText, lang);
333
+ const factPromptSection = buildFactSection(matchedFacts, lang);
329
334
  // ── Build System Prompt ─────────────────────────────────────
330
335
  const articlePrompt = buildArticleContextPrompt(context);
331
336
  const systemPrompt = buildSystemPrompt({
@@ -345,6 +350,7 @@ async function runPipeline(args) {
345
350
  evidenceSection: articlePrompt
346
351
  ? `${evidenceSection}\n${articlePrompt}`
347
352
  : evidenceSection,
353
+ factSection: factPromptSection,
348
354
  lang,
349
355
  },
350
356
  });
@@ -1 +1 @@
1
- {"version":3,"file":"metadata-init.d.ts","sourceRoot":"","sources":["../../src/server/metadata-init.ts"],"names":[],"mappings":"AASA,OAAO,KAAK,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAIjE;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,cAAc,EAAE,GAAG,CAAC,EAAE,cAAc,GAAG,IAAI,CAoCrF;AAED;;GAEG;AACH,wBAAgB,iBAAiB,IAAI,IAAI,CAExC"}
1
+ {"version":3,"file":"metadata-init.d.ts","sourceRoot":"","sources":["../../src/server/metadata-init.ts"],"names":[],"mappings":"AASA,OAAO,KAAK,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAIjE;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,cAAc,EAAE,GAAG,CAAC,EAAE,cAAc,GAAG,IAAI,CAsCrF;AAED;;GAEG;AACH,wBAAgB,iBAAiB,IAAI,IAAI,CAExC"}
@@ -12,6 +12,8 @@ export function initializeMetadata(config, env) {
12
12
  summaries: config.summaries,
13
13
  authorContext: config.authorContext,
14
14
  voiceProfile: config.voiceProfile,
15
+ factRegistry: (config.factRegistry ?? null),
16
+ vectorIndex: (config.vectorIndex ?? null),
15
17
  });
16
18
  const authorCtx = getAuthorContext();
17
19
  const allSummaries = getAllSummaries();
@@ -52,6 +52,8 @@ export interface MetadataConfig {
52
52
  summaries: unknown;
53
53
  authorContext: unknown;
54
54
  voiceProfile: unknown;
55
+ factRegistry?: unknown;
56
+ vectorIndex?: unknown;
55
57
  siteUrl?: string;
56
58
  }
57
59
  //# sourceMappingURL=types.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/server/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AACpC,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,8BAA8B,CAAC;AACvE,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAIlD,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,QAAQ,GAAG,SAAS,CAAC;IAC5B,OAAO,CAAC,EAAE,kBAAkB,CAAC;CAC9B;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;CACzB;AAID,MAAM,WAAW,eAAe;IAC9B,OAAO,CAAC,EAAE,WAAW,CAAC;IACtB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,SAAS,EAAE,CAAC;IACtB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,cAAe,SAAQ,kBAAkB,EAAE,QAAQ;IAClE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED,MAAM,WAAW,kBAAkB;IACjC,GAAG,EAAE,cAAc,CAAC;IACpB,OAAO,EAAE,OAAO,CAAC;IACjB,SAAS,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,CAAC,OAAO,CAAC,KAAK,IAAI,CAAC;CACjD;AAID,MAAM,MAAM,eAAe,GAAG,QAAQ,GAAG,QAAQ,GAAG,UAAU,CAAC;AAE/D,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,eAAe,CAAC;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,OAAO,CAAC;IACd,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,wBAAgB,oBAAoB,CAClC,OAAO,EAAE,IAAI,CAAC,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC,GAAG;IAAE,IAAI,CAAC,EAAE,OAAO,CAAA;CAAE,GAChE,cAAc,CAMhB;AAED,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,OAAO,GAAG,KAAK,IAAI,cAAc,CAIxE;AAID,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAID,MAAM,WAAW,cAAc;IAC7B,SAAS,EAAE,OAAO,CAAC;IACnB,aAAa,EAAE,OAAO,CAAC;IACvB,YAAY,EAAE,OAAO,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/server/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AACpC,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,8BAA8B,CAAC;AACvE,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAIlD,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,QAAQ,GAAG,SAAS,CAAC;IAC5B,OAAO,CAAC,EAAE,kBAAkB,CAAC;CAC9B;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;CACzB;AAID,MAAM,WAAW,eAAe;IAC9B,OAAO,CAAC,EAAE,WAAW,CAAC;IACtB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,SAAS,EAAE,CAAC;IACtB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,cAAe,SAAQ,kBAAkB,EAAE,QAAQ;IAClE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED,MAAM,WAAW,kBAAkB;IACjC,GAAG,EAAE,cAAc,CAAC;IACpB,OAAO,EAAE,OAAO,CAAC;IACjB,SAAS,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,CAAC,OAAO,CAAC,KAAK,IAAI,CAAC;CACjD;AAID,MAAM,MAAM,eAAe,GAAG,QAAQ,GAAG,QAAQ,GAAG,UAAU,CAAC;AAE/D,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,eAAe,CAAC;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,OAAO,CAAC;IACd,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,wBAAgB,oBAAoB,CAClC,OAAO,EAAE,IAAI,CAAC,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC,GAAG;IAAE,IAAI,CAAC,EAAE,OAAO,CAAA;CAAE,GAChE,cAAc,CAMhB;AAED,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,OAAO,GAAG,KAAK,IAAI,cAAc,CAIxE;AAID,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAID,MAAM,WAAW,cAAc;IAC7B,SAAS,EAAE,OAAO,CAAC;IACnB,aAAa,EAAE,OAAO,CAAC;IACvB,YAAY,EAAE,OAAO,CAAC;IACtB,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@astro-minimax/ai",
3
- "version": "0.7.1",
3
+ "version": "0.7.2",
4
4
  "type": "module",
5
5
  "description": "Vendor-agnostic AI integration package with full RAG pipeline for astro-minimax blogs — supports OpenAI, Cloudflare AI, and custom providers.",
6
6
  "author": "Souloss",
@@ -57,6 +57,10 @@
57
57
  "types": "./dist/data/index.d.ts",
58
58
  "import": "./dist/data/index.js"
59
59
  },
60
+ "./fact-registry": {
61
+ "types": "./dist/fact-registry/index.d.ts",
62
+ "import": "./dist/fact-registry/index.js"
63
+ },
60
64
  "./stream": {
61
65
  "types": "./dist/stream/index.d.ts",
62
66
  "import": "./dist/stream/index.js"
@@ -80,7 +84,7 @@
80
84
  "@ai-sdk/openai-compatible": "^2.0.35",
81
85
  "ai": "^6.0.116",
82
86
  "workers-ai-provider": "^3.1.2",
83
- "@astro-minimax/notify": "0.7.1"
87
+ "@astro-minimax/notify": "0.7.2"
84
88
  },
85
89
  "optionalDependencies": {
86
90
  "undici": "^6.0.0"
@@ -9,7 +9,7 @@ interface Props {
9
9
  }
10
10
 
11
11
  const { lang = SITE.lang ?? "zh", articleContext } = Astro.props;
12
- const aiEnabled = SITE.ai?.enabled ?? false;
12
+ const aiEnabled = SITE.aiEnabled ?? false;
13
13
 
14
14
  const aiConfig = {
15
15
  enabled: aiEnabled,