@astro-minimax/ai 0.7.1 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/data/index.d.ts +1 -0
- package/dist/data/index.d.ts.map +1 -1
- package/dist/data/metadata-loader.d.ts +2 -2
- package/dist/data/metadata-loader.d.ts.map +1 -1
- package/dist/data/metadata-loader.js +15 -3
- package/dist/data/types.d.ts +2 -0
- package/dist/data/types.d.ts.map +1 -1
- package/dist/fact-registry/fact-matcher.d.ts +12 -0
- package/dist/fact-registry/fact-matcher.d.ts.map +1 -0
- package/dist/fact-registry/fact-matcher.js +94 -0
- package/dist/fact-registry/index.d.ts +5 -0
- package/dist/fact-registry/index.d.ts.map +1 -0
- package/dist/fact-registry/index.js +3 -0
- package/dist/fact-registry/prompt-injector.d.ts +7 -0
- package/dist/fact-registry/prompt-injector.d.ts.map +1 -0
- package/dist/fact-registry/prompt-injector.js +57 -0
- package/dist/fact-registry/registry.d.ts +10 -0
- package/dist/fact-registry/registry.d.ts.map +1 -0
- package/dist/fact-registry/registry.js +38 -0
- package/dist/fact-registry/types.d.ts +46 -0
- package/dist/fact-registry/types.d.ts.map +1 -0
- package/dist/fact-registry/types.js +5 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/prompt/dynamic-layer.d.ts.map +1 -1
- package/dist/prompt/dynamic-layer.js +6 -2
- package/dist/prompt/types.d.ts +2 -0
- package/dist/prompt/types.d.ts.map +1 -1
- package/dist/search/idf.d.ts +18 -0
- package/dist/search/idf.d.ts.map +1 -0
- package/dist/search/idf.js +31 -0
- package/dist/search/index.d.ts +5 -0
- package/dist/search/index.d.ts.map +1 -1
- package/dist/search/index.js +3 -0
- package/dist/search/search-api.d.ts.map +1 -1
- package/dist/search/search-api.js +10 -3
- package/dist/search/search-index.d.ts +7 -1
- package/dist/search/search-index.d.ts.map +1 -1
- package/dist/search/search-index.js +15 -2
- package/dist/search/search-utils.d.ts +7 -3
- package/dist/search/search-utils.d.ts.map +1 -1
- package/dist/search/search-utils.js +23 -15
- package/dist/search/vector-reranker.d.ts +38 -0
- package/dist/search/vector-reranker.d.ts.map +1 -0
- package/dist/search/vector-reranker.js +135 -0
- package/dist/server/chat-handler.d.ts.map +1 -1
- package/dist/server/chat-handler.js +8 -2
- package/dist/server/metadata-init.d.ts.map +1 -1
- package/dist/server/metadata-init.js +2 -0
- package/dist/server/types.d.ts +2 -0
- package/dist/server/types.d.ts.map +1 -1
- package/package.json +10 -3
- package/src/components/AIChatContainer.tsx +4 -3
- package/src/components/AIChatWidget.astro +1 -1
- package/src/components/ChatPanel.tsx +5 -4
- package/src/providers/mock.ts +240 -0
- package/src/server/types.ts +89 -0
- package/src/utils/i18n.ts +238 -0
|
@@ -1,12 +1,25 @@
|
|
|
1
1
|
import { normalizeText } from './search-utils.js';
|
|
2
|
+
import { buildIDFMap } from './idf.js';
|
|
3
|
+
let cachedIDFMap = null;
|
|
2
4
|
/**
|
|
3
|
-
* Builds an in-memory inverted index from a list of documents
|
|
5
|
+
* Builds an in-memory inverted index from a list of documents
|
|
6
|
+
* and computes IDF weights across the corpus.
|
|
7
|
+
*
|
|
8
|
+
* IDF map is only updated when the document set is non-empty,
|
|
9
|
+
* preventing an empty index (e.g. projects) from wiping article IDF.
|
|
4
10
|
*/
|
|
5
11
|
export function buildSearchIndex(documents) {
|
|
6
|
-
|
|
12
|
+
const indexed = documents.map(doc => ({
|
|
7
13
|
...doc,
|
|
8
14
|
tokens: buildDocumentTokens(doc),
|
|
9
15
|
}));
|
|
16
|
+
if (indexed.length > 0) {
|
|
17
|
+
cachedIDFMap = buildIDFMap(indexed);
|
|
18
|
+
}
|
|
19
|
+
return indexed;
|
|
20
|
+
}
|
|
21
|
+
export function getIDFMapForIndex() {
|
|
22
|
+
return cachedIDFMap;
|
|
10
23
|
}
|
|
11
24
|
function buildDocumentTokens(doc) {
|
|
12
25
|
const parts = [
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Text normalization and tokenization utilities for search.
|
|
3
3
|
*/
|
|
4
|
+
import type { IDFMap } from './idf.js';
|
|
4
5
|
/**
|
|
5
6
|
* Normalizes text for search: lowercase, remove punctuation, normalize whitespace.
|
|
6
7
|
*/
|
|
@@ -14,8 +15,11 @@ export declare function tokenize(text: string): string[];
|
|
|
14
15
|
*/
|
|
15
16
|
export declare function dedupeByContainment(terms: string[]): string[];
|
|
16
17
|
/**
|
|
17
|
-
* Computes a relevance score for a document against
|
|
18
|
-
*
|
|
18
|
+
* Computes a relevance score for a document against query tokens.
|
|
19
|
+
*
|
|
20
|
+
* When an IDF map is provided, each token's contribution is weighted by its
|
|
21
|
+
* inverse document frequency — rare terms contribute more, common terms less.
|
|
22
|
+
* Falls back to uniform weighting when IDF is unavailable.
|
|
19
23
|
*/
|
|
20
24
|
export declare function scoreDocument(tokens: string[], doc: {
|
|
21
25
|
title: string;
|
|
@@ -24,7 +28,7 @@ export declare function scoreDocument(tokens: string[], doc: {
|
|
|
24
28
|
keyPoints: string[];
|
|
25
29
|
categories: string[];
|
|
26
30
|
tags: string[];
|
|
27
|
-
}): number;
|
|
31
|
+
}, idfMap?: IDFMap | null): number;
|
|
28
32
|
/**
|
|
29
33
|
* Filters out low-relevance results relative to the top score.
|
|
30
34
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"search-utils.d.ts","sourceRoot":"","sources":["../../src/search/search-utils.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;GAEG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAMlD;AAED;;GAEG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,
|
|
1
|
+
{"version":3,"file":"search-utils.d.ts","sourceRoot":"","sources":["../../src/search/search-utils.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,UAAU,CAAC;AAGvC;;GAEG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAMlD;AAED;;GAEG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAI/C;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAS7D;AAYD;;;;;;GAMG;AACH,wBAAgB,aAAa,CAC3B,MAAM,EAAE,MAAM,EAAE,EAChB,GAAG,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,EAAE,CAAC;IAAC,UAAU,EAAE,MAAM,EAAE,CAAC;IAAC,IAAI,EAAE,MAAM,EAAE,CAAA;CAAE,EACnH,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,GACrB,MAAM,CAwBR;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,CAAC,SAAS;IAAE,KAAK,EAAE,MAAM,CAAA;CAAE,EAC5D,OAAO,EAAE,CAAC,EAAE,EACZ,iBAAiB,SAAO,EACxB,gBAAgB,SAAI,GACnB,CAAC,EAAE,CAML;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAC7B,KAAK,EAAE,MAAM,EACb,UAAU,EAAE,KAAK,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,EAAE,CAAC;IAAC,UAAU,EAAE,MAAM,EAAE,CAAA;CAAE,CAAC,EAC/E,QAAQ,SAAI,EACZ,aAAa,SAAI,GAChB,MAAM,EAAE,CAuBV"}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Text normalization and tokenization utilities for search.
|
|
3
3
|
*/
|
|
4
|
+
import { getIDFWeight } from './idf.js';
|
|
4
5
|
/**
|
|
5
6
|
* Normalizes text for search: lowercase, remove punctuation, normalize whitespace.
|
|
6
7
|
*/
|
|
@@ -16,7 +17,6 @@ export function normalizeText(text) {
|
|
|
16
17
|
*/
|
|
17
18
|
export function tokenize(text) {
|
|
18
19
|
const normalized = normalizeText(text);
|
|
19
|
-
// Split on whitespace for multi-word queries
|
|
20
20
|
const parts = normalized.split(/\s+/).filter(Boolean);
|
|
21
21
|
return dedupeByContainment(parts);
|
|
22
22
|
}
|
|
@@ -33,11 +33,23 @@ export function dedupeByContainment(terms) {
|
|
|
33
33
|
}
|
|
34
34
|
return kept;
|
|
35
35
|
}
|
|
36
|
+
/** Positional weight multipliers for each document field */
|
|
37
|
+
const FIELD_WEIGHTS = {
|
|
38
|
+
title: 8,
|
|
39
|
+
keyPoints: 5,
|
|
40
|
+
categories: 4,
|
|
41
|
+
tags: 3,
|
|
42
|
+
excerpt: 3,
|
|
43
|
+
content: 1,
|
|
44
|
+
};
|
|
36
45
|
/**
|
|
37
|
-
* Computes a relevance score for a document against
|
|
38
|
-
*
|
|
46
|
+
* Computes a relevance score for a document against query tokens.
|
|
47
|
+
*
|
|
48
|
+
* When an IDF map is provided, each token's contribution is weighted by its
|
|
49
|
+
* inverse document frequency — rare terms contribute more, common terms less.
|
|
50
|
+
* Falls back to uniform weighting when IDF is unavailable.
|
|
39
51
|
*/
|
|
40
|
-
export function scoreDocument(tokens, doc) {
|
|
52
|
+
export function scoreDocument(tokens, doc, idfMap) {
|
|
41
53
|
if (!tokens.length)
|
|
42
54
|
return 0;
|
|
43
55
|
let score = 0;
|
|
@@ -50,23 +62,19 @@ export function scoreDocument(tokens, doc) {
|
|
|
50
62
|
for (const token of tokens) {
|
|
51
63
|
if (!token)
|
|
52
64
|
continue;
|
|
53
|
-
|
|
65
|
+
const idf = getIDFWeight(idfMap ?? null, token);
|
|
54
66
|
if (title.includes(token))
|
|
55
|
-
score +=
|
|
56
|
-
// KeyPoints: high weight
|
|
67
|
+
score += FIELD_WEIGHTS.title * idf;
|
|
57
68
|
if (keyPointsText.includes(token))
|
|
58
|
-
score +=
|
|
59
|
-
// Categories/tags: medium weight
|
|
69
|
+
score += FIELD_WEIGHTS.keyPoints * idf;
|
|
60
70
|
if (categoriesText.includes(token))
|
|
61
|
-
score +=
|
|
71
|
+
score += FIELD_WEIGHTS.categories * idf;
|
|
62
72
|
if (tagsText.includes(token))
|
|
63
|
-
score +=
|
|
64
|
-
// Excerpt: medium weight
|
|
73
|
+
score += FIELD_WEIGHTS.tags * idf;
|
|
65
74
|
if (excerpt.includes(token))
|
|
66
|
-
score +=
|
|
67
|
-
// Content sample: low weight
|
|
75
|
+
score += FIELD_WEIGHTS.excerpt * idf;
|
|
68
76
|
if (contentSample.includes(token))
|
|
69
|
-
score +=
|
|
77
|
+
score += FIELD_WEIGHTS.content * idf;
|
|
70
78
|
}
|
|
71
79
|
return score;
|
|
72
80
|
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Optional TF-IDF vector reranker.
|
|
3
|
+
*
|
|
4
|
+
* When a pre-built vector index is available (from `vectorize.ts`),
|
|
5
|
+
* uses cosine similarity to refine the ranking of search results.
|
|
6
|
+
* Gracefully degrades to a no-op when no vector index is loaded.
|
|
7
|
+
*/
|
|
8
|
+
import type { ArticleContext } from './types.js';
|
|
9
|
+
export interface VectorChunk {
|
|
10
|
+
postId: string;
|
|
11
|
+
title: string;
|
|
12
|
+
lang: string;
|
|
13
|
+
chunkIndex: number;
|
|
14
|
+
text: string;
|
|
15
|
+
vector?: number[];
|
|
16
|
+
}
|
|
17
|
+
export interface VectorIndex {
|
|
18
|
+
version: number;
|
|
19
|
+
method: 'tfidf' | 'openai';
|
|
20
|
+
createdAt: string;
|
|
21
|
+
vocabulary?: string[];
|
|
22
|
+
chunks: VectorChunk[];
|
|
23
|
+
}
|
|
24
|
+
export declare function loadVectorIndex(data: VectorIndex | null): void;
|
|
25
|
+
export declare function clearVectorIndex(): void;
|
|
26
|
+
export declare function hasVectorIndex(): boolean;
|
|
27
|
+
/**
|
|
28
|
+
* Reranks article search results using vector cosine similarity.
|
|
29
|
+
*
|
|
30
|
+
* For each candidate article, finds the best-matching chunk in the vector
|
|
31
|
+
* index and uses the cosine similarity as a reranking signal.
|
|
32
|
+
*
|
|
33
|
+
* Final score = original_score * (1 - alpha) + vector_score * alpha
|
|
34
|
+
*
|
|
35
|
+
* @param alpha - Blend factor (0 = ignore vectors, 1 = vectors only). Default 0.3
|
|
36
|
+
*/
|
|
37
|
+
export declare function rerankWithVectors<T extends Pick<ArticleContext, 'url' | 'score'>>(query: string, candidates: T[], alpha?: number): T[];
|
|
38
|
+
//# sourceMappingURL=vector-reranker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"vector-reranker.d.ts","sourceRoot":"","sources":["../../src/search/vector-reranker.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAIjD,MAAM,WAAW,WAAW;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,EAAE,CAAC;CACnB;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,OAAO,GAAG,QAAQ,CAAC;IAC3B,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,MAAM,EAAE,WAAW,EAAE,CAAC;CACvB;AAOD,wBAAgB,eAAe,CAAC,IAAI,EAAE,WAAW,GAAG,IAAI,GAAG,IAAI,CAO9D;AAED,wBAAgB,gBAAgB,IAAI,IAAI,CAGvC;AAED,wBAAgB,cAAc,IAAI,OAAO,CAExC;AAID;;;;;;;;;GASG;AACH,wBAAgB,iBAAiB,CAAC,CAAC,SAAS,IAAI,CAAC,cAAc,EAAE,KAAK,GAAG,OAAO,CAAC,EAC/E,KAAK,EAAE,MAAM,EACb,UAAU,EAAE,CAAC,EAAE,EACf,KAAK,SAAM,GACV,CAAC,EAAE,CAkCL"}
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Optional TF-IDF vector reranker.
|
|
3
|
+
*
|
|
4
|
+
* When a pre-built vector index is available (from `vectorize.ts`),
|
|
5
|
+
* uses cosine similarity to refine the ranking of search results.
|
|
6
|
+
* Gracefully degrades to a no-op when no vector index is loaded.
|
|
7
|
+
*/
|
|
8
|
+
// ── State ────────────────────────────────────────────────────
|
|
9
|
+
let loadedIndex = null;
|
|
10
|
+
let idfCache = null;
|
|
11
|
+
export function loadVectorIndex(data) {
|
|
12
|
+
loadedIndex = data;
|
|
13
|
+
idfCache = null;
|
|
14
|
+
if (data?.vocabulary && data.chunks.length > 0) {
|
|
15
|
+
idfCache = buildIDFFromVocab(data.vocabulary, data.chunks);
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
export function clearVectorIndex() {
|
|
19
|
+
loadedIndex = null;
|
|
20
|
+
idfCache = null;
|
|
21
|
+
}
|
|
22
|
+
export function hasVectorIndex() {
|
|
23
|
+
return loadedIndex !== null && loadedIndex.chunks.length > 0;
|
|
24
|
+
}
|
|
25
|
+
// ── Core: Rerank ──────────────────────────────────────────────
|
|
26
|
+
/**
|
|
27
|
+
* Reranks article search results using vector cosine similarity.
|
|
28
|
+
*
|
|
29
|
+
* For each candidate article, finds the best-matching chunk in the vector
|
|
30
|
+
* index and uses the cosine similarity as a reranking signal.
|
|
31
|
+
*
|
|
32
|
+
* Final score = original_score * (1 - alpha) + vector_score * alpha
|
|
33
|
+
*
|
|
34
|
+
* @param alpha - Blend factor (0 = ignore vectors, 1 = vectors only). Default 0.3
|
|
35
|
+
*/
|
|
36
|
+
export function rerankWithVectors(query, candidates, alpha = 0.3) {
|
|
37
|
+
if (!loadedIndex || !idfCache || !loadedIndex.vocabulary) {
|
|
38
|
+
return candidates;
|
|
39
|
+
}
|
|
40
|
+
const queryVector = computeQueryVector(query, loadedIndex.vocabulary, idfCache);
|
|
41
|
+
if (!queryVector)
|
|
42
|
+
return candidates;
|
|
43
|
+
// Map postId → best chunk cosine similarity
|
|
44
|
+
const articleScores = new Map();
|
|
45
|
+
for (const chunk of loadedIndex.chunks) {
|
|
46
|
+
if (!chunk.vector)
|
|
47
|
+
continue;
|
|
48
|
+
const sim = cosineSimilarity(queryVector, chunk.vector);
|
|
49
|
+
const current = articleScores.get(chunk.postId) ?? 0;
|
|
50
|
+
if (sim > current) {
|
|
51
|
+
articleScores.set(chunk.postId, sim);
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
if (articleScores.size === 0)
|
|
55
|
+
return candidates;
|
|
56
|
+
// Normalize original scores to 0-1
|
|
57
|
+
const maxOriginal = Math.max(...candidates.map(c => c.score ?? 0), 1);
|
|
58
|
+
const reranked = candidates.map(article => {
|
|
59
|
+
const slug = extractSlugFromUrl(article.url);
|
|
60
|
+
const vectorScore = articleScores.get(slug) ?? 0;
|
|
61
|
+
const originalNorm = (article.score ?? 0) / maxOriginal;
|
|
62
|
+
const blended = originalNorm * (1 - alpha) + vectorScore * alpha;
|
|
63
|
+
return { ...article, score: blended };
|
|
64
|
+
});
|
|
65
|
+
return reranked.sort((a, b) => (b.score ?? 0) - (a.score ?? 0));
|
|
66
|
+
}
|
|
67
|
+
// ── Math Utilities ────────────────────────────────────────────
|
|
68
|
+
function cosineSimilarity(a, b) {
|
|
69
|
+
if (a.length !== b.length)
|
|
70
|
+
return 0;
|
|
71
|
+
let dot = 0, magA = 0, magB = 0;
|
|
72
|
+
for (let i = 0; i < a.length; i++) {
|
|
73
|
+
dot += a[i] * b[i];
|
|
74
|
+
magA += a[i] * a[i];
|
|
75
|
+
magB += b[i] * b[i];
|
|
76
|
+
}
|
|
77
|
+
const mag = Math.sqrt(magA) * Math.sqrt(magB);
|
|
78
|
+
return mag === 0 ? 0 : dot / mag;
|
|
79
|
+
}
|
|
80
|
+
function computeQueryVector(query, vocabulary, idf) {
|
|
81
|
+
const tokens = tokenizeForVector(query);
|
|
82
|
+
if (!tokens.length)
|
|
83
|
+
return null;
|
|
84
|
+
const tf = new Map();
|
|
85
|
+
for (const t of tokens)
|
|
86
|
+
tf.set(t, (tf.get(t) || 0) + 1);
|
|
87
|
+
const maxTf = Math.max(...tf.values(), 1);
|
|
88
|
+
const vector = vocabulary.map(term => {
|
|
89
|
+
const termTf = (tf.get(term) || 0) / maxTf;
|
|
90
|
+
const termIdf = idf.get(term) || 0;
|
|
91
|
+
return termTf * termIdf;
|
|
92
|
+
});
|
|
93
|
+
// Check if vector is all zeros
|
|
94
|
+
if (vector.every(v => v === 0))
|
|
95
|
+
return null;
|
|
96
|
+
return vector;
|
|
97
|
+
}
|
|
98
|
+
function tokenizeForVector(text) {
|
|
99
|
+
const CJK = /[\u4e00-\u9fff\u3400-\u4dbf]/g;
|
|
100
|
+
const cjkChars = text.match(CJK) || [];
|
|
101
|
+
const latin = text
|
|
102
|
+
.replace(CJK, ' ')
|
|
103
|
+
.toLowerCase()
|
|
104
|
+
.split(/\W+/)
|
|
105
|
+
.filter(w => w.length > 2);
|
|
106
|
+
return [...cjkChars, ...latin];
|
|
107
|
+
}
|
|
108
|
+
function buildIDFFromVocab(vocabulary, chunks) {
|
|
109
|
+
const N = chunks.length;
|
|
110
|
+
const df = new Map();
|
|
111
|
+
for (const chunk of chunks) {
|
|
112
|
+
const tokens = new Set(tokenizeForVector(chunk.text));
|
|
113
|
+
for (const term of vocabulary) {
|
|
114
|
+
if (tokens.has(term)) {
|
|
115
|
+
df.set(term, (df.get(term) || 0) + 1);
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
const idf = new Map();
|
|
120
|
+
for (const term of vocabulary) {
|
|
121
|
+
const docCount = df.get(term) || 0;
|
|
122
|
+
idf.set(term, Math.log(N / (docCount + 1)) + 1);
|
|
123
|
+
}
|
|
124
|
+
return idf;
|
|
125
|
+
}
|
|
126
|
+
/**
|
|
127
|
+
* Extracts a slug/postId from a URL path like "/zh/posts/some-slug/" → "zh/some-slug"
|
|
128
|
+
*/
|
|
129
|
+
function extractSlugFromUrl(url) {
|
|
130
|
+
const path = url.replace(/^https?:\/\/[^/]+/, '');
|
|
131
|
+
const match = path.match(/^\/([\w-]+)\/posts\/(.+?)\/?$/);
|
|
132
|
+
if (match)
|
|
133
|
+
return `${match[1]}/${match[2]}`;
|
|
134
|
+
return path.replace(/^\/|\/$/g, '');
|
|
135
|
+
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"chat-handler.d.ts","sourceRoot":"","sources":["../../src/server/chat-handler.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"chat-handler.d.ts","sourceRoot":"","sources":["../../src/server/chat-handler.ts"],"names":[],"mappings":"AAiDA,OAAO,KAAK,EAAE,kBAAkB,EAAgC,MAAM,YAAY,CAAC;AA4HnF,wBAAsB,iBAAiB,CAAC,OAAO,EAAE,kBAAkB,GAAG,OAAO,CAAC,QAAQ,CAAC,CA0CtF"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { createUIMessageStream, createUIMessageStreamResponse, streamText, convertToModelMessages, } from 'ai';
|
|
2
2
|
import { t, getLang } from '../utils/i18n.js';
|
|
3
|
-
import { getClientIP, checkRateLimit, rateLimitResponse, searchArticles, searchProjects, getSessionCacheKey, getCachedContext, setCachedContext, shouldReuseSearchContext, buildLocalSearchQuery, shouldRunKeywordExtraction, extractSearchKeywords, KEYWORD_EXTRACTION_TIMEOUT_MS, shouldSkipAnalysis, analyzeRetrievedEvidence, buildEvidenceSection, EVIDENCE_ANALYSIS_TIMEOUT_MS, getCitationGuardPreflight, buildSystemPrompt, getAuthorContext, getVoiceProfile, mergeResults, getProviderManager, createCacheAdapter, detectPublicQuestion, getGlobalSearchCache, shouldAppendCitations, formatCitationBlock, selectCitations, setGlobalSearchCache, getGlobalCacheTTL, getResponseCache, setResponseCache, getResponseCacheConfig, rankArticlesByIntent, } from '../index.js';
|
|
3
|
+
import { getClientIP, checkRateLimit, rateLimitResponse, searchArticles, searchProjects, getSessionCacheKey, getCachedContext, setCachedContext, shouldReuseSearchContext, buildLocalSearchQuery, shouldRunKeywordExtraction, extractSearchKeywords, KEYWORD_EXTRACTION_TIMEOUT_MS, shouldSkipAnalysis, analyzeRetrievedEvidence, buildEvidenceSection, EVIDENCE_ANALYSIS_TIMEOUT_MS, getCitationGuardPreflight, buildSystemPrompt, getAuthorContext, getVoiceProfile, mergeResults, getProviderManager, createCacheAdapter, detectPublicQuestion, getGlobalSearchCache, shouldAppendCitations, formatCitationBlock, selectCitations, setGlobalSearchCache, getGlobalCacheTTL, getResponseCache, setResponseCache, getResponseCacheConfig, rankArticlesByIntent, matchFactsToQuery, buildFactSection, } from '../index.js';
|
|
4
4
|
import { createChatStatusData } from './types.js';
|
|
5
5
|
import { errors, corsPreflightResponse } from './errors.js';
|
|
6
6
|
import { notifyAiChat } from './notify.js';
|
|
@@ -187,10 +187,12 @@ async function runPipeline(args) {
|
|
|
187
187
|
let responseText = '';
|
|
188
188
|
if (adapter) {
|
|
189
189
|
const articlePrompt = buildArticleContextPrompt(context);
|
|
190
|
+
const matchedFacts = matchFactsToQuery(cachedSearch.query, lang);
|
|
191
|
+
const factPromptSection = buildFactSection(matchedFacts, lang);
|
|
190
192
|
const systemPrompt = buildSystemPrompt({
|
|
191
193
|
static: { authorName: env.SITE_AUTHOR || '博主', siteUrl: env.SITE_URL || '', lang },
|
|
192
194
|
semiStatic: { authorContext: getAuthorContext(), voiceProfile: getVoiceProfile() },
|
|
193
|
-
dynamic: { userQuery: cachedSearch.query, articles: cachedSearch.articles, projects: cachedSearch.projects, evidenceSection: articlePrompt },
|
|
195
|
+
dynamic: { userQuery: cachedSearch.query, articles: cachedSearch.articles, projects: cachedSearch.projects, evidenceSection: articlePrompt, factSection: factPromptSection },
|
|
194
196
|
});
|
|
195
197
|
const llmResult = await streamLLMResponse({ writer: w, adapter, systemPrompt, messages, lang });
|
|
196
198
|
responseText = llmResult.responseText;
|
|
@@ -326,6 +328,9 @@ async function runPipeline(args) {
|
|
|
326
328
|
projects: relatedProjects,
|
|
327
329
|
lang,
|
|
328
330
|
});
|
|
331
|
+
// ── Fact Registry ───────────────────────────────────────────
|
|
332
|
+
const matchedFacts = matchFactsToQuery(latestText, lang);
|
|
333
|
+
const factPromptSection = buildFactSection(matchedFacts, lang);
|
|
329
334
|
// ── Build System Prompt ─────────────────────────────────────
|
|
330
335
|
const articlePrompt = buildArticleContextPrompt(context);
|
|
331
336
|
const systemPrompt = buildSystemPrompt({
|
|
@@ -345,6 +350,7 @@ async function runPipeline(args) {
|
|
|
345
350
|
evidenceSection: articlePrompt
|
|
346
351
|
? `${evidenceSection}\n${articlePrompt}`
|
|
347
352
|
: evidenceSection,
|
|
353
|
+
factSection: factPromptSection,
|
|
348
354
|
lang,
|
|
349
355
|
},
|
|
350
356
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"metadata-init.d.ts","sourceRoot":"","sources":["../../src/server/metadata-init.ts"],"names":[],"mappings":"AASA,OAAO,KAAK,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAIjE;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,cAAc,EAAE,GAAG,CAAC,EAAE,cAAc,GAAG,IAAI,
|
|
1
|
+
{"version":3,"file":"metadata-init.d.ts","sourceRoot":"","sources":["../../src/server/metadata-init.ts"],"names":[],"mappings":"AASA,OAAO,KAAK,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAIjE;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,cAAc,EAAE,GAAG,CAAC,EAAE,cAAc,GAAG,IAAI,CAsCrF;AAED;;GAEG;AACH,wBAAgB,iBAAiB,IAAI,IAAI,CAExC"}
|
|
@@ -12,6 +12,8 @@ export function initializeMetadata(config, env) {
|
|
|
12
12
|
summaries: config.summaries,
|
|
13
13
|
authorContext: config.authorContext,
|
|
14
14
|
voiceProfile: config.voiceProfile,
|
|
15
|
+
factRegistry: (config.factRegistry ?? null),
|
|
16
|
+
vectorIndex: (config.vectorIndex ?? null),
|
|
15
17
|
});
|
|
16
18
|
const authorCtx = getAuthorContext();
|
|
17
19
|
const allSummaries = getAllSummaries();
|
package/dist/server/types.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/server/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AACpC,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,8BAA8B,CAAC;AACvE,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAIlD,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,QAAQ,GAAG,SAAS,CAAC;IAC5B,OAAO,CAAC,EAAE,kBAAkB,CAAC;CAC9B;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;CACzB;AAID,MAAM,WAAW,eAAe;IAC9B,OAAO,CAAC,EAAE,WAAW,CAAC;IACtB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,SAAS,EAAE,CAAC;IACtB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,cAAe,SAAQ,kBAAkB,EAAE,QAAQ;IAClE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED,MAAM,WAAW,kBAAkB;IACjC,GAAG,EAAE,cAAc,CAAC;IACpB,OAAO,EAAE,OAAO,CAAC;IACjB,SAAS,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,CAAC,OAAO,CAAC,KAAK,IAAI,CAAC;CACjD;AAID,MAAM,MAAM,eAAe,GAAG,QAAQ,GAAG,QAAQ,GAAG,UAAU,CAAC;AAE/D,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,eAAe,CAAC;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,OAAO,CAAC;IACd,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,wBAAgB,oBAAoB,CAClC,OAAO,EAAE,IAAI,CAAC,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC,GAAG;IAAE,IAAI,CAAC,EAAE,OAAO,CAAA;CAAE,GAChE,cAAc,CAMhB;AAED,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,OAAO,GAAG,KAAK,IAAI,cAAc,CAIxE;AAID,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAID,MAAM,WAAW,cAAc;IAC7B,SAAS,EAAE,OAAO,CAAC;IACnB,aAAa,EAAE,OAAO,CAAC;IACvB,YAAY,EAAE,OAAO,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB"}
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/server/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AACpC,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,8BAA8B,CAAC;AACvE,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAIlD,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,QAAQ,GAAG,SAAS,CAAC;IAC5B,OAAO,CAAC,EAAE,kBAAkB,CAAC;CAC9B;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;CACzB;AAID,MAAM,WAAW,eAAe;IAC9B,OAAO,CAAC,EAAE,WAAW,CAAC;IACtB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,SAAS,EAAE,CAAC;IACtB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,WAAW,cAAe,SAAQ,kBAAkB,EAAE,QAAQ;IAClE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED,MAAM,WAAW,kBAAkB;IACjC,GAAG,EAAE,cAAc,CAAC;IACpB,OAAO,EAAE,OAAO,CAAC;IACjB,SAAS,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,CAAC,OAAO,CAAC,KAAK,IAAI,CAAC;CACjD;AAID,MAAM,MAAM,eAAe,GAAG,QAAQ,GAAG,QAAQ,GAAG,UAAU,CAAC;AAE/D,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,eAAe,CAAC;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,OAAO,CAAC;IACd,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,wBAAgB,oBAAoB,CAClC,OAAO,EAAE,IAAI,CAAC,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC,GAAG;IAAE,IAAI,CAAC,EAAE,OAAO,CAAA;CAAE,GAChE,cAAc,CAMhB;AAED,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,OAAO,GAAG,KAAK,IAAI,cAAc,CAIxE;AAID,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAID,MAAM,WAAW,cAAc;IAC7B,SAAS,EAAE,OAAO,CAAC;IACnB,aAAa,EAAE,OAAO,CAAC;IACvB,YAAY,EAAE,OAAO,CAAC;IACtB,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@astro-minimax/ai",
|
|
3
|
-
"version": "0.7.
|
|
3
|
+
"version": "0.7.3",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Vendor-agnostic AI integration package with full RAG pipeline for astro-minimax blogs — supports OpenAI, Cloudflare AI, and custom providers.",
|
|
6
6
|
"author": "Souloss",
|
|
@@ -57,6 +57,10 @@
|
|
|
57
57
|
"types": "./dist/data/index.d.ts",
|
|
58
58
|
"import": "./dist/data/index.js"
|
|
59
59
|
},
|
|
60
|
+
"./fact-registry": {
|
|
61
|
+
"types": "./dist/fact-registry/index.d.ts",
|
|
62
|
+
"import": "./dist/fact-registry/index.js"
|
|
63
|
+
},
|
|
60
64
|
"./stream": {
|
|
61
65
|
"types": "./dist/stream/index.d.ts",
|
|
62
66
|
"import": "./dist/stream/index.js"
|
|
@@ -72,6 +76,9 @@
|
|
|
72
76
|
"files": [
|
|
73
77
|
"dist/",
|
|
74
78
|
"src/components/",
|
|
79
|
+
"src/providers/mock.ts",
|
|
80
|
+
"src/server/types.ts",
|
|
81
|
+
"src/utils/i18n.ts",
|
|
75
82
|
"src/styles/",
|
|
76
83
|
"README.md"
|
|
77
84
|
],
|
|
@@ -80,7 +87,7 @@
|
|
|
80
87
|
"@ai-sdk/openai-compatible": "^2.0.35",
|
|
81
88
|
"ai": "^6.0.116",
|
|
82
89
|
"workers-ai-provider": "^3.1.2",
|
|
83
|
-
"@astro-minimax/notify": "0.7.
|
|
90
|
+
"@astro-minimax/notify": "0.7.3"
|
|
84
91
|
},
|
|
85
92
|
"optionalDependencies": {
|
|
86
93
|
"undici": "^6.0.0"
|
|
@@ -108,7 +115,7 @@
|
|
|
108
115
|
"pnpm": ">=9.0.0"
|
|
109
116
|
},
|
|
110
117
|
"scripts": {
|
|
111
|
-
"build": "tsc -p tsconfig.build.json",
|
|
118
|
+
"build": "tsc -p tsconfig.build.json && chmod +x dist/server/dev-server.js",
|
|
112
119
|
"build:watch": "tsc -p tsconfig.build.json --watch",
|
|
113
120
|
"typecheck": "tsc --noEmit",
|
|
114
121
|
"clean": "rm -rf dist"
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
+
/** @jsxImportSource preact */
|
|
1
2
|
import { useState, useCallback } from 'preact/hooks';
|
|
2
|
-
import { ChatPanel } from './ChatPanel.
|
|
3
|
-
import type { AIChatConfig } from './ChatPanel.
|
|
4
|
-
import type { ArticleChatContext } from '../server/types.
|
|
3
|
+
import { ChatPanel } from './ChatPanel.tsx';
|
|
4
|
+
import type { AIChatConfig } from './ChatPanel.tsx';
|
|
5
|
+
import type { ArticleChatContext } from '../server/types.ts';
|
|
5
6
|
|
|
6
7
|
interface Props {
|
|
7
8
|
config: AIChatConfig;
|
|
@@ -1,11 +1,12 @@
|
|
|
1
|
+
/** @jsxImportSource preact */
|
|
1
2
|
import { useCallback, useEffect, useMemo, useRef, useState } from 'preact/hooks';
|
|
2
3
|
import { useChat } from '@ai-sdk/react';
|
|
3
4
|
import { DefaultChatTransport } from 'ai';
|
|
4
5
|
import type { UIMessage } from 'ai';
|
|
5
|
-
import { getMockResponse, createMockStream } from '../providers/mock.
|
|
6
|
-
import type { ArticleChatContext, ChatStatusData } from '../server/types.
|
|
7
|
-
import { isChatStatusData } from '../server/types.
|
|
8
|
-
import { t, getLang } from '../utils/i18n.
|
|
6
|
+
import { getMockResponse, createMockStream } from '../providers/mock.ts';
|
|
7
|
+
import type { ArticleChatContext, ChatStatusData } from '../server/types.ts';
|
|
8
|
+
import { isChatStatusData } from '../server/types.ts';
|
|
9
|
+
import { t, getLang } from '../utils/i18n.ts';
|
|
9
10
|
|
|
10
11
|
export interface AIChatConfig {
|
|
11
12
|
enabled?: boolean;
|