@astro-minimax/ai 0.7.5 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/components/AIChatContainer.d.ts +9 -0
- package/dist/components/AIChatContainer.d.ts.map +1 -0
- package/dist/components/AIChatContainer.js +936 -0
- package/{src → dist}/components/AIChatWidget.astro +1 -1
- package/dist/components/ChatPanel.d.ts +19 -0
- package/dist/components/ChatPanel.d.ts.map +1 -0
- package/dist/components/ChatPanel.js +914 -0
- package/dist/data/index.js +18 -1
- package/dist/fact-registry/index.js +16 -3
- package/dist/index.js +11 -30
- package/dist/intelligence/evidence-analysis.d.ts.map +1 -1
- package/dist/intelligence/index.js +56 -5
- package/dist/intelligence/keyword-extract.d.ts.map +1 -1
- package/dist/middleware/index.js +10 -1
- package/dist/prompt/index.js +10 -4
- package/dist/provider-manager/base.d.ts +1 -0
- package/dist/provider-manager/base.d.ts.map +1 -1
- package/dist/provider-manager/types.d.ts +1 -0
- package/dist/provider-manager/types.d.ts.map +1 -1
- package/dist/providers/index.js +5 -1
- package/dist/search/index.js +48 -6
- package/dist/server/dev-server.js +236 -259
- package/dist/server/index.js +39 -6
- package/dist/stream/index.js +8 -2
- package/package.json +16 -10
- package/dist/cache/global-cache.js +0 -141
- package/dist/cache/index.js +0 -62
- package/dist/cache/kv-adapter.js +0 -102
- package/dist/cache/memory-adapter.js +0 -95
- package/dist/cache/response-cache.js +0 -85
- package/dist/cache/types.js +0 -16
- package/dist/data/metadata-loader.js +0 -66
- package/dist/data/types.js +0 -1
- package/dist/fact-registry/fact-matcher.js +0 -94
- package/dist/fact-registry/prompt-injector.js +0 -57
- package/dist/fact-registry/registry.js +0 -38
- package/dist/fact-registry/types.js +0 -5
- package/dist/intelligence/citation-appender.js +0 -65
- package/dist/intelligence/citation-guard.js +0 -125
- package/dist/intelligence/evidence-analysis.js +0 -88
- package/dist/intelligence/intent-detect.js +0 -131
- package/dist/intelligence/keyword-extract.js +0 -114
- package/dist/intelligence/response-templates.js +0 -116
- package/dist/intelligence/types.js +0 -1
- package/dist/middleware/rate-limiter.js +0 -129
- package/dist/prompt/dynamic-layer.js +0 -67
- package/dist/prompt/prompt-builder.js +0 -12
- package/dist/prompt/semi-static-layer.js +0 -29
- package/dist/prompt/static-layer.js +0 -150
- package/dist/prompt/types.js +0 -1
- package/dist/provider-manager/base.js +0 -47
- package/dist/provider-manager/config.js +0 -134
- package/dist/provider-manager/index.js +0 -6
- package/dist/provider-manager/manager.js +0 -121
- package/dist/provider-manager/mock.js +0 -56
- package/dist/provider-manager/openai.js +0 -112
- package/dist/provider-manager/types.js +0 -6
- package/dist/provider-manager/workers.js +0 -74
- package/dist/providers/mock.js +0 -234
- package/dist/search/idf.js +0 -31
- package/dist/search/search-api.js +0 -119
- package/dist/search/search-index.js +0 -35
- package/dist/search/search-utils.js +0 -122
- package/dist/search/session-cache.js +0 -92
- package/dist/search/types.js +0 -1
- package/dist/search/vector-reranker.js +0 -135
- package/dist/server/chat-handler.js +0 -590
- package/dist/server/errors.js +0 -41
- package/dist/server/metadata-init.js +0 -47
- package/dist/server/notify.js +0 -74
- package/dist/server/stream-helpers.js +0 -197
- package/dist/server/types.js +0 -13
- package/dist/stream/mock-stream.js +0 -27
- package/dist/stream/response.js +0 -22
- package/dist/utils/i18n.js +0 -164
- package/src/components/AIChatContainer.tsx +0 -31
- package/src/components/ChatPanel.tsx +0 -866
- package/src/providers/mock.ts +0 -240
- package/src/server/types.ts +0 -89
- package/src/utils/i18n.ts +0 -238
|
@@ -1,94 +0,0 @@
|
|
|
1
|
-
import { queryFacts } from './registry.js';
|
|
2
|
-
/**
 * Trigger keywords per fact category. A category is treated as relevant as
 * soon as one of its keywords occurs anywhere in the lower-cased user query.
 */
const CATEGORY_KEYWORDS = {
    author: [
        '作者', '博主', '谁', '关于我', '自我介绍', '个人',
        'author', 'who', 'about me', 'introduce',
    ],
    blog: [
        '博客', '文章', '多少', '数量', '统计', '总共', '分类', '标签', '语言',
        'blog', 'post', 'how many', 'count', 'statistic', 'category', 'tag',
    ],
    content: [
        '写过', '提到', '讨论', '观点', '主题', '话题', '涵盖', '领域',
        'wrote', 'mention', 'discuss', 'topic', 'cover', 'area', 'opinion',
    ],
    project: [
        '项目', '开源', '仓库', '工具', '产品',
        'project', 'open source', 'repo', 'github', 'tool', 'product',
    ],
    tech: [
        '技术', '技术栈', '框架', '库', '编程语言', '前端', '后端',
        'tech', 'stack', 'framework', 'library', 'language', 'frontend', 'backend',
    ],
};

/**
 * Lists the fact categories whose keywords occur in the query.
 *
 * Matching is a plain substring test against the lower-cased query, so a
 * keyword also matches when it appears inside a longer word.
 *
 * @param {string} query - Raw user query.
 * @returns {string[]} Matching category names, in CATEGORY_KEYWORDS order.
 */
function detectRelevantCategories(query) {
    const lowered = query.toLowerCase();
    return Object.keys(CATEGORY_KEYWORDS).filter((category) =>
        CATEGORY_KEYWORDS[category].some((keyword) => lowered.includes(keyword)));
}
|
|
41
|
-
/**
 * Pulls candidate tag tokens out of the query.
 *
 * A token is either a Latin word (a letter followed by at least one more
 * letter, digit, '.', '+', '#' or '-') or a run of 2 to 6 CJK ideographs.
 *
 * @param {string} query - Raw user query.
 * @returns {string[]} Lower-cased tokens in order of appearance; empty when nothing matches.
 */
function extractQueryTags(query) {
    const found = query.match(/[A-Za-z][A-Za-z0-9.+#-]{1,}|[\u4e00-\u9fa5]{2,6}/g);
    if (found === null || found === undefined) {
        return [];
    }
    return found.map((token) => token.toLowerCase());
}
|
|
48
|
-
/**
 * Selects the facts most relevant to the user's query.
 *
 * Strategy:
 * 1. Always include very-high-confidence core facts (confidence >= 0.95, up to 5)
 * 2. Add category-matched facts based on query keywords (up to 10)
 * 3. Add tag-matched facts for more specific queries (up to 5)
 * 4. Deduplicate by fact id and cap the total at `maxFacts`
 *
 * Output order is category facts, then tag facts, then core facts. When the
 * cap is hit, the tail of the category/tag facts is trimmed so that the core
 * facts are never pushed out (previously a full set of category + tag facts
 * silently dropped every core fact).
 *
 * @param {string} query - Raw user query.
 * @param {string} lang - Language filter forwarded to queryFacts.
 * @param {number} [maxFacts=15] - Maximum number of facts to return.
 * @returns {Array} Deduplicated facts, at most `maxFacts` long.
 */
export function matchFactsToQuery(query, lang, maxFacts = 15) {
    const categories = detectRelevantCategories(query);
    const queryTags = extractQueryTags(query);

    // Layer 1: always-present core facts (highest confidence).
    const coreFacts = queryFacts({ minConfidence: 0.95, lang, limit: 5 });
    // Layer 2: category-matched facts.
    const categoryFacts = categories.length > 0
        ? queryFacts({ categories, minConfidence: 0.7, lang, limit: 10 })
        : [];
    // Layer 3: tag-matched facts (for specificity).
    const tagFacts = queryTags.length > 0
        ? queryFacts({ tags: queryTags, minConfidence: 0.6, lang, limit: 5 })
        : [];

    // Deduplicate in output order so a fact keeps its earliest position.
    const seen = new Set();
    const takeUnseen = (facts) => facts.filter((fact) => {
        if (seen.has(fact.id)) {
            return false;
        }
        seen.add(fact.id);
        return true;
    });
    const specificFacts = takeUnseen([...categoryFacts, ...tagFacts]);
    const remainingCore = takeUnseen(coreFacts);

    // Reserve room for the core facts first, then fill what is left.
    const budget = Math.max(0, maxFacts);
    const reserved = Math.min(remainingCore.length, budget);
    return [
        ...specificFacts.slice(0, budget - reserved),
        ...remainingCore.slice(0, reserved),
    ];
}
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
/** Section headings per fact category, keyed by output language. */
const CATEGORY_LABELS = {
    zh: {
        author: '关于作者',
        blog: '博客数据',
        content: '内容事实',
        project: '项目信息',
        tech: '技术相关',
    },
    en: {
        author: 'About the Author',
        blog: 'Blog Statistics',
        content: 'Content Facts',
        project: 'Project Info',
        tech: 'Tech Related',
    },
};

/** Title and closing instruction of the fact section, keyed by output language. */
const SECTION_TEXT = {
    zh: {
        title: '已验证事实(基于博客真实数据)',
        instruction: '以上事实来自博客的真实数据。回答时优先使用这些已验证的事实,不要编造与之矛盾的信息。如果某个问题的答案不在已验证事实中,请如实说明。',
    },
    en: {
        title: 'Verified Facts (based on real blog data)',
        instruction: 'The above facts are derived from real blog data. Prioritize these verified facts when answering. Do not fabricate information that contradicts them. If the answer is not among verified facts, state that honestly.',
    },
};

/**
 * Renders facts as a Markdown prompt section.
 *
 * Facts are grouped by `category` in order of first appearance; each group
 * gets a `###` heading (the raw category name when no label is defined),
 * followed by one bullet per fact statement. The section ends with a quoted
 * instruction line.
 *
 * @param {Array<{category: string, statement: string}>} facts - Facts to render.
 * @param {string} [lang='zh'] - 'zh' selects Chinese text; anything else selects English.
 * @returns {string} The rendered section, or '' when there are no facts.
 */
export function buildFactSection(facts, lang = 'zh') {
    if (!facts.length) {
        return '';
    }
    const locale = lang === 'zh' ? 'zh' : 'en';
    const { title, instruction } = SECTION_TEXT[locale];
    const categoryLabels = CATEGORY_LABELS[locale];

    // A Map keeps categories in first-seen order.
    const byCategory = new Map();
    facts.forEach((fact) => {
        if (!byCategory.has(fact.category)) {
            byCategory.set(fact.category, []);
        }
        byCategory.get(fact.category).push(fact);
    });

    const groups = [...byCategory].flatMap(([category, members]) => [
        '',
        `### ${categoryLabels[category] ?? category}`,
        ...members.map((fact) => `- ${fact.statement}`),
    ]);
    return [`## ${title}`, ...groups, '', `> ${instruction}`].join('\n');
}
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
/** Module-level holder for the currently loaded fact registry (null when none is loaded). */
let cachedRegistry = null;

/**
 * Stores the given registry object for later queries.
 * @param {object} data - Registry data; `data.facts` is what queryFacts reads.
 */
export function loadFactRegistry(data) {
    cachedRegistry = data;
}

/** Drops the loaded registry so that subsequent queries return no facts. */
export function clearFactRegistry() {
    cachedRegistry = null;
}

/**
 * Returns the registry object exactly as it was loaded.
 * @returns {object|null} The loaded registry, or null.
 */
export function getFactRegistry() {
    return cachedRegistry;
}

/**
 * Query facts with optional filters.
 * Returns facts sorted by confidence (highest first); the stored array is
 * never mutated.
 *
 * A registry without a `facts` array yields an empty result, and facts
 * without a `tags` array simply never match a tag filter (both cases used to
 * throw a TypeError).
 *
 * @param {object} [options]
 * @param {string[]} [options.categories] - Keep facts whose category is listed.
 * @param {string} [options.lang] - Keep facts in this language or marked 'all'.
 * @param {number} [options.minConfidence] - Keep facts with confidence >= this value.
 * @param {string[]} [options.tags] - Keep facts sharing at least one tag (case-insensitive).
 * @param {number} [options.limit] - Maximum result size; ignored unless > 0.
 * @returns {Array} Matching facts.
 */
export function queryFacts(options = {}) {
    const storedFacts = cachedRegistry?.facts;
    if (!Array.isArray(storedFacts) || storedFacts.length === 0) {
        return [];
    }
    const { categories, lang, minConfidence, tags, limit } = options ?? {};
    let facts = storedFacts;
    if (categories?.length) {
        const wanted = new Set(categories);
        facts = facts.filter((fact) => wanted.has(fact.category));
    }
    if (lang) {
        facts = facts.filter((fact) => fact.lang === lang || fact.lang === 'all');
    }
    if (minConfidence !== undefined) {
        facts = facts.filter((fact) => fact.confidence >= minConfidence);
    }
    if (tags?.length) {
        const wantedTags = new Set(tags.map((tag) => tag.toLowerCase()));
        facts = facts.filter((fact) => Array.isArray(fact.tags)
            && fact.tags.some((tag) => wantedTags.has(tag.toLowerCase())));
    }
    // Copy before sorting: Array.prototype.sort works in place.
    facts = [...facts].sort((a, b) => b.confidence - a.confidence);
    if (limit && limit > 0) {
        facts = facts.slice(0, limit);
    }
    return facts;
}
|
|
@@ -1,65 +0,0 @@
|
|
|
1
|
-
/**
 * Reports whether the text already contains a Markdown link `[label](target)`
 * whose target is one of the known URLs.
 *
 * @param {string} text - Text to scan.
 * @param {Set<string>} validUrls - Known article/project URLs.
 * @returns {boolean} True when at least one link target is in `validUrls`.
 */
function hasExistingCitations(text, validUrls) {
    for (const [, , target] of text.matchAll(/\[([^\]]+)\]\(([^)]+)\)/g)) {
        if (validUrls.has(target)) {
            return true;
        }
    }
    return false;
}
|
|
6
|
-
/**
 * Picks the best-scoring articles and projects to cite.
 *
 * Items scoring below `minScore` (a missing score counts as 0) are dropped;
 * the rest are ordered by descending score, with ties keeping articles before
 * projects and otherwise their input order.
 *
 * @param {Array<{title: string, url: string, score?: number}>} articles
 * @param {Array<{name: string, url: string, score?: number}>} projects
 * @param {number} maxCitations - Maximum number of citations returned.
 * @param {number} minScore - Minimum score an item needs to qualify.
 * @returns {Array<{title: string, url: string, score: number}>}
 */
export function selectCitations(articles, projects, maxCitations, minScore) {
    const asCandidate = (title, item) => ({ title, url: item.url, score: item.score ?? 0 });
    const pool = [];
    for (const article of articles) {
        pool.push(asCandidate(article.title, article));
    }
    for (const project of projects) {
        pool.push(asCandidate(project.name, project));
    }
    const eligible = pool.filter((candidate) => candidate.score >= minScore);
    eligible.sort((left, right) => right.score - left.score);
    return eligible.slice(0, maxCitations);
}
|
|
19
|
-
/**
 * Renders citations as a Markdown "further reading" list.
 *
 * The block starts with a newline so it can be appended directly to the
 * preceding text.
 *
 * @param {Array<{title: string, url: string}>} citations
 * @param {string} lang - 'zh' selects the Chinese heading; anything else the English one.
 * @returns {string} The Markdown block, or '' when there are no citations.
 */
export function formatCitationBlock(citations, lang) {
    if (citations.length === 0) {
        return '';
    }
    const heading = lang === 'zh' ? '延伸阅读' : 'Further Reading';
    const bullets = citations.map(({ title, url }) => `\n- [${title}](${url})`);
    return `\n**${heading}:**${bullets.join('')}`;
}
|
|
30
|
-
/**
 * Builds a stream transform that passes every chunk through unchanged and,
 * once the stream ends, appends a citation block unless the accumulated text
 * already links to one of the known article/project URLs.
 *
 * Chunks are accumulated with `+=`, so they are presumably strings —
 * confirm against the caller.
 *
 * @param {object} config
 * @param {Array} config.articles - Retrieved articles (url, title, score).
 * @param {Array} config.projects - Retrieved projects (url, name, score).
 * @param {string} config.lang - Language of the citation heading.
 * @param {number} [config.maxCitations=3] - Maximum citations appended.
 * @param {number} [config.minScore=5] - Minimum score for an item to be cited.
 * @returns {(stream: ReadableStream) => ReadableStream} Function that pipes a stream through the appender.
 */
export function createCitationAppenderTransform(config) {
    const { articles, projects, lang, maxCitations = 3, minScore = 5 } = config;
    const validUrls = new Set([...articles, ...projects].map((item) => item.url));
    return (stream) => {
        let seenText = '';
        const appender = new TransformStream({
            transform(chunk, controller) {
                seenText += chunk;
                controller.enqueue(chunk);
            },
            flush(controller) {
                // The model already cited a known URL: nothing to add.
                if (!hasExistingCitations(seenText, validUrls)) {
                    const picked = selectCitations(articles, projects, maxCitations, minScore);
                    if (picked.length > 0) {
                        controller.enqueue(formatCitationBlock(picked, lang));
                    }
                }
            },
        });
        return stream.pipeThrough(appender);
    };
}
|
|
58
|
-
/**
 * Decides whether a citation block should be appended to a finished response.
 *
 * Returns true when at least one article or project scores `minScore` or
 * higher (a missing score counts as 0) and the response does not already link
 * to any known article/project URL.
 *
 * @param {string} response - Complete response text.
 * @param {Array<{url: string, score?: number}>} articles
 * @param {Array<{url: string, score?: number}>} projects
 * @param {number} [minScore=5] - Score threshold; the default matches the
 *   default `minScore` of createCitationAppenderTransform.
 * @returns {boolean}
 */
export function shouldAppendCitations(response, articles, projects, minScore = 5) {
    const candidates = [...articles, ...projects];
    // Cheap check first: without a qualifying item there is nothing to cite.
    if (!candidates.some((item) => (item.score ?? 0) >= minScore)) {
        return false;
    }
    const validUrls = new Set(candidates.map((item) => item.url));
    return !hasExistingCitations(response, validUrls);
}
|
|
@@ -1,125 +0,0 @@
|
|
|
1
|
-
import { PRIVACY_REFUSAL_TEMPLATES, NO_ARTICLE_TEMPLATES, ARTICLE_COUNT_TEMPLATES, pickTemplate, pickTemplateWithVars, } from './response-templates.js';
|
|
2
|
-
/**
 * Patterns that flag a query as asking for sensitive personal information.
 * `key` selects the refusal template group for the matched topic.
 *
 * English alternatives are anchored on word boundaries so that they no
 * longer fire inside unrelated words ("learn" -> earn, "page"/"image" -> age,
 * "transparent" -> parent). Chinese alternatives are matched as substrings
 * because Chinese text has no word separators.
 */
const PRIVACY_PATTERNS = [
    { regex: /(住址|地址|住在哪|\baddress(?:es)?\b|\bwhere\b.*\blive)/iu, key: 'address' },
    { regex: /(收入|工资|薪资|\bsalar(?:y|ies)\b|\bincome\b|\bearn(?:s|ed|ing|ings)?\b)/iu, key: 'income' },
    { regex: /(家人|妻子|丈夫|孩子|父母|\bfamily\b|\bwife\b|\bhusband\b|\bchildren\b|\bparents?\b)/iu, key: 'family' },
    { regex: /(电话|手机号|\b(?:tele|cell)?phones?\b|\bmobile\b)/iu, key: 'phone' },
    { regex: /(身份证|\bid\s*card\b|\bpassport\b)/iu, key: 'id' },
    { regex: /(年龄|多大了|几岁|\bhow old\b|\bage\b)/iu, key: 'age' },
];
|
|
10
|
-
/**
 * Ordered answer-mode rules; the first pattern that matches the lower-cased
 * query wins. English alternatives are anchored on word boundaries so that
 * e.g. "account" no longer counts as "count" and "listen" as "list"; a
 * leading-only boundary keeps inflections such as "recommendation".
 */
const ANSWER_MODE_RULES = [
    ['count', /几次|多少|几篇|数量|\bcount(?:s|ing)?\b|\bhow many\b/u],
    ['list', /哪些|哪几个|列表|列举|\blist(?:s|ing)?\b|\bwhat are\b/u],
    ['opinion', /怎么看|怎么想|看法|观点|\bopinions?\b|\bthink about\b/u],
    ['recommendation', /推荐|建议|\bsuggest|\brecommend/u],
    ['fact', /是什么|什么是|介绍|解释|\bwhat is\b|\bexplain/u],
    ['fact', /有没有|是否|是不是|真的吗|\bdoes\b|\bis there\b/u],
];

/**
 * Resolves the expected answer mode from the user query.
 * Helps the system decide how to structure the response.
 *
 * @param {string} query - Raw user query.
 * @returns {'count'|'list'|'opinion'|'recommendation'|'fact'|'general'}
 *   The first matching mode, or 'general' when no rule matches.
 */
export function resolveAnswerMode(query) {
    const q = query.toLowerCase();
    for (const [mode, pattern] of ANSWER_MODE_RULES) {
        if (pattern.test(q)) {
            return mode;
        }
    }
    return 'general';
}
|
|
30
|
-
/**
 * Returns a canned refusal when the query matches one of PRIVACY_PATTERNS.
 *
 * The first matching pattern decides which template group is used; when that
 * group is missing the refusal text is an empty string.
 *
 * @param {string} query - Raw user query.
 * @param {string} lang - Language passed to the template picker.
 * @returns {{text: string, actions: string[]}|null} The refusal, or null when no pattern matches.
 */
function checkPrivacyRefusal(query, lang) {
    const hit = PRIVACY_PATTERNS.find(({ regex }) => regex.test(query));
    if (!hit) {
        return null;
    }
    const templates = PRIVACY_REFUSAL_TEMPLATES[hit.key];
    return {
        text: templates ? pickTemplate(templates, lang) : '',
        actions: ['preflight_reject'],
    };
}
|
|
47
|
-
/**
 * Pre-flight check that answers certain queries from the available context
 * without calling the LLM, to avoid hallucinated answers.
 *
 * Checks, in order:
 * 1. privacy-sensitive queries -> canned refusal;
 * 2. "how many articles" queries -> templated count, when any articles were passed in;
 * 3. "is there an article about..." queries -> canned "nothing found" reply,
 *    when neither articles nor projects were passed in.
 *
 * NOTE(review): the count reported in step 2 is `articles.length`, i.e. the
 * number of articles handed to this call — confirm that this is the figure
 * callers intend to present.
 *
 * @param {object} params
 * @param {string} params.userQuery - Raw user query.
 * @param {Array} params.articles - Articles available for this request.
 * @param {Array} params.projects - Projects available for this request.
 * @param {string} [params.lang='zh'] - Language of the canned reply.
 * @returns {{text: string, actions: string[]}|null} A direct reply, or null to continue with the LLM.
 */
export function getCitationGuardPreflight(params) {
    const { userQuery, articles, projects, lang = 'zh' } = params;
    const lowered = userQuery.toLowerCase();
    const asDirectReply = (text) => ({ text, actions: ['preflight_reject'] });

    const refusal = checkPrivacyRefusal(userQuery, lang);
    if (refusal) {
        return refusal;
    }

    const asksForCount = /有几篇|有多少篇|文章数量|总共.*文章|how many.*article/u.test(lowered);
    if (asksForCount && articles.length > 0) {
        return asDirectReply(pickTemplateWithVars(ARTICLE_COUNT_TEMPLATES, lang, { count: articles.length }));
    }

    const asksForExistence = /有没有|是否有|有.*文章|写过.*吗|is there|any.*article/u.test(lowered);
    if (asksForExistence && articles.length === 0 && projects.length === 0) {
        return asDirectReply(pickTemplate(NO_ARTICLE_TEMPLATES, lang));
    }

    return null;
}
|
|
73
|
-
/**
 * Matches a trailing fragment that could still grow into a Markdown link
 * `[text](url)` once more chunks arrive: an unclosed `[text`, a closed
 * `[text]` at the very end, or `[text](partial-url`.
 */
const PENDING_LINK_TAIL = /\[(?:[^\]]*|[^\]]+\](?:\([^)]*)?)$/;

/**
 * Creates a transform stream that monitors the AI output for hallucinated references.
 * Markdown links whose URL starts with "http" and is not one of the known
 * article/project URLs are replaced by their link text; all other text is
 * passed through.
 *
 * Text is emitted as soon as it can no longer be part of a link. Only a
 * trailing fragment that may still become a link is held back until the next
 * chunk (previously everything after the last complete link was held back,
 * so a response without links was not emitted until the stream ended).
 *
 * Chunks are concatenated with `+=`, so they are presumably strings —
 * confirm against the caller.
 *
 * @param {object} params
 * @param {Array<{url: string}>} params.articles - Known articles.
 * @param {Array<{url: string}>} params.projects - Known projects.
 * @param {(info: {actions: string[]}) => void} [params.onApplied] - Called once
 *   at end of stream when at least one link was rewritten.
 * @returns {(stream: ReadableStream) => ReadableStream} Function that pipes a stream through the guard.
 */
export function createCitationGuardTransform(params) {
    const { articles, projects, onApplied } = params;
    const validUrls = new Set([
        ...articles.map((a) => a.url),
        ...projects.map((p) => p.url),
    ]);
    return (stream) => {
        const actions = [];
        let buffer = '';
        const transform = new TransformStream({
            transform(chunk, controller) {
                buffer += chunk;
                let output = '';
                let lastIndex = 0;
                // Rewrite every complete Markdown link: [text](url)
                for (const match of buffer.matchAll(/\[([^\]]+)\]\(([^)]+)\)/g)) {
                    const [fullMatch, text, url] = match;
                    output += buffer.slice(lastIndex, match.index);
                    if (url.startsWith('http') && !validUrls.has(url)) {
                        // Fabricated external URL — keep the text, remove the link
                        output += text;
                        actions.push('stream_rewrite');
                    }
                    else {
                        output += fullMatch;
                    }
                    lastIndex = match.index + fullMatch.length;
                }
                // Emit the remainder too, except a tail that may be mid-link.
                const rest = buffer.slice(lastIndex);
                const pendingAt = rest.search(PENDING_LINK_TAIL);
                const safeEnd = pendingAt === -1 ? rest.length : pendingAt;
                output += rest.slice(0, safeEnd);
                buffer = rest.slice(safeEnd);
                if (output) {
                    controller.enqueue(output);
                }
            },
            flush(controller) {
                // Whatever is still held back never became a link: emit it as-is.
                if (buffer) {
                    controller.enqueue(buffer);
                    buffer = '';
                }
                if (actions.length > 0) {
                    onApplied?.({ actions });
                }
            },
        });
        return stream.pipeThrough(transform);
    };
}
|
|
@@ -1,88 +0,0 @@
|
|
|
1
|
-
import { generateText } from 'ai';
|
|
2
|
-
/** Time budget for one evidence-analysis call, in milliseconds. */
export const EVIDENCE_ANALYSIS_TIMEOUT_MS = 8000;
/** Default output-token cap for one evidence-analysis call. */
export const EVIDENCE_ANALYSIS_MAX_TOKENS = 360;

/**
 * Tells whether the evidence-analysis step should be skipped.
 *
 * It is skipped when fewer than two articles were retrieved, when the query
 * complexity is 'simple', or when the latest message is shorter than 15
 * characters.
 *
 * @param {string} latestText - Latest user message.
 * @param {number} articleCount - Number of retrieved articles.
 * @param {string} complexity - Query complexity label.
 * @returns {boolean} True when analysis should be skipped.
 */
export function shouldSkipAnalysis(latestText, articleCount, complexity) {
    return articleCount < 2 || complexity === 'simple' || latestText.length < 15;
}
|
|
17
|
-
/**
 * Asks an LLM to pre-analyze the retrieved evidence and extract the pieces
 * most relevant to the user's question.
 *
 * The model is prompted (in Chinese) to answer inside `<evidence>` tags; the
 * text between the first pair of tags becomes `analysis`. Failures of the
 * model call are reported through `parseStatus: 'error'` rather than thrown.
 *
 * @param {object} params
 * @param {string} params.userQuery - The user's question.
 * @param {Array} params.articles - Retrieved articles.
 * @param {Array} params.projects - Retrieved projects.
 * @param {object} params.provider - Provider exposing `chatModel(model)`.
 * @param {string} params.model - Model identifier passed to the provider.
 * @param {number} [params.maxOutputTokens=EVIDENCE_ANALYSIS_MAX_TOKENS] - Output-token cap.
 * @param {AbortSignal} [params.abortSignal] - Forwarded to the model call.
 * @returns {Promise<object>} On success `{ analysis, parseStatus: 'ok' | 'no_match', rawText, usage }`;
 *   on failure `{ parseStatus: 'error', error }`.
 */
export async function analyzeRetrievedEvidence(params) {
    const {
        userQuery,
        articles,
        projects,
        provider,
        model,
        maxOutputTokens = EVIDENCE_ANALYSIS_MAX_TOKENS,
        abortSignal,
    } = params;
    const evidenceSummary = buildEvidenceSummary(articles, projects);
    const prompt = [
        `用户问题:${userQuery}`,
        '',
        '检索到的相关内容:',
        evidenceSummary,
        '',
        '请分析这些内容,提取与用户问题最相关的2-3个关键信息点。格式:',
        '<evidence>',
        '[关键信息点1]',
        '[关键信息点2]',
        '</evidence>',
        '',
        '只返回evidence标签内的内容,简洁准确。',
    ].join('\n');
    try {
        const { text, usage } = await generateText({
            model: provider.chatModel(model),
            prompt,
            maxOutputTokens,
            temperature: 0.1,
            abortSignal,
        });
        const rawText = text?.trim() ?? '';
        const analysis = /<evidence>([\s\S]*?)<\/evidence>/.exec(rawText)?.[1]?.trim();
        let usageSummary;
        if (usage) {
            const inputTokens = usage.inputTokens ?? 0;
            const outputTokens = usage.outputTokens ?? 0;
            usageSummary = { inputTokens, outputTokens, totalTokens: inputTokens + outputTokens };
        }
        return {
            analysis,
            parseStatus: analysis ? 'ok' : 'no_match',
            rawText,
            usage: usageSummary,
        };
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        return { parseStatus: 'error', error: message };
    }
}
|
|
67
|
-
/**
 * Formats the evidence analysis for injection into the system prompt.
 *
 * A missing, non-string or blank analysis yields an empty string, so the
 * `analysis` field of an evidence-analysis result can be passed in directly
 * even when nothing was extracted (it used to throw on `undefined`).
 *
 * @param {string|undefined|null} analysis - Extracted evidence text.
 * @returns {string} The prompt section, or '' when there is nothing to inject.
 */
export function buildEvidenceSection(analysis) {
    if (typeof analysis !== 'string' || !analysis.trim()) {
        return '';
    }
    return `\n## 关键证据分析\n${analysis}\n`;
}
|
|
75
|
-
/**
 * Builds the plain-text evidence digest that is embedded in the analysis prompt.
 *
 * Covers at most the first 6 articles (title, optional summary, up to 3 key
 * points) and the first 3 projects (name plus the first 100 characters of the
 * description). Missing `keyPoints` or `description` fields are treated as
 * empty instead of throwing.
 *
 * @param {Array<{title: string, summary?: string, keyPoints?: string[]}>} articles
 * @param {Array<{name: string, description?: string}>} projects
 * @returns {string} Newline-separated digest; '' when both lists are empty.
 */
function buildEvidenceSummary(articles, projects) {
    const lines = [];
    for (const article of (articles ?? []).slice(0, 6)) {
        lines.push(`文章: ${article.title}`);
        if (article.summary) {
            lines.push(` 摘要: ${article.summary}`);
        }
        const keyPoints = article.keyPoints ?? [];
        if (keyPoints.length) {
            lines.push(` 要点: ${keyPoints.slice(0, 3).join(', ')}`);
        }
    }
    for (const project of (projects ?? []).slice(0, 3)) {
        const description = project.description ?? '';
        lines.push(`项目: ${project.name} - ${description.slice(0, 100)}`);
    }
    return lines.join('\n');
}
|
|
@@ -1,131 +0,0 @@
|
|
|
1
|
-
import { tokenize, normalizeText } from '../search/search-utils.js';
|
|
2
|
-
import { SESSION_CACHE_TTL_MS } from '../search/session-cache.js';
|
|
3
|
-
const MAX_FOLLOW_UP_LENGTH = 48;
|
|
4
|
-
/** Keywords that signal each intent; 'general' is the fallback and has none. */
const INTENT_KEYWORDS = {
    setup: ['搭建', '创建', '安装', 'install', 'setup', 'create', 'init', 'scaffold', '新建', '开始'],
    config: ['配置', '设置', 'config', 'settings', '环境变量', '.env', 'wrangler', 'tsconfig', '主题色', '颜色'],
    content: ['文章', '博客', '写作', 'markdown', 'mdx', '标签', '分类', '摘要', '封面', '翻译'],
    feature: ['功能', '特性', 'feature', '支持', 'AI', 'RAG', '搜索', '评论', 'RSS', '暗色', '深色'],
    deployment: ['部署', 'deploy', 'cloudflare', 'vercel', 'netlify', 'build', '构建', 'CI', 'CD'],
    troubleshooting: ['报错', '错误', 'error', 'bug', '问题', '不工作', '失败', 'fail', '修复', 'fix'],
    general: [],
};

/**
 * Turns a keyword into a predicate over a lower-cased query.
 *
 * - Keywords that do not start with an ASCII letter or digit (Chinese terms,
 *   '.env') are matched as plain substrings.
 * - ASCII keywords must start at a word start, so 'init' no longer matches
 *   inside "definite".
 * - ASCII keywords of up to 3 characters ('ai', 'ci', 'cd', 'fix', ...) must
 *   also end the word, apart from a simple s/es/ed/ing suffix; otherwise 'ai'
 *   would match "aim" just as it used to match "explain" or "main".
 */
function toKeywordMatcher(keyword) {
    const needle = keyword.toLowerCase();
    if (!/^[a-z0-9]/.test(needle)) {
        return (text) => text.includes(needle);
    }
    const escaped = needle.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const tail = needle.length <= 3 ? '(?:e?s|ed|ing)?(?![a-z0-9])' : '';
    const pattern = new RegExp(`(?<![a-z0-9])${escaped}${tail}`);
    return (text) => pattern.test(text);
}

/** Pre-compiled matchers per intent, in INTENT_KEYWORDS order, without 'general'. */
const INTENT_MATCHERS = Object.entries(INTENT_KEYWORDS)
    .filter(([intent]) => intent !== 'general')
    .map(([intent, keywords]) => [intent, keywords.map(toKeywordMatcher)]);

/**
 * Classifies the user query into an intent category.
 * Used to adjust search relevance scoring.
 *
 * The intent whose keyword list has the most distinct keywords present in
 * the query wins; ties go to the intent listed first in INTENT_KEYWORDS.
 *
 * @param {string} query - Raw user query.
 * @returns {string} An intent name from INTENT_KEYWORDS, 'general' when nothing matches.
 */
export function classifyIntent(query) {
    const q = query.toLowerCase();
    let bestIntent = 'general';
    let bestScore = 0;
    for (const [intent, matchers] of INTENT_MATCHERS) {
        const score = matchers.filter((matches) => matches(q)).length;
        // Strict '>' keeps the earliest intent on ties.
        if (score > bestScore) {
            bestIntent = intent;
            bestScore = score;
        }
    }
    return bestIntent;
}
|
|
30
|
-
/**
 * Counts how many of the keywords occur in the text (case-insensitive
 * substring test; each keyword is counted at most once).
 *
 * @param {string|undefined|null} text - Text to inspect; empty or missing text scores 0.
 * @param {string[]} keywords - Keywords to look for.
 * @returns {number} Number of keywords found.
 */
function countKeywordHits(text, keywords) {
    if (!text) {
        return 0;
    }
    const haystack = text.toLowerCase();
    let hits = 0;
    for (const keyword of keywords) {
        if (haystack.includes(keyword.toLowerCase())) {
            hits += 1;
        }
    }
    return hits;
}
|
|
40
|
-
/**
 * Tells whether a timestamp lies no more than 365 days in the past.
 *
 * Falsy or non-finite values are never recent. The value is compared with
 * `Date.now()`, so it is presumably an epoch time in milliseconds — confirm
 * against the article data.
 *
 * @param {number|undefined|null} dateTime - Timestamp to test.
 * @returns {boolean}
 */
function isRecent(dateTime) {
    const ONE_YEAR_MS = 365 * 24 * 60 * 60 * 1000;
    const usable = Boolean(dateTime) && Number.isFinite(dateTime);
    return usable && Date.now() - dateTime <= ONE_YEAR_MS;
}
|
|
45
|
-
/**
 * Re-ranks articles by intent relevance with weighted multi-dimension scoring.
 * Scoring: title(+3) / categories(+2) / summary(+2) / keyPoints(+1) / recency(+1)
 *
 * The input array is returned unchanged when the intent is 'general', when
 * there is at most one article, or when no article scores above zero.
 * Otherwise a new array is returned, ordered by descending score with ties
 * keeping their original order. Missing `categories` or `keyPoints` are
 * treated as empty (`keyPoints` used to throw when absent).
 *
 * @param {string} query - Raw user query.
 * @param {Array} articles - Articles to rank.
 * @returns {Array} The ranked articles.
 */
export function rankArticlesByIntent(query, articles) {
    const intent = classifyIntent(query);
    if (intent === 'general' || articles.length <= 1) {
        return articles;
    }
    const keywords = INTENT_KEYWORDS[intent];
    if (!keywords.length) {
        return articles;
    }
    const mentionsIntent = (text) => countKeywordHits(text, keywords) > 0;
    const scored = articles.map((article, index) => {
        const score = (mentionsIntent(article.title) ? 3 : 0)
            + ((article.categories ?? []).some((c) => mentionsIntent(c)) ? 2 : 0)
            + (mentionsIntent(article.summary) ? 2 : 0)
            + ((article.keyPoints ?? []).some((kp) => mentionsIntent(kp)) ? 1 : 0)
            + (isRecent(article.dateTime) ? 1 : 0);
        return { article, index, score };
    });
    // Nothing matched at all: keep the caller's order (and array) as-is.
    if (!scored.some((entry) => entry.score > 0)) {
        return articles;
    }
    scored.sort((a, b) => b.score - a.score || a.index - b.index);
    return scored.map((entry) => entry.article);
}
|
|
70
|
-
// ── Follow-up Detection ──────────────────────────────────────
|
|
71
|
-
/**
 * Heuristically decides whether a message reads like a follow-up to the
 * previous turn, judging only by its shape.
 *
 * After trimming, a message is a follow-up when it is non-empty, no longer
 * than MAX_FOLLOW_UP_LENGTH, and one of:
 * - at most 16 characters long;
 * - free of whitespace and at most 24 characters long;
 * - at most 36 characters and 6 whitespace-separated words long, ending in
 *   sentence punctuation.
 *
 * @param {string} message - Latest user message.
 * @returns {boolean}
 */
export function isLikelyFollowUp(message) {
    const text = message.trim();
    const size = text.length;
    if (size === 0 || size > MAX_FOLLOW_UP_LENGTH) {
        return false;
    }
    if (size <= 16) {
        return true;
    }
    const hasWhitespace = /\s/.test(text);
    if (!hasWhitespace && size <= 24) {
        return true;
    }
    if (size > 36) {
        return false;
    }
    const words = text.split(/\s+/).filter(Boolean);
    return words.length <= 6 && /[??!!。.…]$/.test(text);
}
|
|
87
|
-
/**
 * Tells whether the current query introduces a token of at least two
 * characters that the cached query does not contain.
 *
 * @param {string} currentQuery - Query of the current turn.
 * @param {string} cachedQuery - Query stored with the cached search context.
 * @returns {boolean} True when such a token exists.
 */
export function hasNewSignificantTokens(currentQuery, cachedQuery) {
    const incoming = [...tokenize(currentQuery)];
    const known = new Set(tokenize(cachedQuery));
    return incoming.some((token) => token.length >= 2 && !known.has(token));
}
|
|
97
|
-
/**
 * Tells whether any token of the current query occurs (as a substring) in
 * the normalized cached query.
 *
 * @param {string} currentQuery - Query of the current turn.
 * @param {string} cachedQuery - Query stored with the cached search context.
 * @returns {boolean} False when either side is empty after tokenizing/normalizing.
 */
export function hasQueryOverlap(currentQuery, cachedQuery) {
    const tokens = tokenize(currentQuery);
    const haystack = normalizeText(cachedQuery);
    const comparable = tokens.length > 0 && Boolean(haystack);
    return comparable ? tokens.some((token) => haystack.includes(token)) : false;
}
|
|
107
|
-
/**
 * Decides whether the cached search context can serve the current request.
 *
 * Reuse requires all of: a cached context exists, this is not the first user
 * turn, the cache is not older than SESSION_CACHE_TTL_MS, the message looks
 * like a follow-up, it overlaps with the cached query, and it adds no new
 * significant tokens.
 *
 * @param {object} params
 * @param {string} params.latestText - Latest user message.
 * @param {{query: string, updatedAt: number}|null|undefined} params.cachedContext - Cached search context.
 * @param {number} params.userTurnCount - Number of user turns so far.
 * @param {number} params.now - Current time, on the same scale as `updatedAt`.
 * @returns {boolean}
 */
export function shouldReuseSearchContext(params) {
    const { latestText, cachedContext, userTurnCount, now } = params;
    if (!cachedContext || userTurnCount <= 1) {
        return false;
    }
    const expired = now - cachedContext.updatedAt > SESSION_CACHE_TTL_MS;
    if (expired) {
        return false;
    }
    return isLikelyFollowUp(latestText)
        && hasQueryOverlap(latestText, cachedContext.query)
        && !hasNewSignificantTokens(latestText, cachedContext.query);
}
|
|
126
|
-
/**
 * Produces the local search query for a message: its tokens joined by
 * single spaces.
 *
 * @param {string} latestText - Latest user message.
 * @returns {string} Space-separated tokens.
 */
export function buildLocalSearchQuery(latestText) {
    const terms = tokenize(latestText);
    return terms.join(' ');
}
|