@astro-minimax/ai 0.8.2 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cache/global-cache.js +145 -0
- package/dist/cache/index.js +96 -0
- package/dist/cache/kv-adapter.js +99 -0
- package/dist/cache/memory-adapter.js +97 -0
- package/dist/cache/response-cache.js +87 -0
- package/dist/cache/types.js +8 -0
- package/dist/data/metadata-loader.js +48 -0
- package/dist/data/types.js +0 -0
- package/dist/fact-registry/fact-matcher.js +128 -0
- package/dist/fact-registry/prompt-injector.js +54 -0
- package/dist/fact-registry/registry.js +41 -0
- package/dist/fact-registry/types.js +0 -0
- package/dist/intelligence/citation-appender.js +63 -0
- package/dist/intelligence/citation-guard.js +108 -0
- package/dist/intelligence/evidence-analysis.js +79 -0
- package/dist/intelligence/intent-detect.js +93 -0
- package/dist/intelligence/keyword-extract.js +89 -0
- package/dist/intelligence/response-templates.js +117 -0
- package/dist/intelligence/types.js +0 -0
- package/dist/middleware/rate-limiter.js +110 -0
- package/dist/prompt/dynamic-layer.js +64 -0
- package/dist/prompt/prompt-builder.js +15 -0
- package/dist/prompt/semi-static-layer.js +28 -0
- package/dist/prompt/static-layer.js +153 -0
- package/dist/prompt/types.js +0 -0
- package/dist/provider-manager/base.js +53 -0
- package/dist/provider-manager/config.js +135 -0
- package/dist/provider-manager/index.js +19 -0
- package/dist/provider-manager/manager.js +122 -0
- package/dist/provider-manager/mock.js +77 -0
- package/dist/provider-manager/openai.js +106 -0
- package/dist/provider-manager/types.js +0 -0
- package/dist/provider-manager/workers.js +76 -0
- package/dist/providers/mock.js +227 -0
- package/dist/search/idf.js +24 -0
- package/dist/search/search-api.js +94 -0
- package/dist/search/search-index.js +32 -0
- package/dist/search/search-utils.js +81 -0
- package/dist/search/session-cache.js +96 -0
- package/dist/search/types.js +0 -0
- package/dist/search/vector-reranker.js +103 -0
- package/dist/server/chat-handler.js +603 -0
- package/dist/server/errors.js +46 -0
- package/dist/server/metadata-init.js +49 -0
- package/dist/server/notify.js +70 -0
- package/dist/server/stream-helpers.js +202 -0
- package/dist/server/types.js +16 -0
- package/dist/stream/mock-stream.js +26 -0
- package/dist/stream/response.js +21 -0
- package/dist/utils/i18n.js +154 -0
- package/package.json +3 -3
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
const CATEGORY_LABELS = {
|
|
2
|
+
zh: {
|
|
3
|
+
author: "\u5173\u4E8E\u4F5C\u8005",
|
|
4
|
+
blog: "\u535A\u5BA2\u6570\u636E",
|
|
5
|
+
content: "\u5185\u5BB9\u4E8B\u5B9E",
|
|
6
|
+
project: "\u9879\u76EE\u4FE1\u606F",
|
|
7
|
+
tech: "\u6280\u672F\u76F8\u5173"
|
|
8
|
+
},
|
|
9
|
+
en: {
|
|
10
|
+
author: "About the Author",
|
|
11
|
+
blog: "Blog Statistics",
|
|
12
|
+
content: "Content Facts",
|
|
13
|
+
project: "Project Info",
|
|
14
|
+
tech: "Tech Related"
|
|
15
|
+
}
|
|
16
|
+
};
|
|
17
|
+
const SECTION_TEXT = {
|
|
18
|
+
zh: {
|
|
19
|
+
title: "\u5DF2\u9A8C\u8BC1\u4E8B\u5B9E\uFF08\u57FA\u4E8E\u535A\u5BA2\u771F\u5B9E\u6570\u636E\uFF09",
|
|
20
|
+
instruction: "\u4EE5\u4E0A\u4E8B\u5B9E\u6765\u81EA\u535A\u5BA2\u7684\u771F\u5B9E\u6570\u636E\u3002\u56DE\u7B54\u65F6\u4F18\u5148\u4F7F\u7528\u8FD9\u4E9B\u5DF2\u9A8C\u8BC1\u7684\u4E8B\u5B9E\uFF0C\u4E0D\u8981\u7F16\u9020\u4E0E\u4E4B\u77DB\u76FE\u7684\u4FE1\u606F\u3002\u5982\u679C\u67D0\u4E2A\u95EE\u9898\u7684\u7B54\u6848\u4E0D\u5728\u5DF2\u9A8C\u8BC1\u4E8B\u5B9E\u4E2D\uFF0C\u8BF7\u5982\u5B9E\u8BF4\u660E\u3002"
|
|
21
|
+
},
|
|
22
|
+
en: {
|
|
23
|
+
title: "Verified Facts (based on real blog data)",
|
|
24
|
+
instruction: "The above facts are derived from real blog data. Prioritize these verified facts when answering. Do not fabricate information that contradicts them. If the answer is not among verified facts, state that honestly."
|
|
25
|
+
}
|
|
26
|
+
};
|
|
27
|
+
/**
 * Render a list of facts as a Markdown section.
 *
 * Facts are grouped by `fact.category` (groups appear in first-seen order),
 * each group gets a `###` heading with a localized label, and the section
 * ends with a blockquote carrying the localized usage instruction.
 *
 * @param {Array<{category: string, statement: string}>} facts - Facts to render.
 * @param {string} [lang="zh"] - `"zh"` selects Chinese text; any other value selects English.
 * @returns {string} The Markdown section, or `""` when there are no facts.
 */
function buildFactSection(facts, lang = "zh") {
  // A missing fact list is treated like an empty one instead of throwing.
  if (!facts?.length) return "";

  const locale = lang === "zh" ? "zh" : "en";
  const labels = CATEGORY_LABELS[locale];
  const text = SECTION_TEXT[locale];

  const grouped = new Map();
  for (const fact of facts) {
    const bucket = grouped.get(fact.category);
    if (bucket) {
      bucket.push(fact);
    } else {
      grouped.set(fact.category, [fact]);
    }
  }

  const lines = [`## ${text.title}`];
  for (const [category, categoryFacts] of grouped) {
    // Only own entries count as labels; otherwise a category such as
    // "toString" would resolve to an inherited Object.prototype member.
    const known = Object.prototype.hasOwnProperty.call(labels, category)
      ? labels[category]
      : undefined;
    lines.push("", `### ${known ?? category}`);
    for (const fact of categoryFacts) {
      lines.push(`- ${fact.statement}`);
    }
  }
  lines.push("", `> ${text.instruction}`);
  return lines.join("\n");
}
|
|
52
|
+
export {
|
|
53
|
+
buildFactSection
|
|
54
|
+
};
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/** Module-level holder for the currently loaded fact registry (or `null`). */
let cachedRegistry = null;

/**
 * Store `data` as the active registry.
 * @param {{facts?: Array<object>}|null} data - Registry object; `queryFacts` reads its `facts` array.
 */
function loadFactRegistry(data) {
  cachedRegistry = data;
}

/** Drop the active registry. */
function clearFactRegistry() {
  cachedRegistry = null;
}

/**
 * @returns {object|null} The registry passed to the last `loadFactRegistry` call, or `null`.
 */
function getFactRegistry() {
  return cachedRegistry;
}

/**
 * Filter the loaded facts and return them ordered by descending `confidence`.
 *
 * @param {object} [options]
 * @param {string[]} [options.categories] - Keep facts whose `category` is listed.
 * @param {string} [options.lang] - Keep facts whose `lang` equals this value or `"all"`.
 * @param {number} [options.minConfidence] - Keep facts with `confidence >= minConfidence`.
 * @param {string[]} [options.tags] - Keep facts sharing at least one tag (case-insensitive).
 * @param {number} [options.limit] - Maximum number of facts returned (ignored unless > 0).
 * @returns {Array<object>} A new array; the registry's own array is never reordered.
 */
function queryFacts(options = {}) {
  // A registry without a `facts` array behaves like an empty registry.
  const allFacts = cachedRegistry?.facts;
  if (!allFacts?.length) return [];

  // `options` may be passed explicitly as null; treat that like "no filters".
  const { categories, lang, minConfidence, tags, limit } = options ?? {};
  let facts = allFacts;

  if (categories?.length) {
    const wanted = new Set(categories);
    facts = facts.filter((f) => wanted.has(f.category));
  }
  if (lang) {
    facts = facts.filter((f) => f.lang === lang || f.lang === "all");
  }
  if (minConfidence !== undefined) {
    facts = facts.filter((f) => f.confidence >= minConfidence);
  }
  if (tags?.length) {
    const wantedTags = new Set(tags.map((t) => t.toLowerCase()));
    // Facts without a `tags` array simply never match a tag filter.
    facts = facts.filter((f) => (f.tags ?? []).some((t) => wantedTags.has(t.toLowerCase())));
  }

  // Copy before sorting so the cached registry keeps its original order.
  facts = [...facts].sort((a, b) => b.confidence - a.confidence);
  if (limit && limit > 0) {
    facts = facts.slice(0, limit);
  }
  return facts;
}
|
|
36
|
+
export {
|
|
37
|
+
clearFactRegistry,
|
|
38
|
+
getFactRegistry,
|
|
39
|
+
loadFactRegistry,
|
|
40
|
+
queryFacts
|
|
41
|
+
};
|
|
File without changes
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
 * Tell whether `text` contains a Markdown link `[label](target)` whose
 * target is a member of `validUrls`.
 *
 * @param {string} text - Text to scan.
 * @param {Set<string>} validUrls - Link targets that count as citations.
 * @returns {boolean}
 */
function hasExistingCitations(text, validUrls) {
  const markdownLink = /\[([^\]]+)\]\(([^)]+)\)/g;
  for (const found of text.matchAll(markdownLink)) {
    const target = found[2];
    if (validUrls.has(target)) {
      return true;
    }
  }
  return false;
}
|
|
6
|
+
/**
 * Pick the highest-scoring articles and projects to cite.
 *
 * Items whose score (missing scores count as 0) is below `minScore` are
 * dropped; the rest are ordered by descending score, with articles ahead
 * of projects on ties, and cut to `maxCitations` entries.
 *
 * @param {Array<{title: string, url: string, score?: number}>} articles
 * @param {Array<{name: string, url: string, score?: number}>} projects
 * @param {number} maxCitations - Maximum number of citations returned.
 * @param {number} minScore - Minimum score an item needs to be eligible.
 * @returns {Array<{title: string, url: string, score: number}>}
 */
function selectCitations(articles, projects, maxCitations, minScore) {
  const eligible = [];
  for (const article of articles) {
    const score = article.score ?? 0;
    if (score >= minScore) {
      eligible.push({ title: article.title, url: article.url, score });
    }
  }
  for (const project of projects) {
    const score = project.score ?? 0;
    if (score >= minScore) {
      // Projects expose `name`; it becomes the citation title.
      eligible.push({ title: project.name, url: project.url, score });
    }
  }
  eligible.sort((left, right) => right.score - left.score);
  return eligible.slice(0, maxCitations);
}
|
|
13
|
+
/**
 * Format citations as a Markdown "further reading" list.
 *
 * The block starts with a newline, followed by a bold heading and one
 * bullet link per citation.
 *
 * @param {Array<{title: string, url: string}>} citations
 * @param {string} lang - `"zh"` selects the Chinese heading; anything else the English one.
 * @returns {string} The block, or `""` when `citations` is empty.
 */
function formatCitationBlock(citations, lang) {
  if (citations.length === 0) return "";

  let heading = "Further Reading";
  if (lang === "zh") {
    heading = "\u5EF6\u4F38\u9605\u8BFB";
  }

  let block = `\n**${heading}:**`;
  for (const { title, url } of citations) {
    block += `\n- [${title}](${url})`;
  }
  return block;
}
|
|
23
|
+
/**
 * Build a stream decorator that appends a citation block to a text stream.
 *
 * The returned function pipes its input through a TransformStream that
 * forwards every chunk unchanged while accumulating the text. When the
 * stream ends, a citation block is emitted as one extra chunk unless the
 * accumulated text already links to one of the known article/project URLs
 * or no item reaches `minScore`.
 *
 * @param {object} config
 * @param {Array<object>} config.articles - Items with `url`, `title`, optional `score`.
 * @param {Array<object>} config.projects - Items with `url`, `name`, optional `score`.
 * @param {string} config.lang - Passed to `formatCitationBlock`.
 * @param {number} [config.maxCitations=3]
 * @param {number} [config.minScore=5]
 * @returns {(stream: ReadableStream) => ReadableStream}
 */
function createCitationAppenderTransform(config) {
  const { articles, projects, lang, maxCitations = 3, minScore = 5 } = config;

  const validUrls = new Set();
  for (const article of articles) validUrls.add(article.url);
  for (const project of projects) validUrls.add(project.url);

  return (stream) => {
    let seenText = "";
    return stream.pipeThrough(
      new TransformStream({
        transform(chunk, controller) {
          seenText += chunk;
          controller.enqueue(chunk);
        },
        flush(controller) {
          const alreadyCited = hasExistingCitations(seenText, validUrls);
          if (alreadyCited) return;

          const picked = selectCitations(articles, projects, maxCitations, minScore);
          if (picked.length !== 0) {
            controller.enqueue(formatCitationBlock(picked, lang));
          }
        },
      }),
    );
  };
}
|
|
51
|
+
/**
 * Decide whether a citation block should be appended to `response`.
 *
 * Returns `true` when the response does not already link to any of the
 * given article/project URLs and at least one item scores `minScore` or
 * more (missing scores count as 0).
 *
 * @param {string} response - Full response text.
 * @param {Array<{url: string, score?: number}>} articles
 * @param {Array<{url: string, score?: number}>} projects
 * @param {number} [minScore=5] - Eligibility threshold. Defaults to the same
 *   value as `createCitationAppenderTransform`'s `minScore`, so callers that
 *   configure the transform can pass the same threshold here.
 * @returns {boolean}
 */
function shouldAppendCitations(response, articles, projects, minScore = 5) {
  const candidates = [...articles, ...projects];
  const validUrls = new Set(candidates.map((item) => item.url));
  if (hasExistingCitations(response, validUrls)) return false;
  return candidates.some((item) => (item.score ?? 0) >= minScore);
}
|
|
58
|
+
export {
|
|
59
|
+
createCitationAppenderTransform,
|
|
60
|
+
formatCitationBlock,
|
|
61
|
+
selectCitations,
|
|
62
|
+
shouldAppendCitations
|
|
63
|
+
};
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import {
|
|
2
|
+
PRIVACY_REFUSAL_TEMPLATES,
|
|
3
|
+
NO_ARTICLE_TEMPLATES,
|
|
4
|
+
ARTICLE_COUNT_TEMPLATES,
|
|
5
|
+
pickTemplate,
|
|
6
|
+
pickTemplateWithVars
|
|
7
|
+
} from "./response-templates.js";
|
|
8
|
+
/**
 * Query patterns that trigger a privacy refusal; `key` selects the refusal
 * template group.
 *
 * English alternatives are anchored on word boundaries (with simple
 * inflections spelled out) so that they no longer fire inside unrelated
 * words: "learn" contains "earn", "page"/"image"/"language" contain "age",
 * "transparent" contains "parent". Chinese alternatives are matched as
 * substrings because CJK text has no word boundaries.
 */
const PRIVACY_PATTERNS = [
  { regex: /(住址|地址|住在哪|\baddress(?:es)?\b|\bwhere\b.*\blive)/iu, key: "address" },
  { regex: /(收入|工资|薪资|\bsalar(?:y|ies)\b|\bincomes?\b|\bearn(?:s|ed|ings?)?\b)/iu, key: "income" },
  { regex: /(家人|妻子|丈夫|孩子|父母|\bfamil(?:y|ies)\b|\bwi(?:fe|ves)\b|\bhusbands?\b|\bchildren\b|\bparents?\b)/iu, key: "family" },
  { regex: /(电话|手机号|\bphones?\b|\bmobile\b)/iu, key: "phone" },
  { regex: /(身份证|\bid\s*cards?\b|\bpassports?\b)/iu, key: "id" },
  { regex: /(年龄|多大了|几岁|\bhow old\b|\bages?\b)/iu, key: "age" }
];
|
|
16
|
+
/**
 * Classify a query into an answer mode by keyword matching.
 *
 * The query is lower-cased and tested against the rules below in order;
 * the first rule whose pattern matches anywhere in the query wins.
 *
 * @param {string} query - Raw user query.
 * @returns {"count"|"list"|"opinion"|"recommendation"|"fact"|"general"}
 */
function resolveAnswerMode(query) {
  const normalized = query.toLowerCase();
  const rules = [
    [/几次|多少|几篇|数量|count|how many/u, "count"],
    [/哪些|哪几个|列表|列举|list|what are/u, "list"],
    [/怎么看|怎么想|看法|观点|opinion|think about/u, "opinion"],
    [/推荐|建议|suggest|recommend/u, "recommendation"],
    [/是什么|什么是|介绍|解释|what is|explain/u, "fact"],
    [/有没有|是否|是不是|真的吗|does|is there/u, "fact"],
  ];
  for (const [pattern, mode] of rules) {
    if (pattern.test(normalized)) {
      return mode;
    }
  }
  return "general";
}
|
|
26
|
+
/**
 * Produce a privacy refusal when `query` matches one of `PRIVACY_PATTERNS`.
 *
 * The first matching pattern decides which template group is used. When
 * that group is missing from `PRIVACY_REFUSAL_TEMPLATES`, the refusal is
 * still returned, with empty text.
 *
 * @param {string} query - Raw user query.
 * @param {string} lang - Passed to `pickTemplate`.
 * @returns {{text: string, actions: string[]}|null} The refusal, or `null` when nothing matches.
 */
function checkPrivacyRefusal(query, lang) {
  const matched = PRIVACY_PATTERNS.find(({ regex }) => regex.test(query));
  if (!matched) return null;

  const templates = PRIVACY_REFUSAL_TEMPLATES[matched.key];
  let text = "";
  if (templates) {
    text = pickTemplate(templates, lang);
  }
  return { text, actions: ["preflight_reject"] };
}
|
|
39
|
+
/**
 * Decide, before any model call, whether a canned answer should be returned.
 *
 * Checks run in this order:
 *  1. privacy refusal (`checkPrivacyRefusal` on the raw query);
 *  2. "how many articles" questions, answered with the number of entries
 *     in `articles` when that number is positive;
 *  3. "is there an article about…" questions, answered with a not-found
 *     message when both `articles` and `projects` are empty.
 *
 * @param {object} params
 * @param {string} params.userQuery
 * @param {Array<object>} params.articles
 * @param {Array<object>} params.projects
 * @param {string} [params.lang="zh"]
 * @returns {{text: string, actions: string[]}|null} The canned reply, or `null` to continue normally.
 */
function getCitationGuardPreflight(params) {
  const { userQuery, articles, projects, lang = "zh" } = params;
  const normalized = userQuery.toLowerCase();
  const reject = (text) => ({ text, actions: ["preflight_reject"] });

  const refusal = checkPrivacyRefusal(userQuery, lang);
  if (refusal) return refusal;

  const asksForCount = /有几篇|有多少篇|文章数量|总共.*文章|how many.*article/u.test(normalized);
  if (asksForCount && articles.length > 0) {
    return reject(pickTemplateWithVars(ARTICLE_COUNT_TEMPLATES, lang, { count: articles.length }));
  }

  const asksForExistence = /有没有|是否有|有.*文章|写过.*吗|is there|any.*article/u.test(normalized);
  if (asksForExistence && articles.length === 0 && projects.length === 0) {
    return reject(pickTemplate(NO_ARTICLE_TEMPLATES, lang));
  }

  return null;
}
|
|
59
|
+
/**
 * Build a stream decorator that strips Markdown links to unknown URLs.
 *
 * Every complete `[text](url)` link whose url starts with "http" and is not
 * one of the given article/project URLs is replaced by its bare `text`;
 * each replacement records one "stream_rewrite" action. All other text is
 * forwarded unchanged.
 *
 * Because a link may be split across chunks, the tail of the buffered text
 * is held back only from the first "[" that could still grow into a link;
 * everything before it is emitted immediately. (Previously all text after
 * the last complete link was held until the stream ended, so a response
 * without links was delivered as one chunk at the very end.) An opening
 * "[" that is never closed keeps the text after it buffered until flush.
 *
 * @param {object} params
 * @param {Array<{url: string}>} params.articles
 * @param {Array<{url: string}>} params.projects
 * @param {(result: {actions: string[]}) => void} [params.onApplied] -
 *   Called once at end of stream, only when at least one link was rewritten.
 * @returns {(stream: ReadableStream) => ReadableStream}
 */
function createCitationGuardTransform(params) {
  const { articles, projects, onApplied } = params;
  const validUrls = new Set([
    ...articles.map((a) => a.url),
    ...projects.map((p) => p.url),
  ]);

  // Matches text that is a proper prefix of a `[text](url)` link:
  // "[te", "[text]", "[text](", "[text](ur".
  const partialLink = /^\[(?:[^\]]*|[^\]]+\](?:\([^)]*)?)$/;

  // Index of the first "[" from which the rest of `text` could still become
  // a complete link once more input arrives, or -1 when there is none.
  const findPendingLinkStart = (text) => {
    for (let at = text.indexOf("["); at !== -1; at = text.indexOf("[", at + 1)) {
      if (partialLink.test(text.slice(at))) return at;
    }
    return -1;
  };

  return (stream) => {
    const actions = [];
    let buffer = "";
    const transform = new TransformStream({
      transform(chunk, controller) {
        buffer += chunk;
        const linkPattern = /\[([^\]]+)\]\(([^)]+)\)/g;
        let output = "";
        let consumed = 0;
        for (const match of buffer.matchAll(linkPattern)) {
          const [fullMatch, text, url] = match;
          output += buffer.slice(consumed, match.index);
          if (url.startsWith("http") && !validUrls.has(url)) {
            // Unknown external link: keep the label, drop the URL.
            output += text;
            actions.push("stream_rewrite");
          } else {
            output += fullMatch;
          }
          consumed = match.index + fullMatch.length;
        }

        // Emit everything that can no longer be part of a link.
        const rest = buffer.slice(consumed);
        const pendingAt = findPendingLinkStart(rest);
        if (pendingAt === -1) {
          output += rest;
          buffer = "";
        } else {
          output += rest.slice(0, pendingAt);
          buffer = rest.slice(pendingAt);
        }

        if (output) {
          controller.enqueue(output);
        }
      },
      flush(controller) {
        // Whatever is left never became a complete link; pass it through.
        if (buffer) {
          controller.enqueue(buffer);
          buffer = "";
        }
        if (actions.length > 0) {
          onApplied?.({ actions });
        }
      },
    });
    return stream.pipeThrough(transform);
  };
}
|
|
104
|
+
export {
|
|
105
|
+
createCitationGuardTransform,
|
|
106
|
+
getCitationGuardPreflight,
|
|
107
|
+
resolveAnswerMode
|
|
108
|
+
};
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import { generateText } from "ai";
|
|
2
|
+
// 8e3 = 8000. Exported but not referenced by the functions visible in this
// module. NOTE(review): presumably a time budget in milliseconds for the
// evidence-analysis call that callers enforce via `abortSignal` — confirm.
const EVIDENCE_ANALYSIS_TIMEOUT_MS = 8e3;
|
|
3
|
+
// Default for `maxOutputTokens` in analyzeRetrievedEvidence when the caller
// does not supply one.
const EVIDENCE_ANALYSIS_MAX_TOKENS = 360;
|
|
4
|
+
/**
 * Decide whether evidence analysis should be skipped.
 *
 * @param {string} latestText - Latest user message.
 * @param {number} articleCount - Number of retrieved articles.
 * @param {string} complexity - Query complexity label.
 * @returns {boolean} `true` when fewer than 2 articles were retrieved, the
 *   complexity is `"simple"`, or the message is shorter than 15 characters.
 */
function shouldSkipAnalysis(latestText, articleCount, complexity) {
  // Checks are evaluated left to right and stop at the first that holds.
  return articleCount < 2 || complexity === "simple" || latestText.length < 15;
}
|
|
10
|
+
/**
 * Ask the model to distill retrieved articles/projects into key evidence.
 *
 * A prompt is built from the user query and `buildEvidenceSummary`, sent
 * through `generateText`, and the text between `<evidence>` tags of the
 * reply is extracted.
 *
 * @param {object} params
 * @param {string} params.userQuery
 * @param {Array<object>} params.articles
 * @param {Array<object>} params.projects
 * @param {{chatModel: Function}} params.provider - `provider.chatModel(model)` supplies the model handle.
 * @param {string} params.model
 * @param {number} [params.maxOutputTokens=EVIDENCE_ANALYSIS_MAX_TOKENS]
 * @param {AbortSignal} [params.abortSignal]
 * @returns {Promise<object>} On success `{ analysis, parseStatus, rawText, usage }`
 *   where `parseStatus` is `"ok"` or `"no_match"` (no non-empty `<evidence>` block);
 *   on any thrown error `{ parseStatus: "error", error }`. Never rejects for
 *   errors raised inside the model call.
 */
async function analyzeRetrievedEvidence(params) {
  const {
    userQuery,
    articles,
    projects,
    provider,
    model,
    maxOutputTokens = EVIDENCE_ANALYSIS_MAX_TOKENS,
    abortSignal,
  } = params;
  const evidenceSummary = buildEvidenceSummary(articles, projects);
  const prompt = `\u7528\u6237\u95EE\u9898\uFF1A${userQuery}

\u68C0\u7D22\u5230\u7684\u76F8\u5173\u5185\u5BB9\uFF1A
${evidenceSummary}

\u8BF7\u5206\u6790\u8FD9\u4E9B\u5185\u5BB9\uFF0C\u63D0\u53D6\u4E0E\u7528\u6237\u95EE\u9898\u6700\u76F8\u5173\u76842-3\u4E2A\u5173\u952E\u4FE1\u606F\u70B9\u3002\u683C\u5F0F\uFF1A
<evidence>
[\u5173\u952E\u4FE1\u606F\u70B91]
[\u5173\u952E\u4FE1\u606F\u70B92]
</evidence>

\u53EA\u8FD4\u56DEevidence\u6807\u7B7E\u5185\u7684\u5185\u5BB9\uFF0C\u7B80\u6D01\u51C6\u786E\u3002`;

  let result;
  try {
    result = await generateText({
      model: provider.chatModel(model),
      prompt,
      maxOutputTokens,
      temperature: 0.1,
      abortSignal,
    });
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    return { parseStatus: "error", error: reason };
  }

  try {
    const rawText = result.text?.trim() ?? "";
    const tagged = rawText.match(/<evidence>([\s\S]*?)<\/evidence>/);
    const analysis = tagged?.[1]?.trim();

    let usage;
    const reported = result.usage;
    if (reported) {
      const inputTokens = reported.inputTokens ?? 0;
      const outputTokens = reported.outputTokens ?? 0;
      usage = { inputTokens, outputTokens, totalTokens: inputTokens + outputTokens };
    }

    return {
      analysis,
      parseStatus: analysis ? "ok" : "no_match",
      rawText,
      usage,
    };
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    return { parseStatus: "error", error: reason };
  }
}
|
|
54
|
+
/**
 * Wrap an evidence analysis in a Markdown section.
 *
 * @param {string|undefined|null} analysis - Analysis text. `analyzeRetrievedEvidence`
 *   yields `undefined` here for its `no_match` and `error` results, so a
 *   missing value is treated like blank text instead of throwing.
 * @returns {string} The section (leading and trailing newline included), or
 *   `""` when there is no non-blank analysis.
 */
function buildEvidenceSection(analysis) {
  if (typeof analysis !== "string" || !analysis.trim()) return "";
  return `
## \u5173\u952E\u8BC1\u636E\u5206\u6790
${analysis}
`;
}
|
|
61
|
+
/**
 * Summarize retrieved content as plain text lines for the analysis prompt.
 *
 * At most 6 articles (title, optional summary, up to 3 key points) and
 * 3 projects (name plus the first 100 characters of the description) are
 * included.
 *
 * @param {Array<{title: string, summary?: string, keyPoints?: string[]}>} articles
 * @param {Array<{name: string, description?: string}>} projects
 * @returns {string} Lines joined with "\n"; `""` when both lists are empty.
 */
function buildEvidenceSummary(articles, projects) {
  const lines = [];
  for (const article of articles.slice(0, 6)) {
    lines.push(`\u6587\u7AE0: ${article.title}`);
    if (article.summary) lines.push(` \u6458\u8981: ${article.summary}`);
    // Articles without key points are summarized without the key-point line.
    const keyPoints = article.keyPoints ?? [];
    if (keyPoints.length) lines.push(` \u8981\u70B9: ${keyPoints.slice(0, 3).join(", ")}`);
  }
  for (const project of projects.slice(0, 3)) {
    // A missing description is rendered as empty text rather than throwing.
    const description = (project.description ?? "").slice(0, 100);
    lines.push(`\u9879\u76EE: ${project.name} - ${description}`);
  }
  return lines.join("\n");
}
|
|
73
|
+
export {
|
|
74
|
+
EVIDENCE_ANALYSIS_MAX_TOKENS,
|
|
75
|
+
EVIDENCE_ANALYSIS_TIMEOUT_MS,
|
|
76
|
+
analyzeRetrievedEvidence,
|
|
77
|
+
buildEvidenceSection,
|
|
78
|
+
shouldSkipAnalysis
|
|
79
|
+
};
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import { tokenize, normalizeText } from "../search/search-utils.js";
|
|
2
|
+
import { SESSION_CACHE_TTL_MS } from "../search/session-cache.js";
|
|
3
|
+
// Upper bound, in characters of the trimmed message, above which
// isLikelyFollowUp never treats a message as a follow-up.
const MAX_FOLLOW_UP_LENGTH = 48;
|
|
4
|
+
const INTENT_KEYWORDS = {
|
|
5
|
+
setup: ["\u642D\u5EFA", "\u521B\u5EFA", "\u5B89\u88C5", "install", "setup", "create", "init", "scaffold", "\u65B0\u5EFA", "\u5F00\u59CB"],
|
|
6
|
+
config: ["\u914D\u7F6E", "\u8BBE\u7F6E", "config", "settings", "\u73AF\u5883\u53D8\u91CF", ".env", "wrangler", "tsconfig", "\u4E3B\u9898\u8272", "\u989C\u8272"],
|
|
7
|
+
content: ["\u6587\u7AE0", "\u535A\u5BA2", "\u5199\u4F5C", "markdown", "mdx", "\u6807\u7B7E", "\u5206\u7C7B", "\u6458\u8981", "\u5C01\u9762", "\u7FFB\u8BD1"],
|
|
8
|
+
feature: ["\u529F\u80FD", "\u7279\u6027", "feature", "\u652F\u6301", "AI", "RAG", "\u641C\u7D22", "\u8BC4\u8BBA", "RSS", "\u6697\u8272", "\u6DF1\u8272"],
|
|
9
|
+
deployment: ["\u90E8\u7F72", "deploy", "cloudflare", "vercel", "netlify", "build", "\u6784\u5EFA", "CI", "CD"],
|
|
10
|
+
troubleshooting: ["\u62A5\u9519", "\u9519\u8BEF", "error", "bug", "\u95EE\u9898", "\u4E0D\u5DE5\u4F5C", "\u5931\u8D25", "fail", "\u4FEE\u590D", "fix"],
|
|
11
|
+
general: []
|
|
12
|
+
};
|
|
13
|
+
/**
 * Guess the intent of a query from `INTENT_KEYWORDS`.
 *
 * Each intent scores one point per keyword that occurs (case-insensitively,
 * as a substring) in the query. The intent with the highest score wins;
 * on a tie the intent declared first in `INTENT_KEYWORDS` wins.
 *
 * NOTE(review): matching is by substring, so short keywords also hit inside
 * longer words (e.g. "ai" within "explain") — confirm that is acceptable.
 *
 * @param {string} query
 * @returns {string} An intent key, or `"general"` when no keyword occurs.
 */
function classifyIntent(query) {
  const normalized = query.toLowerCase();
  let winner = "general";
  let winnerScore = 0;
  for (const intent of Object.keys(INTENT_KEYWORDS)) {
    if (intent === "general") continue;
    let hits = 0;
    for (const keyword of INTENT_KEYWORDS[intent]) {
      if (normalized.includes(keyword.toLowerCase())) hits += 1;
    }
    // Strictly greater keeps the earliest intent on ties.
    if (hits > winnerScore) {
      winner = intent;
      winnerScore = hits;
    }
  }
  return winner;
}
|
|
24
|
+
/**
 * Count how many of `keywords` occur in `text`.
 *
 * Matching is case-insensitive and by substring; each keyword counts at
 * most once regardless of how often it occurs.
 *
 * @param {string|undefined|null} text - Text to scan; falsy text yields 0.
 * @param {string[]} keywords
 * @returns {number}
 */
function countKeywordHits(text, keywords) {
  if (!text) return 0;
  const haystack = text.toLowerCase();
  let hits = 0;
  for (const keyword of keywords) {
    if (haystack.includes(keyword.toLowerCase())) {
      hits += 1;
    }
  }
  return hits;
}
|
|
29
|
+
/**
 * Tell whether a timestamp lies within the last 365 days.
 *
 * @param {number} dateTime - Epoch milliseconds.
 * @returns {boolean} `false` for falsy or non-finite values; otherwise
 *   whether `Date.now() - dateTime` is at most 365 days (timestamps in the
 *   future therefore count as recent).
 */
function isRecent(dateTime) {
  const ONE_YEAR_MS = 365 * 24 * 60 * 60 * 1000;
  const usable = Boolean(dateTime) && Number.isFinite(dateTime);
  return usable ? Date.now() - dateTime <= ONE_YEAR_MS : false;
}
|
|
33
|
+
/**
 * Reorder articles so those matching the query's intent come first.
 *
 * The intent comes from `classifyIntent(query)`. Each article is scored
 * against that intent's keywords: title hit 3, any category hit 2, summary
 * hit 2, any key-point hit 1, plus 1 when `isRecent(article.dateTime)`.
 * Articles are sorted by descending score; equal scores keep input order.
 *
 * @param {string} query
 * @param {Array<object>} articles - Items with `title` and optional
 *   `categories`, `summary`, `keyPoints`, `dateTime`.
 * @returns {Array<object>} The input array itself when the intent is
 *   `"general"`, there are fewer than two articles, the intent has no
 *   keywords, or nothing scores; otherwise a new, reordered array.
 */
function rankArticlesByIntent(query, articles) {
  const intent = classifyIntent(query);
  if (intent === "general" || articles.length <= 1) return articles;
  const keywords = INTENT_KEYWORDS[intent];
  // Also covers an intent key that is absent from INTENT_KEYWORDS.
  if (!keywords?.length) return articles;

  const mentions = (text) => countKeywordHits(text, keywords) > 0;
  const scored = articles.map((article, index) => {
    const titleHit = mentions(article.title) ? 3 : 0;
    const categoryHit = (article.categories ?? []).some((c) => mentions(c)) ? 2 : 0;
    const summaryHit = mentions(article.summary) ? 2 : 0;
    // keyPoints is optional, exactly like categories.
    const keyPointHit = (article.keyPoints ?? []).some((kp) => mentions(kp)) ? 1 : 0;
    const recentHit = isRecent(article.dateTime) ? 1 : 0;
    return { article, index, score: titleHit + categoryHit + summaryHit + keyPointHit + recentHit };
  });

  const maxScore = Math.max(...scored.map((s) => s.score), 0);
  if (maxScore === 0) return articles;
  scored.sort((a, b) => b.score - a.score || a.index - b.index);
  return scored.map((s) => s.article);
}
|
|
51
|
+
/**
 * Heuristically decide whether a message looks like a short follow-up.
 *
 * After trimming, a message is a follow-up when it is non-empty, no longer
 * than `MAX_FOLLOW_UP_LENGTH`, and one of:
 *  - at most 16 characters;
 *  - at most 24 characters with no whitespace;
 *  - at most 36 characters and 6 whitespace-separated words, ending in
 *    terminal punctuation.
 *
 * Terminal punctuation covers both ASCII and fullwidth question/exclamation
 * marks, the ASCII and ideographic full stop, and the ellipsis. (The class
 * previously listed "?" and "!" twice and had no fullwidth forms, so
 * messages ending in U+FF1F or U+FF01 were not recognized.)
 *
 * @param {string} message
 * @returns {boolean}
 */
function isLikelyFollowUp(message) {
  const text = message.trim();
  if (!text || text.length > MAX_FOLLOW_UP_LENGTH) return false;
  if (text.length <= 16) return true;
  if (!/\s/.test(text) && text.length <= 24) return true;

  const hasTerminalPunctuation = /[?\uFF1F!\uFF01\u3002.\u2026]$/.test(text);
  const wordCount = text.split(/\s+/).filter(Boolean).length;
  return hasTerminalPunctuation && wordCount <= 6 && text.length <= 36;
}
|
|
60
|
+
/**
 * Tell whether the current query introduces a token the cached query lacks.
 *
 * Both queries are split with `tokenize`; only tokens of at least two
 * characters count as significant.
 *
 * @param {string} currentQuery
 * @param {string} cachedQuery
 * @returns {boolean}
 */
function hasNewSignificantTokens(currentQuery, cachedQuery) {
  const incoming = new Set(tokenize(currentQuery));
  const known = new Set(tokenize(cachedQuery));
  for (const token of incoming) {
    if (token.length >= 2 && !known.has(token)) {
      return true;
    }
  }
  return false;
}
|
|
66
|
+
/**
 * Tell whether any token of the current query occurs in the cached query.
 *
 * The current query is split with `tokenize`; each token is looked up as a
 * substring of `normalizeText(cachedQuery)`.
 *
 * @param {string} currentQuery
 * @param {string} cachedQuery
 * @returns {boolean} `false` when there are no tokens or the normalized
 *   cached query is empty.
 */
function hasQueryOverlap(currentQuery, cachedQuery) {
  const tokens = tokenize(currentQuery);
  const cachedText = normalizeText(cachedQuery);
  if (tokens.length === 0 || !cachedText) return false;
  for (const token of tokens) {
    if (cachedText.includes(token)) {
      return true;
    }
  }
  return false;
}
|
|
72
|
+
/**
 * Decide whether the cached search context can serve the latest message.
 *
 * Reuse requires all of: a cached context exists; this is not the first
 * user turn; the cache is not older than `SESSION_CACHE_TTL_MS`; the
 * message looks like a follow-up; it shares a token with the cached query;
 * and it adds no new significant token.
 *
 * @param {object} params
 * @param {string} params.latestText
 * @param {{updatedAt: number, query: string}|null|undefined} params.cachedContext
 * @param {number} params.userTurnCount
 * @param {number} params.now - Current time, compared against `cachedContext.updatedAt`.
 * @returns {boolean}
 */
function shouldReuseSearchContext(params) {
  const { latestText, cachedContext, userTurnCount, now } = params;
  if (!cachedContext || userTurnCount <= 1) return false;

  const expired = now - cachedContext.updatedAt > SESSION_CACHE_TTL_MS;
  if (expired) return false;

  if (!isLikelyFollowUp(latestText)) return false;

  const { query } = cachedContext;
  if (!hasQueryOverlap(latestText, query)) return false;
  return !hasNewSignificantTokens(latestText, query);
}
|
|
82
|
+
/**
 * Turn the latest message into a search query string.
 *
 * @param {string} latestText
 * @returns {string} The tokens produced by `tokenize`, joined with single spaces.
 */
function buildLocalSearchQuery(latestText) {
  const tokens = tokenize(latestText);
  return tokens.join(" ");
}
|
|
85
|
+
export {
|
|
86
|
+
buildLocalSearchQuery,
|
|
87
|
+
classifyIntent,
|
|
88
|
+
hasNewSignificantTokens,
|
|
89
|
+
hasQueryOverlap,
|
|
90
|
+
isLikelyFollowUp,
|
|
91
|
+
rankArticlesByIntent,
|
|
92
|
+
shouldReuseSearchContext
|
|
93
|
+
};
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import { generateText } from "ai";
|
|
2
|
+
import { tokenize } from "../search/search-utils.js";
|
|
3
|
+
// 5e3 = 5000. Exported but not referenced by the functions visible in this
// module. NOTE(review): presumably a time budget in milliseconds for the
// keyword-extraction call that callers enforce via `abortSignal` — confirm.
const KEYWORD_EXTRACTION_TIMEOUT_MS = 5e3;
|
|
4
|
+
/**
 * Decide whether model-based keyword extraction is worth running.
 *
 * @param {object} params
 * @param {number} params.messageCount - Number of messages in the conversation.
 * @param {string} [params.localQuery] - Locally built query; falls back to `latestText` when empty.
 * @param {string} params.latestText - Latest user message.
 * @returns {boolean} `true` only when there are at least 3 messages, the
 *   latest message has at least 10 characters, and tokenizing the local
 *   query yields fewer than 3 tokens.
 */
function shouldRunKeywordExtraction(params) {
  const { messageCount, localQuery, latestText } = params;
  if (messageCount < 3 || latestText.length < 10) return false;
  const tokens = tokenize(localQuery || latestText);
  return !(tokens.length >= 3);
}
|
|
12
|
+
/**
 * Label a message as simple, moderate or complex.
 *
 * @param {string} text
 * @returns {"simple"|"moderate"|"complex"} `"simple"` for at most one token
 *   or at most 10 characters; otherwise `"complex"` for five or more tokens
 *   or more than 80 characters; otherwise `"moderate"`.
 */
function classifyComplexity(text) {
  const tokenCount = tokenize(text).length;
  const charCount = text.length;
  if (tokenCount <= 1 || charCount <= 10) {
    return "simple";
  }
  return tokenCount >= 5 || charCount > 80 ? "complex" : "moderate";
}
|
|
18
|
+
/**
 * Ask the model for search keywords describing the recent conversation.
 *
 * The last six messages are rendered as "role: text" lines and sent with a
 * prompt requesting a JSON object `{"query", "primaryQuery"}`. The first
 * `{...}` span of the reply is parsed. Whenever the reply cannot be used
 * (no JSON, invalid JSON, blank or non-string `query`) or the model call
 * throws, the result of `buildFallback` is returned instead.
 *
 * `primaryQuery` falls back to `query` when the model leaves it out, blank,
 * or non-string, so a usable `query` is never discarded or paired with an
 * empty primary query.
 *
 * @param {object} params
 * @param {Array<object>} params.messages - Conversation messages (may be empty).
 * @param {{chatModel: Function}} params.provider
 * @param {string} params.model
 * @param {AbortSignal} [params.abortSignal]
 * @returns {Promise<object>} `{ query, primaryQuery, complexity, usedFallback, usage }`
 *   on success, or the fallback object.
 */
async function extractSearchKeywords(params) {
  const { messages, provider, model, abortSignal } = params;
  const latestMessage = messages[messages.length - 1];
  // An empty conversation has no latest message; treat its text as empty.
  const latestText = latestMessage ? getMessageText(latestMessage) : "";
  const complexity = classifyComplexity(latestText);
  const conversationText = messages.slice(-6).map((m) => `${m.role}: ${getMessageText(m)}`).join("\n");
  const prompt = `\u4F60\u662F\u4E00\u4E2A\u641C\u7D22\u5173\u952E\u8BCD\u63D0\u53D6\u52A9\u624B\u3002\u5206\u6790\u4EE5\u4E0B\u5BF9\u8BDD\uFF0C\u63D0\u53D6\u6700\u4F73\u641C\u7D22\u5173\u952E\u8BCD\u3002

\u5BF9\u8BDD:
${conversationText}

\u8BF7\u63D0\u53D6\uFF1A
1. \u4E3B\u67E5\u8BE2\u8BCD\uFF08\u6700\u91CD\u8981\u76841-2\u4E2A\u5173\u952E\u8BCD\uFF0C\u7528\u7A7A\u683C\u5206\u9694\uFF09
2. \u8865\u5145\u67E5\u8BE2\u8BCD\uFF08\u53EF\u9009\u7684\u8F85\u52A9\u5173\u952E\u8BCD\uFF09

\u4EC5\u8FD4\u56DEJSON\u683C\u5F0F\uFF0C\u4E0D\u8981\u5176\u4ED6\u5185\u5BB9\uFF1A
{"query": "\u4E3B\u67E5\u8BE2\u8BCD", "primaryQuery": "\u6838\u5FC3\u8BCD"}`;
  try {
    const result = await generateText({
      model: provider.chatModel(model),
      prompt,
      maxOutputTokens: 100,
      temperature: 0,
      abortSignal
    });
    const rawText = result.text?.trim() ?? "";
    const jsonMatch = rawText.match(/\{[^}]+\}/);
    if (jsonMatch) {
      let parsed = null;
      try {
        parsed = JSON.parse(jsonMatch[0]);
      } catch {
        // Best effort: malformed JSON is expected from a model now and then
        // and is handled by the "json_parse_failed" fallback below.
      }
      const query = typeof parsed?.query === "string" ? parsed.query.trim() : "";
      if (query) {
        const primary = typeof parsed.primaryQuery === "string" ? parsed.primaryQuery.trim() : "";
        const u = result.usage;
        return {
          query,
          primaryQuery: primary || query,
          complexity,
          usedFallback: false,
          usage: u ? {
            inputTokens: u.inputTokens ?? 0,
            outputTokens: u.outputTokens ?? 0,
            totalTokens: (u.inputTokens ?? 0) + (u.outputTokens ?? 0)
          } : void 0
        };
      }
    }
    return buildFallback(latestText, complexity, "json_parse_failed");
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    return buildFallback(latestText, complexity, message);
  }
}
|
|
73
|
+
/**
 * Build the keyword result used when model extraction is unavailable.
 *
 * The query is the first three tokens of the message joined by spaces, or,
 * when tokenizing yields nothing, the first 30 characters of the message.
 *
 * @param {string} latestText - Latest user message.
 * @param {string} complexity - Complexity label to pass through.
 * @param {string} error - Reason the fallback was needed.
 * @returns {{query: string, primaryQuery: string, complexity: string, usedFallback: true, error: string}}
 */
function buildFallback(latestText, complexity, error) {
  const leadingTokens = tokenize(latestText).slice(0, 3);
  let query = leadingTokens.join(" ");
  if (!query) {
    query = latestText.slice(0, 30);
  }
  return { query, primaryQuery: query, complexity, usedFallback: true, error };
}
|
|
78
|
+
/**
 * Extract the plain text of a chat message.
 *
 * A non-empty string `content` wins; otherwise the `text` of every
 * `parts` entry of type `"text"` is concatenated.
 *
 * @param {{content?: unknown, parts?: Array<object>}|null|undefined} message -
 *   A missing message (e.g. the "latest" message of an empty conversation)
 *   yields `""` instead of throwing.
 * @returns {string}
 */
function getMessageText(message) {
  if (typeof message?.content === "string" && message.content) return message.content;
  if (Array.isArray(message?.parts)) {
    return message.parts
      .filter((part) => part?.type === "text" && typeof part.text === "string")
      .map((part) => part.text)
      .join("");
  }
  return "";
}
|
|
85
|
+
export {
|
|
86
|
+
KEYWORD_EXTRACTION_TIMEOUT_MS,
|
|
87
|
+
extractSearchKeywords,
|
|
88
|
+
shouldRunKeywordExtraction
|
|
89
|
+
};
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
const PRIVACY_REFUSAL_TEMPLATES = {
|
|
2
|
+
address: {
|
|
3
|
+
zh: [
|
|
4
|
+
"\u5177\u4F53\u4F4F\u5740\u662F\u79C1\u4EBA\u4FE1\u606F\uFF0C\u672A\u5728\u535A\u5BA2\u4E2D\u516C\u5F00\u3002",
|
|
5
|
+
"\u5173\u4E8E\u4F4F\u5740\u4FE1\u606F\uFF0C\u535A\u5BA2\u4E2D\u6CA1\u6709\u76F8\u5173\u5185\u5BB9\u3002",
|
|
6
|
+
"\u8FD9\u4E2A\u4FE1\u606F\u6D89\u53CA\u9690\u79C1\uFF0C\u535A\u4E3B\u6CA1\u6709\u5728\u535A\u5BA2\u4E2D\u5206\u4EAB\u3002"
|
|
7
|
+
],
|
|
8
|
+
en: [
|
|
9
|
+
"Address is private and not disclosed on the blog.",
|
|
10
|
+
"The blogger has not shared address information publicly.",
|
|
11
|
+
"This is private information that is not available on the blog."
|
|
12
|
+
]
|
|
13
|
+
},
|
|
14
|
+
income: {
|
|
15
|
+
zh: [
|
|
16
|
+
"\u6536\u5165\u4FE1\u606F\u672A\u5728\u535A\u5BA2\u4E2D\u516C\u5F00\u3002",
|
|
17
|
+
"\u5173\u4E8E\u6536\u5165\uFF0C\u535A\u5BA2\u4E2D\u6CA1\u6709\u76F8\u5173\u5185\u5BB9\u3002",
|
|
18
|
+
"\u8FD9\u4E2A\u4FE1\u606F\u5C5E\u4E8E\u9690\u79C1\u8303\u7574\uFF0C\u535A\u4E3B\u6CA1\u6709\u516C\u5F00\u3002"
|
|
19
|
+
],
|
|
20
|
+
en: [
|
|
21
|
+
"Income information is not disclosed on the blog.",
|
|
22
|
+
"The blogger has not shared income details publicly.",
|
|
23
|
+
"This is private financial information not available on the blog."
|
|
24
|
+
]
|
|
25
|
+
},
|
|
26
|
+
family: {
|
|
27
|
+
zh: [
|
|
28
|
+
"\u5BB6\u4EBA\u4FE1\u606F\u672A\u5728\u535A\u5BA2\u4E2D\u516C\u5F00\u3002",
|
|
29
|
+
"\u5173\u4E8E\u5BB6\u4EBA\uFF0C\u535A\u5BA2\u4E2D\u6CA1\u6709\u8BE6\u7EC6\u4ECB\u7ECD\u3002",
|
|
30
|
+
"\u8FD9\u5C5E\u4E8E\u79C1\u4EBA\u751F\u6D3B\u8303\u7574\uFF0C\u535A\u4E3B\u9009\u62E9\u4E0D\u516C\u5F00\u3002"
|
|
31
|
+
],
|
|
32
|
+
en: [
|
|
33
|
+
"Family information is not disclosed on the blog.",
|
|
34
|
+
"The blogger keeps family matters private.",
|
|
35
|
+
"Details about family members are not shared publicly."
|
|
36
|
+
]
|
|
37
|
+
},
|
|
38
|
+
phone: {
|
|
39
|
+
zh: [
|
|
40
|
+
"\u8054\u7CFB\u7535\u8BDD\u672A\u5728\u535A\u5BA2\u4E2D\u516C\u5F00\u3002",
|
|
41
|
+
"\u535A\u4E3B\u7684\u8054\u7CFB\u65B9\u5F0F\u6CA1\u6709\u5728\u535A\u5BA2\u4E2D\u5206\u4EAB\u3002",
|
|
42
|
+
"\u7535\u8BDD\u53F7\u7801\u5C5E\u4E8E\u9690\u79C1\u4FE1\u606F\uFF0C\u65E0\u6CD5\u63D0\u4F9B\u3002"
|
|
43
|
+
],
|
|
44
|
+
en: [
|
|
45
|
+
"Phone number is not disclosed on the blog.",
|
|
46
|
+
"Contact details are not shared publicly on the blog.",
|
|
47
|
+
"Phone numbers are private information not available here."
|
|
48
|
+
]
|
|
49
|
+
},
|
|
50
|
+
id: {
|
|
51
|
+
zh: [
|
|
52
|
+
"\u8EAB\u4EFD\u8BC1\u4EF6\u4FE1\u606F\u672A\u5728\u535A\u5BA2\u4E2D\u516C\u5F00\u3002",
|
|
53
|
+
"\u8FD9\u5C5E\u4E8E\u654F\u611F\u4E2A\u4EBA\u4FE1\u606F\uFF0C\u535A\u4E3B\u6CA1\u6709\u516C\u5F00\u3002",
|
|
54
|
+
"\u8EAB\u4EFD\u8BC1\u4EF6\u4FE1\u606F\u53D7\u4FDD\u62A4\uFF0C\u4E0D\u5728\u535A\u5BA2\u5185\u5BB9\u4E2D\u3002"
|
|
55
|
+
],
|
|
56
|
+
en: [
|
|
57
|
+
"ID information is not disclosed on the blog.",
|
|
58
|
+
"Identity document details are private and not shared.",
|
|
59
|
+
"This is sensitive personal information not available publicly."
|
|
60
|
+
]
|
|
61
|
+
},
|
|
62
|
+
age: {
|
|
63
|
+
zh: [
|
|
64
|
+
"\u5E74\u9F84\u4FE1\u606F\u672A\u5728\u535A\u5BA2\u4E2D\u516C\u5F00\u3002",
|
|
65
|
+
"\u5173\u4E8E\u5E74\u9F84\uFF0C\u535A\u5BA2\u4E2D\u6CA1\u6709\u660E\u786E\u63D0\u53CA\u3002",
|
|
66
|
+
"\u8FD9\u4E2A\u4FE1\u606F\u535A\u4E3B\u6CA1\u6709\u5728\u535A\u5BA2\u4E2D\u5206\u4EAB\u3002"
|
|
67
|
+
],
|
|
68
|
+
en: [
|
|
69
|
+
"Age information is not disclosed on the blog.",
|
|
70
|
+
"The blogger has not shared age details publicly.",
|
|
71
|
+
"Age is not mentioned in the blog content."
|
|
72
|
+
]
|
|
73
|
+
}
|
|
74
|
+
};
|
|
75
|
+
const NO_ARTICLE_TEMPLATES = {
|
|
76
|
+
zh: [
|
|
77
|
+
"\u6839\u636E\u535A\u5BA2\u5185\u5BB9\u641C\u7D22\uFF0C\u76EE\u524D\u6CA1\u6709\u627E\u5230\u4E0E\u8FD9\u4E2A\u4E3B\u9898\u76F4\u63A5\u76F8\u5173\u7684\u6587\u7AE0\u3002\u4F60\u53EF\u4EE5\u5C1D\u8BD5\u7528\u5176\u4ED6\u5173\u952E\u8BCD\u641C\u7D22\uFF0C\u6216\u8005\u95EE\u6211\u5176\u4ED6\u95EE\u9898\u3002",
|
|
78
|
+
"\u6211\u5728\u535A\u5BA2\u4E2D\u6CA1\u6709\u627E\u5230\u76F8\u5173\u7684\u5185\u5BB9\u3002\u8BD5\u8BD5\u6362\u4E2A\u65B9\u5F0F\u63D0\u95EE\uFF0C\u6216\u8005\u6D4F\u89C8\u5176\u4ED6\u8BDD\u9898\u3002",
|
|
79
|
+
"\u62B1\u6B49\uFF0C\u535A\u5BA2\u91CC\u6682\u65F6\u6CA1\u6709\u6D89\u53CA\u8FD9\u4E2A\u8BDD\u9898\u7684\u6587\u7AE0\u3002\u4F60\u53EF\u4EE5\u95EE\u6211\u5176\u4ED6\u95EE\u9898\uFF0C\u6211\u5C3D\u529B\u5E2E\u4F60\u627E\u7B54\u6848\u3002"
|
|
80
|
+
],
|
|
81
|
+
en: [
|
|
82
|
+
"No articles directly related to this topic were found. Try different keywords or ask another question.",
|
|
83
|
+
"I could not find relevant content in the blog. Try rephrasing your question or exploring other topics.",
|
|
84
|
+
"Sorry, there are no articles on this topic in the blog. Feel free to ask about something else."
|
|
85
|
+
]
|
|
86
|
+
};
|
|
87
|
+
const ARTICLE_COUNT_TEMPLATES = {
|
|
88
|
+
zh: [
|
|
89
|
+
"\u6839\u636E\u6211\u68C0\u7D22\u5230\u7684\u4FE1\u606F\uFF0C\u5F53\u524D\u5171\u627E\u5230 {count} \u7BC7\u76F8\u5173\u6587\u7AE0\u3002",
|
|
90
|
+
"\u641C\u7D22\u7ED3\u679C\u663E\u793A\uFF0C\u6709 {count} \u7BC7\u6587\u7AE0\u4E0E\u4F60\u7684\u95EE\u9898\u76F8\u5173\u3002",
|
|
91
|
+
"\u6211\u627E\u5230\u4E86 {count} \u7BC7\u53EF\u80FD\u5BF9\u4F60\u6709\u5E2E\u52A9\u7684\u6587\u7AE0\u3002"
|
|
92
|
+
],
|
|
93
|
+
en: [
|
|
94
|
+
"Based on my search, I found {count} related articles.",
|
|
95
|
+
"The search returned {count} articles that may be relevant.",
|
|
96
|
+
"I discovered {count} articles related to your query."
|
|
97
|
+
]
|
|
98
|
+
};
|
|
99
|
+
/**
 * Pick one template at random for the given language.
 *
 * @param {{zh: string[], en: string[]}} templates
 * @param {string} lang - `"en"` selects the English list; anything else the Chinese list.
 * @returns {string} A uniformly chosen entry (via `Math.random`); `undefined` when the list is empty.
 */
function pickTemplate(templates, lang) {
  let pool = templates.zh;
  if (lang === "en") {
    pool = templates.en;
  }
  const position = Math.floor(Math.random() * pool.length);
  return pool[position];
}
|
|
104
|
+
/**
 * Pick a template with `pickTemplate` and fill in `{name}` placeholders.
 *
 * Every occurrence of `{key}` is replaced by `String(value)` for each
 * entry of `vars`, in entry order. Both the placeholder and the value are
 * treated literally: keys containing regex metacharacters match only
 * themselves, and `$`-sequences in a value (e.g. "$&") are inserted as-is
 * rather than interpreted as replacement patterns.
 *
 * @param {{zh: string[], en: string[]}} templates
 * @param {string} lang - Passed to `pickTemplate`.
 * @param {Record<string, unknown>} vars - Placeholder values by name.
 * @returns {string}
 */
function pickTemplateWithVars(templates, lang, vars) {
  let text = pickTemplate(templates, lang);
  for (const [key, value] of Object.entries(vars)) {
    // split/join performs a global, fully literal replacement.
    text = text.split(`{${key}}`).join(String(value));
  }
  return text;
}
|
|
111
|
+
export {
|
|
112
|
+
ARTICLE_COUNT_TEMPLATES,
|
|
113
|
+
NO_ARTICLE_TEMPLATES,
|
|
114
|
+
PRIVACY_REFUSAL_TEMPLATES,
|
|
115
|
+
pickTemplate,
|
|
116
|
+
pickTemplateWithVars
|
|
117
|
+
};
|
|
File without changes
|