@llangtop/pwiki-core 0.3.4 → 0.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/WikiEngine.d.ts +25 -37
- package/dist/WikiEngine.d.ts.map +1 -1
- package/dist/WikiEngine.js +157 -298
- package/dist/WikiEngine.js.map +1 -1
- package/dist/ast-chunker.d.ts +23 -0
- package/dist/ast-chunker.d.ts.map +1 -0
- package/dist/ast-chunker.js +434 -0
- package/dist/ast-chunker.js.map +1 -0
- package/dist/content-cache.d.ts +13 -0
- package/dist/content-cache.d.ts.map +1 -0
- package/dist/content-cache.js +33 -0
- package/dist/content-cache.js.map +1 -0
- package/dist/embedder.d.ts +38 -0
- package/dist/embedder.d.ts.map +1 -0
- package/dist/embedder.js +267 -0
- package/dist/embedder.js.map +1 -0
- package/dist/file-manifest.d.ts +46 -0
- package/dist/file-manifest.d.ts.map +1 -0
- package/dist/file-manifest.js +121 -0
- package/dist/file-manifest.js.map +1 -0
- package/dist/index.d.ts +18 -8
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +25 -7
- package/dist/index.js.map +1 -1
- package/dist/indexer-compile.d.ts +20 -0
- package/dist/indexer-compile.d.ts.map +1 -0
- package/dist/indexer-compile.js +198 -0
- package/dist/indexer-compile.js.map +1 -0
- package/dist/indexer-embed.d.ts +21 -0
- package/dist/indexer-embed.d.ts.map +1 -0
- package/dist/indexer-embed.js +248 -0
- package/dist/indexer-embed.js.map +1 -0
- package/dist/indexer-scan.d.ts +4 -0
- package/dist/indexer-scan.d.ts.map +1 -0
- package/dist/indexer-scan.js +51 -0
- package/dist/indexer-scan.js.map +1 -0
- package/dist/indexer.d.ts +4 -0
- package/dist/indexer.d.ts.map +1 -0
- package/dist/indexer.js +7 -0
- package/dist/indexer.js.map +1 -0
- package/dist/model-registry.d.ts +32 -0
- package/dist/model-registry.d.ts.map +1 -0
- package/dist/model-registry.js +82 -0
- package/dist/model-registry.js.map +1 -0
- package/dist/parser.d.ts +9 -0
- package/dist/parser.d.ts.map +1 -0
- package/dist/parser.js +54 -0
- package/dist/parser.js.map +1 -0
- package/dist/preprocessor.d.ts +36 -0
- package/dist/preprocessor.d.ts.map +1 -0
- package/dist/preprocessor.js +209 -0
- package/dist/preprocessor.js.map +1 -0
- package/dist/search.d.ts +6 -0
- package/dist/search.d.ts.map +1 -0
- package/dist/search.js +91 -0
- package/dist/search.js.map +1 -0
- package/dist/semantic-compiler.d.ts +44 -0
- package/dist/semantic-compiler.d.ts.map +1 -0
- package/dist/semantic-compiler.js +376 -0
- package/dist/semantic-compiler.js.map +1 -0
- package/dist/semantic-search.d.ts +11 -0
- package/dist/semantic-search.d.ts.map +1 -0
- package/dist/semantic-search.js +217 -0
- package/dist/semantic-search.js.map +1 -0
- package/dist/store-settings.d.ts +32 -0
- package/dist/store-settings.d.ts.map +1 -0
- package/dist/store-settings.js +138 -0
- package/dist/store-settings.js.map +1 -0
- package/dist/store-vectors.d.ts +13 -0
- package/dist/store-vectors.d.ts.map +1 -0
- package/dist/store-vectors.js +101 -0
- package/dist/store-vectors.js.map +1 -0
- package/dist/store.d.ts +11 -0
- package/dist/store.d.ts.map +1 -0
- package/dist/store.js +28 -0
- package/dist/store.js.map +1 -0
- package/dist/types.d.ts +75 -92
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +1 -1
- package/dist/types.js.map +1 -1
- package/dist/wiki-paths.d.ts +3 -0
- package/dist/wiki-paths.d.ts.map +1 -0
- package/dist/wiki-paths.js +13 -0
- package/dist/wiki-paths.js.map +1 -0
- package/package.json +38 -38
- package/dist/compile/compiler.d.ts +0 -39
- package/dist/compile/compiler.d.ts.map +0 -1
- package/dist/compile/compiler.js +0 -227
- package/dist/compile/compiler.js.map +0 -1
- package/dist/compile/index.d.ts +0 -3
- package/dist/compile/index.d.ts.map +0 -1
- package/dist/compile/index.js +0 -2
- package/dist/compile/index.js.map +0 -1
- package/dist/embed/WikiEmbedder.d.ts +0 -28
- package/dist/embed/WikiEmbedder.d.ts.map +0 -1
- package/dist/embed/WikiEmbedder.js +0 -147
- package/dist/embed/WikiEmbedder.js.map +0 -1
- package/dist/embed/index.d.ts +0 -2
- package/dist/embed/index.d.ts.map +0 -1
- package/dist/embed/index.js +0 -2
- package/dist/embed/index.js.map +0 -1
- package/dist/llm/WikiLLM.d.ts +0 -24
- package/dist/llm/WikiLLM.d.ts.map +0 -1
- package/dist/llm/WikiLLM.js +0 -46
- package/dist/llm/WikiLLM.js.map +0 -1
- package/dist/llm/index.d.ts +0 -3
- package/dist/llm/index.d.ts.map +0 -1
- package/dist/llm/index.js +0 -2
- package/dist/llm/index.js.map +0 -1
- package/dist/models.d.ts +0 -5
- package/dist/models.d.ts.map +0 -1
- package/dist/models.js +0 -54
- package/dist/models.js.map +0 -1
- package/dist/search/WikiSearch.d.ts +0 -14
- package/dist/search/WikiSearch.d.ts.map +0 -1
- package/dist/search/WikiSearch.js +0 -223
- package/dist/search/WikiSearch.js.map +0 -1
- package/dist/search/index.d.ts +0 -2
- package/dist/search/index.d.ts.map +0 -1
- package/dist/search/index.js +0 -2
- package/dist/search/index.js.map +0 -1
- package/dist/store/WikiStore.d.ts +0 -47
- package/dist/store/WikiStore.d.ts.map +0 -1
- package/dist/store/WikiStore.js +0 -301
- package/dist/store/WikiStore.js.map +0 -1
- package/dist/store/index.d.ts +0 -2
- package/dist/store/index.d.ts.map +0 -1
- package/dist/store/index.js +0 -2
- package/dist/store/index.js.map +0 -1
- package/dist/util/fs.d.ts +0 -7
- package/dist/util/fs.d.ts.map +0 -1
- package/dist/util/fs.js +0 -36
- package/dist/util/fs.js.map +0 -1
- package/dist/util/index.d.ts +0 -3
- package/dist/util/index.d.ts.map +0 -1
- package/dist/util/index.js +0 -3
- package/dist/util/index.js.map +0 -1
- package/dist/util/paths.d.ts +0 -17
- package/dist/util/paths.d.ts.map +0 -1
- package/dist/util/paths.js +0 -31
- package/dist/util/paths.js.map +0 -1
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
// model-registry.ts — 模型中间层 (v1.0)
|
|
2
|
+
//
|
|
3
|
+
// 提供内置模型目录 + 选择/查询 API,让 wiki 搜索与具体模型解耦。
|
|
4
|
+
// store.ts 只存 currentModelId 字符串,所有模型元信息由此 registry 提供。
|
|
5
|
+
//
|
|
6
|
+
// 添加新模型: 在 BUILTIN_MODELS 数组中追加一条即可。
|
|
7
|
+
// embedder / indexer / management 通过 getCurrentModel() 自动适配。
|
|
8
|
+
import { readModelId, writeModelId } from "./store.js";
|
|
9
|
+
// ---- 内置模型目录 ----
|
|
10
|
+
/**
 * Catalogue of built-in models.
 *
 * Each entry carries:
 *   - id:          identifier string (the value findModel()/selectModel() compare against)
 *   - name:        display name
 *   - hfRepo:      repository path (presumably a Hugging Face repo id — TODO confirm against the embedder)
 *   - dim:         vector dimension (the descriptions refer to e.g. "1024 维")
 *   - description: user-facing description text
 *   - languages:   language codes
 *   - maxTokens:   token limit of the model
 *   - int8Size / fp32Size: approximate sizes in bytes (see the inline "~MB/GB" notes)
 *
 * getCurrentModel() and getDefaultModelId() in this file use the FIRST entry
 * as the default, so the order of entries matters.
 *
 * NOTE(review): the trailing "..." items in some `languages` arrays are literal
 * strings in the data (presumably an "and more" marker) — confirm that consumers
 * matching on language codes tolerate them.
 */
export const BUILTIN_MODELS = [
    {
        id: "bge-base-zh-v1.5",
        name: "BGE Base Chinese v1.5",
        hfRepo: "Xenova/bge-base-zh-v1.5",
        dim: 768,
        description: "BAAI 中文优化,MTEB 中文榜单领先,适合中文技术文档语义搜索",
        languages: ["zh", "en"],
        maxTokens: 512,
        int8Size: 130_000_000, // ~130 MB
        fp32Size: 390_000_000, // ~390 MB
    },
    {
        id: "bge-large-zh-v1.5",
        name: "BGE Large Chinese v1.5",
        hfRepo: "Xenova/bge-large-zh-v1.5",
        dim: 1024,
        description: "BAAI 中文大模型,1024 维高精度,适合对中文精度有较高要求的笔记 wiki 化",
        languages: ["zh", "en"],
        maxTokens: 512,
        int8Size: 324_000_000, // ~324 MB
        fp32Size: 1_300_000_000, // ~1.3 GB
    },
    {
        id: "paraphrase-multilingual",
        name: "Paraphrase Multilingual MiniLM",
        hfRepo: "Xenova/paraphrase-multilingual-MiniLM-L12-v2",
        dim: 384,
        description: "轻量多语言模型,50+ 语言,适合混合语言知识库",
        languages: ["zh", "en", "fr", "de", "ja", "ko", "..."],
        maxTokens: 128,
        int8Size: 118_000_000, // ~118 MB
        fp32Size: 470_000_000, // ~470 MB
    },
    {
        id: "bge-m3",
        name: "BGE M3",
        hfRepo: "Xenova/bge-m3",
        dim: 1024,
        description: "BAAI 多语言多粒度模型,100+ 语言,支持长文本 (8192 token),中英混合笔记首选",
        languages: ["zh", "en", "fr", "de", "ja", "ko", "es", "ru", "ar", "..."],
        maxTokens: 8192,
        int8Size: 340_000_000, // ~340 MB (O4 quantized)
        fp32Size: 2_200_000_000, // ~2.2 GB
    },
];
|
|
56
|
+
// ---- 查询 API ----
|
|
57
|
+
/**
 * Return all built-in models.
 *
 * The returned value is the BUILTIN_MODELS array itself (not a copy), so
 * mutating it mutates the registry.
 */
export function getBuiltinModels() {
    return BUILTIN_MODELS;
}
|
|
61
|
+
/**
 * Look up a built-in model by its id.
 *
 * @param id Model id to search for (strict equality against each entry's `id`).
 * @returns The first matching entry, or `undefined` when no entry matches.
 */
export function findModel(id) {
    for (const candidate of BUILTIN_MODELS) {
        if (candidate.id === id) {
            return candidate;
        }
    }
    return undefined;
}
|
|
65
|
+
/**
 * Return the metadata of the currently selected model.
 *
 * Reads the stored model id via readModelId(); when that id does not resolve
 * to a built-in entry, the first catalogue entry is returned instead.
 */
export function getCurrentModel() {
    const selected = findModel(readModelId());
    if (selected !== undefined && selected !== null) {
        return selected;
    }
    return BUILTIN_MODELS[0];
}
|
|
70
|
+
/**
 * Switch the active model.
 *
 * @param id Id of the model to activate.
 * @returns The newly selected model entry, or `null` when `id` is not a
 *          built-in model (in which case nothing is written).
 */
export function selectModel(id) {
    const model = findModel(id);
    if (model) {
        // Persist only the id string; metadata always comes from the catalogue.
        writeModelId(model.id);
        return model;
    }
    return null;
}
|
|
78
|
+
/**
 * Id of the default model, i.e. the first entry of BUILTIN_MODELS.
 */
export function getDefaultModelId() {
    const [defaultModel] = BUILTIN_MODELS;
    return defaultModel.id;
}
|
|
82
|
+
//# sourceMappingURL=model-registry.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"model-registry.js","sourceRoot":"","sources":["../src/model-registry.ts"],"names":[],"mappings":"AAAA,mCAAmC;AACnC,EAAE;AACF,yCAAyC;AACzC,wDAAwD;AACxD,EAAE;AACF,qCAAqC;AACrC,6DAA6D;AAE7D,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAyBvD,mBAAmB;AAEnB,MAAM,CAAC,MAAM,cAAc,GAAgB;IACzC;QACE,EAAE,EAAE,kBAAkB;QACtB,IAAI,EAAE,uBAAuB;QAC7B,MAAM,EAAE,yBAAyB;QACjC,GAAG,EAAE,GAAG;QACR,WAAW,EAAE,oCAAoC;QACjD,SAAS,EAAE,CAAC,IAAI,EAAE,IAAI,CAAC;QACvB,SAAS,EAAE,GAAG;QACd,QAAQ,EAAE,WAAW,EAAI,UAAU;QACnC,QAAQ,EAAE,WAAW,EAAI,UAAU;KACpC;IACD;QACE,EAAE,EAAE,mBAAmB;QACvB,IAAI,EAAE,wBAAwB;QAC9B,MAAM,EAAE,0BAA0B;QAClC,GAAG,EAAE,IAAI;QACT,WAAW,EAAE,6CAA6C;QAC1D,SAAS,EAAE,CAAC,IAAI,EAAE,IAAI,CAAC;QACvB,SAAS,EAAE,GAAG;QACd,QAAQ,EAAE,WAAW,EAAI,UAAU;QACnC,QAAQ,EAAE,aAAa,EAAE,UAAU;KACpC;IACD;QACE,EAAE,EAAE,yBAAyB;QAC7B,IAAI,EAAE,gCAAgC;QACtC,MAAM,EAAE,8CAA8C;QACtD,GAAG,EAAE,GAAG;QACR,WAAW,EAAE,0BAA0B;QACvC,SAAS,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC;QACtD,SAAS,EAAE,GAAG;QACd,QAAQ,EAAE,WAAW,EAAI,UAAU;QACnC,QAAQ,EAAE,WAAW,EAAI,UAAU;KACpC;IACD;QACE,EAAE,EAAE,QAAQ;QACZ,IAAI,EAAE,QAAQ;QACd,MAAM,EAAE,eAAe;QACvB,GAAG,EAAE,IAAI;QACT,WAAW,EAAE,mDAAmD;QAChE,SAAS,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC;QACxE,SAAS,EAAE,IAAI;QACf,QAAQ,EAAE,WAAW,EAAI,kBAAkB;QAC3C,QAAQ,EAAE,aAAa,EAAE,UAAU;KACpC;CACF,CAAC;AAEF,mBAAmB;AAEnB,eAAe;AACf,MAAM,UAAU,gBAAgB;IAC9B,OAAO,cAAc,CAAC;AACxB,CAAC;AAED,gBAAgB;AAChB,MAAM,UAAU,SAAS,CAAC,EAAsB;IAC9C,OAAO,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;AAC/C,CAAC;AAED,mBAAmB;AACnB,MAAM,UAAU,eAAe;IAC7B,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzB,OAAO,SAAS,CAAC,EAAE,CAAC,IAAI,cAAc,CAAC,CAAC,CAAC,CAAC;AAC5C,CAAC;AAED,qCAAqC;AACrC,MAAM,UAAU,WAAW,CAAC,EAAU;IACpC,MAAM,CAAC,GAAG,SAAS,CAAC,EAAE,CAAC,CAAC;IACxB,IAAI,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IACpB,YAAY,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IACnB,OAAO,CAAC,CAAC;AACX,CAAC;AAED,cAAc;AACd,MAAM,UAAU,
iBAAiB;IAC/B,OAAO,cAAc,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;AAC9B,CAAC"}
|
package/dist/parser.d.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { FileEntry } from "./types.js";
|
|
2
|
+
/**
 * Parse a single .md file into a FileEntry.
 * @param root     Root directory of the data source
 * @param filePath Absolute path of the file
 * @param mtime    File modification time (optional; obtained via statSync when omitted)
 * @returns The parsed entry, or `null`.
 */
export declare function parseFileEntry(root: string, filePath: string, mtime?: string): FileEntry | null;
|
|
9
|
+
//# sourceMappingURL=parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../src/parser.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAE5C;;;;;GAKG;AACH,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,MAAM,EAChB,KAAK,CAAC,EAAE,MAAM,GACb,SAAS,GAAG,IAAI,CA2ClB"}
|
package/dist/parser.js
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
// parser.ts — 统一的 .md 文件元信息解析
|
|
2
|
+
// 提取 frontmatter (title/tags) + H1 标题
|
|
3
|
+
// 供 indexer.ts 和 management.ts 共用
|
|
4
|
+
import { readFileSync, statSync } from "node:fs";
|
|
5
|
+
import { relative, basename } from "node:path";
|
|
6
|
+
/**
 * Parse a single .md file into a FileEntry.
 *
 * Title resolution order:
 *   1. a non-empty `title:` key in the YAML-style frontmatter,
 *   2. otherwise the first `# ` heading in the body (frontmatter excluded),
 *   3. otherwise the file name without its `.md` extension.
 *
 * Tags are read from an inline frontmatter list (`tags: [a, "b"]`); empty
 * items are dropped, so `tags: []` yields no tags.
 *
 * @param root     Root directory of the data source; `relPath` is computed relative to it.
 * @param filePath Absolute path of the file.
 * @param mtime    File modification time (optional; read via statSync when omitted).
 * @returns `{ title, tags, sourceDir, relPath, mtime }`, or `null` when the
 *          file cannot be read or stat'ed (best-effort: errors are not rethrown).
 */
export function parseFileEntry(root, filePath, mtime) {
    // Remove ONE pair of matching surrounding quotes; inner quotes and
    // apostrophes (e.g. "It's") are part of the value and must survive.
    const unquote = (value) => {
        const trimmed = value.trim();
        const first = trimmed[0];
        const wrapped = trimmed.length >= 2 &&
            (first === '"' || first === "'") &&
            trimmed[trimmed.length - 1] === first;
        return wrapped ? trimmed.slice(1, -1).trim() : trimmed;
    };
    try {
        const raw = readFileSync(filePath, "utf-8");
        // Normalise Windows separators so relPath is stable across platforms.
        const relPath = relative(root, filePath).replace(/\\/g, "/");
        let title = basename(filePath, ".md");
        let hasFrontmatterTitle = false;
        const tags = [];
        // --------------------------------------------------
        // frontmatter (LF or CRLF line endings)
        // --------------------------------------------------
        const fmMatch = raw.match(/^---\r?\n([\s\S]*?)\r?\n---/);
        if (fmMatch) {
            for (const line of fmMatch[1].split(/\r?\n/)) {
                const colon = line.indexOf(":");
                if (colon < 0)
                    continue;
                const key = line.slice(0, colon).trim();
                const value = unquote(line.slice(colon + 1));
                if (key === "title") {
                    // An empty `title:` does not count as a title.
                    if (value) {
                        title = value;
                        hasFrontmatterTitle = true;
                    }
                }
                else if (key === "tags" && value.startsWith("[") && value.endsWith("]")) {
                    tags.push(...value
                        .slice(1, -1)
                        .split(",")
                        .map(unquote)
                        .filter((tag) => tag.length > 0));
                }
            }
        }
        // --------------------------------------------------
        // No frontmatter title -> fall back to the first "# " heading.
        // Only the body is searched so that YAML "# comment" lines inside
        // the frontmatter are never mistaken for a heading.
        // --------------------------------------------------
        if (!hasFrontmatterTitle) {
            const body = fmMatch ? raw.slice(fmMatch[0].length) : raw;
            const h1 = body.match(/^# (.+)$/m);
            if (h1)
                title = h1[1].trim();
        }
        const finalMtime = mtime ?? statSync(filePath).mtime.toISOString();
        return { title, tags, sourceDir: root, relPath, mtime: finalMtime };
    }
    catch {
        return null;
    }
}
|
|
54
|
+
//# sourceMappingURL=parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parser.js","sourceRoot":"","sources":["../src/parser.ts"],"names":[],"mappings":"AAAA,8BAA8B;AAC9B,sCAAsC;AACtC,kCAAkC;AAElC,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AACjD,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAG/C;;;;;GAKG;AACH,MAAM,UAAU,cAAc,CAC5B,IAAY,EACZ,QAAgB,EAChB,KAAc;IAEd,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAC5C,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;QAC7D,IAAI,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QACtC,MAAM,IAAI,GAAa,EAAE,CAAC;QAE1B,qDAAqD;QACrD,cAAc;QACd,qDAAqD;QACrD,MAAM,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,uBAAuB,CAAC,CAAC;QACnD,IAAI,OAAO,EAAE,CAAC;YACZ,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC1C,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;gBAC7B,IAAI,EAAE,GAAG,CAAC;oBAAE,SAAS;gBACrB,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;gBACnC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;gBACzD,IAAI,CAAC,KAAK,OAAO;oBAAE,KAAK,GAAG,CAAC,CAAC;gBAC7B,IAAI,CAAC,KAAK,MAAM,IAAI,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;oBACzD,IAAI,CAAC,IAAI,CACP,GAAG,CAAC;yBACD,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;yBACZ,KAAK,CAAC,GAAG,CAAC;yBACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,CAC7C,CAAC;gBACJ,CAAC;YACH,CAAC;QACH,CAAC;QAED,qDAAqD;QACrD,oCAAoC;QACpC,qDAAqD;QACrD,IAAI,CAAC,OAAO,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,6BAA6B,CAAC,EAAE,CAAC;YAC1D,MAAM,EAAE,GAAG,GAAG,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;YAClC,IAAI,EAAE;gBAAE,KAAK,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC/B,CAAC;QAED,MAAM,UAAU,GAAG,KAAK,IAAI,QAAQ,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC;QAEnE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC;IACtE,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/** chunkType: inferred from Markdown features */
export declare function inferChunkType(text: string, heading: string): string;
/** contentClass: inferred from the file path */
export declare function inferContentClass(relPath: string): string;
/** importance: heuristic score (0.1-1.0) */
export declare function inferImportance(text: string, heading: string): number;
/** temporalAnchor: the first YYYY-MM-DD date extracted by regex */
export declare function inferTemporalAnchor(text: string): string | undefined;
/** confidence: preset confidence based on text length */
export declare function inferConfidence(text: string): number;
/** summary: the first 30 characters after cleaning */
export declare function inferSummary(text: string): string;
/** keywords: pre-extraction of English identifiers + frequent Chinese words */
export declare function inferKeywords(text: string): string[];
|
|
15
|
+
/** Output of the preprocessor for a file's full text */
export interface PreprocessedChunk {
    /** Programmatically split chunk (simple split by heading; fallback only) */
    heading: string;
    level: number;
    text: string;
    /** Programmatically extracted metadata */
    chunkType: string;
    contentClass: string;
    importance: number;
    temporalAnchor?: string;
    confidence: number;
    summary: string;
    keywords: string[];
}
|
|
30
|
+
/**
 * Preprocess a file's full text: AST chunking + metadata extraction per segment.
 * v5.3: reuses ast-chunker instead of line-by-line regex scanning.
 * This is the fallback chunking — the LLM may output different segments
 * during file-level compilation.
 */
export declare function preprocessFile(relPath: string, fullText: string, defaultTitle: string): Promise<PreprocessedChunk[]>;
|
|
36
|
+
//# sourceMappingURL=preprocessor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"preprocessor.d.ts","sourceRoot":"","sources":["../src/preprocessor.ts"],"names":[],"mappings":"AAkBA,+BAA+B;AAC/B,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAsBpE;AAED,2BAA2B;AAC3B,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAkBzD;AAED,kCAAkC;AAClC,wBAAgB,eAAe,CAC7B,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,MAAM,GACd,MAAM,CAeR;AAED,yCAAyC;AACzC,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,MAAM,GACX,MAAM,GAAG,SAAS,CAKpB;AAED,+BAA+B;AAC/B,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAIpD;AAED,0BAA0B;AAC1B,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CASjD;AAED,iCAAiC;AACjC,wBAAgB,aAAa,CAC3B,IAAI,EAAE,MAAM,GACX,MAAM,EAAE,CAmDV;AAMD,mBAAmB;AACnB,MAAM,WAAW,iBAAiB;IAChC,0BAA0B;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,eAAe;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED;;;;GAIG;AACH,wBAAsB,cAAc,CAClC,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,EAChB,YAAY,EAAE,MAAM,GACnB,OAAO,CAAC,iBAAiB,EAAE,CAAC,CA+D9B"}
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
// preprocessor.ts — 程序化元数据提取 (v5.2)
|
|
2
|
+
//
|
|
3
|
+
// 原则: 程序做能做的一切,LLM 只做需要语义理解的活。
|
|
4
|
+
//
|
|
5
|
+
// 7 个字段由规则/启发式/正则自动提取:
|
|
6
|
+
// chunkType, contentClass, importance, temporalAnchor,
|
|
7
|
+
// confidence, summary, keywords
|
|
8
|
+
//
|
|
9
|
+
// LLM 只负责 4 个语义字段:
|
|
10
|
+
// topic, normalizedText, concepts, aliases
|
|
11
|
+
import { extractChunksAST } from "./ast-chunker.js";
|
|
12
|
+
// ============================================================
|
|
13
|
+
// 推断规则
|
|
14
|
+
// ============================================================
|
|
15
|
+
/**
 * chunkType: classify a chunk from its Markdown features.
 *
 * Checks run in priority order: body signals first (fenced code, open task,
 * checked task), then heading keywords, then a length-based default.
 *
 * @param text    Chunk body text.
 * @param heading Heading line of the chunk; the keyword rules only match when
 *                it starts with 1-4 `#` characters.
 * @returns One of "code", "todo", "log", "architecture", "decision",
 *          "reference", "question", "idea", "research", "note".
 */
export function inferChunkType(text, heading) {
    const bodyRules = [
        { pattern: /```[\s\S]*?```/, type: "code" },
        { pattern: /^\s*[-*]\s*\[ \]/m, type: "todo" },
        { pattern: /^\s*[-*]\s*\[x\]/im, type: "log" },
    ];
    const bodyHit = bodyRules.find((rule) => rule.pattern.test(text));
    if (bodyHit) {
        return bodyHit.type;
    }
    const headingRules = [
        { pattern: /^#{1,4}\s*(架构|拓扑|结构|方案)/, type: "architecture" },
        { pattern: /^#{1,4}\s*(决定|决策|结论|决议)/, type: "decision" },
        { pattern: /^#{1,4}\s*(参考|链接|相关|资源|附录)/, type: "reference" },
        { pattern: /^#{1,4}\s*(问题|排查|故障|报错|异常)/, type: "question" },
        { pattern: /^#{1,4}\s*(日志|记录|日报|周报|流水)/, type: "log" },
        { pattern: /^#{1,4}\s*(想法|思路|灵感|idea)/i, type: "idea" },
        { pattern: /^#{1,4}\s*(研究|调研|分析|探索)/, type: "research" },
    ];
    const headingHit = headingRules.find((rule) => rule.pattern.test(heading));
    if (headingHit) {
        return headingHit.type;
    }
    // Very short chunks with no other signal are treated as references.
    return text.length < 50 ? "reference" : "note";
}
|
|
41
|
+
/**
 * contentClass: classify a file from keywords in its relative path.
 *
 * @param relPath Path of the file relative to the data-source root.
 * @returns "conversation", "event" or "knowledge" for the first matching
 *          keyword group (checked in that order), otherwise "reference".
 */
export function inferContentClass(relPath) {
    const pathRules = [
        { label: "conversation", pattern: /chatgpt|聊天|对话|conversation/i },
        { label: "event", pattern: /日报|更新|会议|流水|日志|记录|周报/i },
        { label: "knowledge", pattern: /知识点|规范|标准|原理|手册|指南|教程|总结/i },
    ];
    const matched = pathRules.find((rule) => rule.pattern.test(relPath));
    return matched ? matched.label : "reference";
}
|
|
51
|
+
/** importance: 启发式打分 (0.1-1.0) */
|
|
52
|
+
export function inferImportance(text, heading) {
|
|
53
|
+
let score = 0.3;
|
|
54
|
+
if (text.length > 200)
|
|
55
|
+
score += 0.2;
|
|
56
|
+
if (text.length > 500)
|
|
57
|
+
score += 0.1;
|
|
58
|
+
if (/```/.test(text))
|
|
59
|
+
score += 0.15;
|
|
60
|
+
if (/^#{1,3}\s/.test(heading))
|
|
61
|
+
score += 0.1;
|
|
62
|
+
if (/错误|异常|故障|问题|解决|修复|排查/.test(text))
|
|
63
|
+
score += 0.1;
|
|
64
|
+
if (/TODO|待办|以后|FIXME|临时|暂存/i.test(text))
|
|
65
|
+
score -= 0.3;
|
|
66
|
+
return Math.max(0.1, Math.min(1, score));
|
|
67
|
+
}
|
|
68
|
+
/**
 * temporalAnchor: extract the first calendar date (year 20xx) found in the
 * text and return it normalised to zero-padded `YYYY-MM-DD`.
 *
 * Accepts `-` or `/` as separators and one- or two-digit month/day, so
 * `2024/1/5` becomes `2024-01-05`. Candidates whose month is outside 1-12 or
 * whose day is outside 1-31 are skipped and the search continues.
 *
 * @param text Text to scan.
 * @returns The normalised date string, or `undefined` when none is found.
 */
export function inferTemporalAnchor(text) {
    const datePattern = /\b(20\d{2})[-/](\d{1,2})[-/](\d{1,2})\b/g;
    for (const [, year, month, day] of text.matchAll(datePattern)) {
        const monthNum = Number(month);
        const dayNum = Number(day);
        if (monthNum < 1 || monthNum > 12 || dayNum < 1 || dayNum > 31) {
            continue;
        }
        return `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`;
    }
    return undefined;
}
|
|
73
|
+
/**
 * confidence: preset confidence derived purely from text length.
 *
 * @param text Chunk body text.
 * @returns 0.3 for fewer than 20 characters, 0.6 for fewer than 80,
 *          otherwise 0.85.
 */
export function inferConfidence(text) {
    const size = text.length;
    return size < 20 ? 0.3 : size < 80 ? 0.6 : 0.85;
}
|
|
81
|
+
/**
 * summary: strip Markdown decoration and return the first 30 characters.
 *
 * Cleaning steps, in order: drop heading markers, replace fenced code blocks
 * with the placeholder "[代码]", reduce links to their label, remove emphasis /
 * inline-code / strikethrough markers, and flatten newlines to spaces.
 *
 * Truncation counts Unicode code points rather than UTF-16 code units, so an
 * emoji or other astral character at the cut-off is never split into a lone
 * surrogate.
 *
 * @param text Chunk body text.
 * @returns At most 30 characters of cleaned text.
 */
export function inferSummary(text) {
    const cleaned = text
        .replace(/^#{1,6}\s+/gm, "")
        .replace(/```[\s\S]*?```/g, "[代码]")
        .replace(/\[([^\]]*)\]\([^)]*\)/g, "$1")
        .replace(/\*\*|__|\*|_|`|~~/g, "")
        .replace(/\n/g, " ")
        .trim();
    // Array.from iterates by code point, keeping surrogate pairs intact.
    return Array.from(cleaned).slice(0, 30).join("");
}
|
|
92
|
+
/** Stop words (lower-case) excluded from extracted keywords; built once at module load. */
const KEYWORD_STOP_WORDS = new Set([
    "可以",
    "一个",
    "这个",
    "不是",
    "还是",
    "如果",
    "因为",
    "所以",
    "但是",
    "而且",
    "或者",
    "以及",
    "就是",
    "没有",
    "已经",
    "什么",
    "怎么",
    "这样",
    "那样",
    "时候",
    "问题",
    "需要",
    "通过",
    "进行",
    "使用",
    "用于",
    "可能",
    "应该",
    "然后",
    "the",
    "and",
    "for",
    "from",
    "with",
    "that",
    "this",
    "are",
    "not",
    "but",
    "has",
    "was",
]);
/**
 * keywords: pre-extract candidate keywords from the text.
 *
 * Candidates are English identifiers (3+ letters/underscores, any case) and
 * runs of 2-4 CJK characters, de-duplicated in order of first appearance
 * (English candidates first). Stop words are removed case-insensitively, so
 * "The" and "AND" are filtered just like "the" and "and".
 *
 * @param text Chunk body text.
 * @returns Up to 8 keywords, in their original casing.
 */
export function inferKeywords(text) {
    const english = text.match(/\b[a-z_]{3,}\b/gi) ?? [];
    const chinese = text.match(/[\u4e00-\u9fff]{2,4}/g) ?? [];
    const unique = [...new Set([...english, ...chinese])];
    return unique
        .filter((word) => !KEYWORD_STOP_WORDS.has(word.toLowerCase()))
        .slice(0, 8);
}
|
|
143
|
+
/**
 * Build one chunk record with all programmatically inferred metadata.
 * `chunkTypeHint`, when truthy, takes precedence over the inferred chunk type.
 */
function describeChunk(relPath, heading, level, text, chunkTypeHint) {
    return {
        heading,
        level,
        text,
        chunkType: chunkTypeHint || inferChunkType(text, heading),
        contentClass: inferContentClass(relPath),
        importance: inferImportance(text, heading),
        temporalAnchor: inferTemporalAnchor(text),
        confidence: inferConfidence(text),
        summary: inferSummary(text),
        keywords: inferKeywords(text),
    };
}
/**
 * Split raw Markdown into sections at level 1-4 headings (line-based scan).
 * A leading frontmatter block (`---` ... `---`) is removed first; text before
 * the first heading is collected under `defaultTitle` with level 0.
 */
function splitByHeadings(fullText, defaultTitle) {
    let lines = fullText.split("\n");
    // Skip frontmatter: only when the very first line is the opening fence.
    // The closing fence is searched from index 1 so the opening fence itself
    // is never taken as the end of the block.
    if (lines[0].trim() === "---") {
        const closing = lines.findIndex((l, i) => i > 0 && l.trim() === "---");
        if (closing > 0)
            lines = lines.slice(closing + 1);
    }
    const sections = [];
    for (const line of lines) {
        const headingMatch = /^(#{1,4})\s/.exec(line);
        if (headingMatch) {
            sections.push({ heading: line.trim(), level: headingMatch[1].length, lines: [] });
            continue;
        }
        if (sections.length === 0) {
            // Preamble before any heading belongs to an implicit top section.
            sections.push({ heading: defaultTitle, level: 0, lines: [] });
        }
        sections[sections.length - 1].lines.push(line);
    }
    // A file consisting only of frontmatter still yields one (empty) section.
    if (sections.length === 0) {
        sections.push({ heading: defaultTitle, level: 0, lines: [] });
    }
    return sections;
}
/**
 * Preprocess a file's full text: chunk it and extract metadata per segment.
 *
 * v5.3: AST chunking (ast-chunker) is tried first; when it yields no chunks,
 * a line-by-line heading scan is used as a fallback. This is only the
 * fallback segmentation — the LLM may output different segments during
 * file-level compilation.
 *
 * @param relPath      Path of the file relative to the data-source root
 *                     (used for contentClass inference).
 * @param fullText     Complete file content.
 * @param defaultTitle Heading used for text that precedes the first heading.
 * @returns One record per chunk with heading, level, text and the inferred
 *          metadata fields.
 */
export async function preprocessFile(relPath, fullText, defaultTitle) {
    // v5.3: prefer AST chunking
    const astChunks = await extractChunksAST(fullText, relPath, defaultTitle);
    if (astChunks.length > 0) {
        return astChunks.map((c) => describeChunk(relPath, c.heading, c.level, c.rawText, c.chunkTypeHint));
    }
    // -- fallback: line-by-line regex scan --
    return splitByHeadings(fullText, defaultTitle).map((s) => describeChunk(relPath, s.heading, s.level, s.lines.join("\n").trim(), undefined));
}
|
|
209
|
+
//# sourceMappingURL=preprocessor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"preprocessor.js","sourceRoot":"","sources":["../src/preprocessor.ts"],"names":[],"mappings":"AAAA,oCAAoC;AACpC,EAAE;AACF,+BAA+B;AAC/B,EAAE;AACF,uBAAuB;AACvB,yDAAyD;AACzD,kCAAkC;AAClC,EAAE;AACF,mBAAmB;AACnB,6CAA6C;AAG7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AAEpD,+DAA+D;AAC/D,OAAO;AACP,+DAA+D;AAE/D,+BAA+B;AAC/B,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,OAAe;IAC1D,IAAI,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,MAAM,CAAC;IAC/C,IAAI,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,MAAM,CAAC;IAClD,IAAI,oBAAoB,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IAClD,IAAI,yBAAyB,CAAC,IAAI,CAAC,OAAO,CAAC;QACzC,OAAO,cAAc,CAAC;IACxB,IAAI,yBAAyB,CAAC,IAAI,CAAC,OAAO,CAAC;QACzC,OAAO,UAAU,CAAC;IACpB,IAAI,4BAA4B,CAAC,IAAI,CAAC,OAAO,CAAC;QAC5C,OAAO,WAAW,CAAC;IACrB,IAAI,4BAA4B,CAAC,IAAI,CAAC,OAAO,CAAC;QAC5C,OAAO,UAAU,CAAC;IACpB,IAAI,4BAA4B,CAAC,IAAI,CAAC,OAAO,CAAC;QAC5C,OAAO,KAAK,CAAC;IACf,IAAI,4BAA4B,CAAC,IAAI,CAAC,OAAO,CAAC;QAC5C,OAAO,MAAM,CAAC;IAChB,IACE,yBAAyB,CAAC,IAAI,CAAC,OAAO,CAAC;QAEvC,OAAO,UAAU,CAAC;IACpB,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,WAAW,CAAC;IACzC,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,2BAA2B;AAC3B,MAAM,UAAU,iBAAiB,CAAC,OAAe;IAC/C,IACE,6BAA6B,CAAC,IAAI,CAAC,OAAO,CAAC;QAE3C,OAAO,cAAc,CAAC;IACxB,IACE,uBAAuB,CAAC,IAAI,CAC1B,OAAO,CACR;QAED,OAAO,OAAO,CAAC;IACjB,IACE,2BAA2B,CAAC,IAAI,CAC9B,OAAO,CACR;QAED,OAAO,WAAW,CAAC;IACrB,OAAO,WAAW,CAAC;AACrB,CAAC;AAED,kCAAkC;AAClC,MAAM,UAAU,eAAe,CAC7B,IAAY,EACZ,OAAe;IAEf,IAAI,KAAK,GAAG,GAAG,CAAC;IAChB,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG;QAAE,KAAK,IAAI,GAAG,CAAC;IACpC,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG;QAAE,KAAK,IAAI,GAAG,CAAC;IACpC,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,KAAK,IAAI,IAAI,CAAC;IACpC,IAAI,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,KAAK,IAAI,GAAG,CAAC;IAC5C,IACE,sBAAsB,CAAC,IAAI,CAAC,IAAI,CAAC;QAEjC,KAAK,IAAI,GAAG,CAAC;IACf,IACE,yBAAyB,CAAC,IAAI,CAAC,IAAI,CAAC;QAEpC,KAAK,IAAI,GAAG,CAAC;IACf,OAAO,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC;AAC3C,CAAC;AAED,yCAAyC;AACzC,MAAM,UAAU,mBAAmB,CACjC,IAAY;IAEZ,MAAM,CAAC,G
AAG,IAAI,CAAC,KAAK,CAClB,qCAAqC,CACtC,CAAC;IACF,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;AAClD,CAAC;AAED,+BAA+B;AAC/B,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,GAAG,CAAC;IACjC,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,GAAG,CAAC;IACjC,OAAO,IAAI,CAAC;AACd,CAAC;AAED,0BAA0B;AAC1B,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,MAAM,OAAO,GAAG,IAAI;SACjB,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC;SAC3B,OAAO,CAAC,iBAAiB,EAAE,MAAM,CAAC;SAClC,OAAO,CAAC,wBAAwB,EAAE,IAAI,CAAC;SACvC,OAAO,CAAC,oBAAoB,EAAE,EAAE,CAAC;SACjC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC;SACnB,IAAI,EAAE,CAAC;IACV,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AAC9B,CAAC;AAED,iCAAiC;AACjC,MAAM,UAAU,aAAa,CAC3B,IAAY;IAEZ,MAAM,EAAE,GACN,IAAI,CAAC,KAAK,CAAC,kBAAkB,CAAC,IAAI,EAAE,CAAC;IACvC,MAAM,EAAE,GACN,IAAI,CAAC,KAAK,CAAC,uBAAuB,CAAC,IAAI,EAAE,CAAC;IAC5C,MAAM,GAAG,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;IACzC,QAAQ;IACR,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC;QACnB,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,IAAI;QACJ,KAAK;QACL,KAAK;QACL,KAAK;QACL,MAAM;QACN,MAAM;QACN,MAAM;QACN,MAAM;QACN,KAAK;QACL,KAAK;QACL,KAAK;QACL,KAAK;QACL,KAAK;KACN,CAAC,CAAC;IACH,OAAO,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;AACrD,CAAC;AAsBD;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,OAAe,EACf,QAAgB,EAChB,YAAoB;IAEpB,kBAAkB;IAClB,MAAM,SAAS,GAAG,MAAM,gBAAgB,CAAC,QAAQ,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC;IAE1E,iBAAiB;IACjB,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACzB,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC3B,OAAO,EAAE,CAAC,CAAC,OAAO;YAClB,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,IAAI,EAAE,CAAC,CAAC,OAAO;YACf,SAAS,EAAE,CAAC,CAAC,aAAa,IAAI,cAAc
,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,OAAO,CAAC;YAClE,YAAY,EAAE,iBAAiB,CAAC,OAAO,CAAC;YACxC,UAAU,EAAE,eAAe,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,OAAO,CAAC;YACjD,cAAc,EAAE,mBAAmB,CAAC,CAAC,CAAC,OAAO,CAAC;YAC9C,UAAU,EAAE,eAAe,CAAC,CAAC,CAAC,OAAO,CAAC;YACtC,OAAO,EAAE,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC;YAChC,QAAQ,EAAE,aAAa,CAAC,CAAC,CAAC,OAAO,CAAC;SACnC,CAAC,CAAC,CAAC;IACN,CAAC;IAED,6BAA6B;IAC7B,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,QAAQ,GAA0D,EAAE,CAAC;IAE3E,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;QAClC,IAAI,CAAC,EAAE,CAAC;YACN,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC5B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;YAC9C,QAAQ,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;QAC/C,CAAC;aAAM,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC/B,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjD,CAAC;aAAM,CAAC;YACN,QAAQ,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,YAAY,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;YAC9D,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;IAED,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,QAAQ,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,YAAY,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;IAC5D,CAAC;IAED,iBAAiB;IACjB,IAAI,QAAQ,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,KAAK,KAAK,IAAI,QAAQ,CAAC,CAAC,CAAC,EAAE,OAAO,KAAK,KAAK,EAAE,CAAC;QAC9E,MAAM,KAAK,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,KAAK,EAAE,CAAC,CAAC,CAAC;QACxE,IAAI,KAAK,GAAG,CAAC;YAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;IACxE,CAAC;IAED,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACxB,MAAM,IAAI,GAAG,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;QACvC,OAAO;YACL,OAAO,EAAE,CAAC,CAAC,OAAO;YAClB,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,IAAI;YACJ,SAAS,EAAE,cAAc,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC;YA
C1C,YAAY,EAAE,iBAAiB,CAAC,OAAO,CAAC;YACxC,UAAU,EAAE,eAAe,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC;YAC5C,cAAc,EAAE,mBAAmB,CAAC,IAAI,CAAC;YACzC,UAAU,EAAE,eAAe,CAAC,IAAI,CAAC;YACjC,OAAO,EAAE,YAAY,CAAC,IAAI,CAAC;YAC3B,QAAQ,EAAE,aAAa,CAAC,IAAI,CAAC;SAC9B,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC"}
|
package/dist/search.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"search.d.ts","sourceRoot":"","sources":["../src/search.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAa,SAAS,EAAE,MAAM,YAAY,CAAC;AA0BvD,6BAA6B;AAC7B,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,SAAS,EAAE,CAkExD;AAED,aAAa;AACb,eAAO,MAAM,MAAM,sBAAgB,CAAC"}
|
package/dist/search.js
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
// search.ts — keyword search (v5.0)
|
|
2
|
+
// Title / path / tag / content matching → line-level context display
|
|
3
|
+
import { getIndex } from "./store.js";
|
|
4
|
+
import { readFileSync } from "node:fs";
|
|
5
|
+
import { resolve as resolvePath } from "node:path";
|
|
6
|
+
import { getContent } from "./content-cache.js";
|
|
7
|
+
/** 提取匹配行的上下文(前一行、匹配行、后一行) */
|
|
8
|
+
function lineContext(content, query, maxLen = 100) {
|
|
9
|
+
const lower = content.toLowerCase();
|
|
10
|
+
const q = query.toLowerCase();
|
|
11
|
+
const pos = lower.indexOf(q);
|
|
12
|
+
if (pos < 0)
|
|
13
|
+
return "";
|
|
14
|
+
// 找到匹配位置所在行号
|
|
15
|
+
const before = content.slice(0, pos);
|
|
16
|
+
const lineNum = before.split("\n").length; // 1-indexed
|
|
17
|
+
const lines = content.split("\n");
|
|
18
|
+
const prev = lineNum > 1 ? lines[lineNum - 2].trim() : "";
|
|
19
|
+
const curr = lines[lineNum - 1].trim();
|
|
20
|
+
const next = lineNum < lines.length ? lines[lineNum].trim() : "";
|
|
21
|
+
const parts = [];
|
|
22
|
+
if (prev)
|
|
23
|
+
parts.push(`L${lineNum - 1}: ${prev.slice(0, maxLen)}`);
|
|
24
|
+
parts.push(`L${lineNum}: ${curr.slice(0, maxLen)}`);
|
|
25
|
+
if (next)
|
|
26
|
+
parts.push(`L${lineNum + 1}: ${next.slice(0, maxLen)}`);
|
|
27
|
+
return parts.join("\n");
|
|
28
|
+
}
|
|
29
|
+
/** 关键词搜索(同步,纯子串匹配 + 加权打分) */
|
|
30
|
+
export function keywordSearch(query) {
|
|
31
|
+
const idx = getIndex();
|
|
32
|
+
const q = query.toLowerCase();
|
|
33
|
+
const hits = [];
|
|
34
|
+
for (const [relPath, entry] of Object.entries(idx)) {
|
|
35
|
+
let score = 0;
|
|
36
|
+
const parts = [];
|
|
37
|
+
// 标题匹配
|
|
38
|
+
if (entry.title.toLowerCase().includes(q)) {
|
|
39
|
+
score += 10;
|
|
40
|
+
}
|
|
41
|
+
// 路径匹配
|
|
42
|
+
if (relPath.toLowerCase().includes(q)) {
|
|
43
|
+
score += 5;
|
|
44
|
+
}
|
|
45
|
+
// 标签匹配
|
|
46
|
+
if (entry.tags.some(t => t.toLowerCase().includes(q))) {
|
|
47
|
+
score += 3;
|
|
48
|
+
}
|
|
49
|
+
// 内容匹配(优先内存缓存,miss 时回退磁盘)
|
|
50
|
+
let content = getContent(relPath);
|
|
51
|
+
if (!content) {
|
|
52
|
+
try {
|
|
53
|
+
content = readFileSync(resolvePath(entry.sourceDir, relPath), "utf-8");
|
|
54
|
+
}
|
|
55
|
+
catch {
|
|
56
|
+
content = undefined;
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
if (content) {
|
|
60
|
+
const lower = content.toLowerCase();
|
|
61
|
+
let count = 0, p = lower.indexOf(q);
|
|
62
|
+
while (p >= 0 && count < 5) {
|
|
63
|
+
count++;
|
|
64
|
+
if (count === 1)
|
|
65
|
+
score += 1;
|
|
66
|
+
// 取该匹配位置的行上下文
|
|
67
|
+
const ctx = lineContext(content, query);
|
|
68
|
+
if (ctx && !parts.some(pp => pp.includes(ctx.slice(0, 30)))) {
|
|
69
|
+
parts.push(ctx);
|
|
70
|
+
}
|
|
71
|
+
p = lower.indexOf(q, p + 1);
|
|
72
|
+
}
|
|
73
|
+
// 多次出现加分
|
|
74
|
+
score += Math.min(count - 1, 9);
|
|
75
|
+
}
|
|
76
|
+
if (score > 0) {
|
|
77
|
+
hits.push({
|
|
78
|
+
relPath: entry.relPath,
|
|
79
|
+
sourceDir: entry.sourceDir,
|
|
80
|
+
title: entry.title,
|
|
81
|
+
tags: entry.tags,
|
|
82
|
+
snippet: parts.join("\n"),
|
|
83
|
+
score,
|
|
84
|
+
});
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
return hits.sort((a, b) => b.score - a.score);
|
|
88
|
+
}
|
|
89
|
+
/** Backward-compatible alias. */
|
|
90
|
+
export const search = keywordSearch; // same function under its other exported name
|
|
91
|
+
//# sourceMappingURL=search.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"search.js","sourceRoot":"","sources":["../src/search.ts"],"names":[],"mappings":"AAAA,2BAA2B;AAC3B,0BAA0B;AAE1B,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AACtC,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,OAAO,IAAI,WAAW,EAAE,MAAM,WAAW,CAAC;AACnD,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAGhD,6BAA6B;AAC7B,SAAS,WAAW,CAAC,OAAe,EAAE,KAAa,EAAE,MAAM,GAAG,GAAG;IAC/D,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IACpC,MAAM,CAAC,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;IAC9B,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IAC7B,IAAI,GAAG,GAAG,CAAC;QAAE,OAAO,EAAE,CAAC;IAEvB,aAAa;IACb,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IACrC,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,YAAY;IACvD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAElC,MAAM,IAAI,GAAG,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IAC1D,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACvC,MAAM,IAAI,GAAG,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IAEjE,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,IAAI;QAAE,KAAK,CAAC,IAAI,CAAC,IAAI,OAAO,GAAG,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;IAClE,KAAK,CAAC,IAAI,CAAC,IAAI,OAAO,KAAK,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;IACpD,IAAI,IAAI;QAAE,KAAK,CAAC,IAAI,CAAC,IAAI,OAAO,GAAG,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;IAElE,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,6BAA6B;AAC7B,MAAM,UAAU,aAAa,CAAC,KAAa;IACzC,MAAM,GAAG,GAAG,QAAQ,EAAE,CAAC;IACvB,MAAM,CAAC,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;IAC9B,MAAM,IAAI,GAAgB,EAAE,CAAC;IAE7B,KAAK,MAAM,CAAC,OAAO,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;QACnD,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,OAAO;QACP,IAAI,KAAK,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;YAC1C,KAAK,IAAI,EAAE,CAAC;QACd,CAAC;QAED,OAAO;QACP,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,CAAC,C
AAC,EAAE,CAAC;YACtC,KAAK,IAAI,CAAC,CAAC;QACb,CAAC;QAED,OAAO;QACP,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YACtD,KAAK,IAAI,CAAC,CAAC;QACb,CAAC;QAED,0BAA0B;QAC1B,IAAI,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,CAAC;QAClC,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,IAAI,CAAC;gBACH,OAAO,GAAG,YAAY,CAAC,WAAW,CAAC,KAAK,CAAC,SAAS,EAAE,OAAO,CAAC,EAAE,OAAO,CAAC,CAAC;YACzE,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,GAAG,SAAS,CAAC;YACtB,CAAC;QACH,CAAC;QACD,IAAI,OAAO,EAAE,CAAC;YACZ,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;YAEpC,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YACpC,OAAO,CAAC,IAAI,CAAC,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;gBAC3B,KAAK,EAAE,CAAC;gBACR,IAAI,KAAK,KAAK,CAAC;oBAAE,KAAK,IAAI,CAAC,CAAC;gBAE5B,cAAc;gBACd,MAAM,GAAG,GAAG,WAAW,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;gBACxC,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC5D,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBAClB,CAAC;gBACD,CAAC,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;YAC9B,CAAC;YAED,SAAS;YACT,KAAK,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;QAClC,CAAC;QAED,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;YACd,IAAI,CAAC,IAAI,CAAC;gBACR,OAAO,EAAE,KAAK,CAAC,OAAO;gBACtB,SAAS,EAAE,KAAK,CAAC,SAAS;gBAC1B,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;gBACzB,KAAK;aACN,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;AAChD,CAAC;AAED,aAAa;AACb,MAAM,CAAC,MAAM,MAAM,GAAG,aAAa,CAAC,CAAC,QAAQ"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import type { RawChunk, CompiledChunk, FileSegment } from "./types.js";
|
|
2
|
+
import type { PreprocessedChunk } from "./preprocessor.js";
|
|
3
|
+
/** Recommended number of chunks per batch (balances context size against efficiency) */
|
|
4
|
+
export declare const BATCH_SIZE = 25;
|
|
5
|
+
export declare const COMPILE_SYSTEM_PROMPT = "\u4F60\u662F\u4E00\u4E2A\"\u77E5\u8BC6\u8BED\u4E49\u7F16\u8BD1\u5668\"\u3002\n\n\u4F60\u7684\u4EFB\u52A1\u4E0D\u662F\u603B\u7ED3\u5185\u5BB9\u3002\n\n\u4F60\u7684\u4EFB\u52A1\u662F\uFF1A\n\u5C06\u4EBA\u7C7B\u968F\u624B\u8BB0\u5F55\u7684\u975E\u7ED3\u6784\u5316\u7B14\u8BB0\uFF0C\n\u8F6C\u6362\u4E3A\u9002\u5408\u673A\u5668\u8BED\u4E49\u7D22\u5F15\u3001\u6982\u5FF5\u68C0\u7D22\u3001\n\u77E5\u8BC6\u805A\u7C7B\u3001\u957F\u671F\u6F14\u5316\u7684\"\u8BA4\u77E5\u77E5\u8BC6\u5355\u5143\"\u3002\n\n\u6838\u5FC3\u539F\u5219\uFF1A\n1. \u4FDD\u7559\u539F\u59CB\u4FE1\u606F \u2014 \u4E0D\u5220\u6280\u672F\u7EC6\u8282\n2. \u4E0D\u6539\u53D8\u539F\u610F \u2014 \u53EA\u89C4\u8303\u5316\u8868\u8FBE\n3. \u8865\u5168\u9690\u5F0F\u8868\u8FBE \u2014 \u8865\u5145\u7701\u7565\u7684\u4E3B\u8BED\u3001\u5C55\u5F00\u7F29\u5199\n4. \u7EDF\u4E00\u672F\u8BED \u2014 \u5C06\u540C\u4E49\u8868\u8FBE\u5F52\u4E00\uFF08\u5982 \"\u72B6\u6001\u6C61\u67D3\" \u2194 \"stale closure\"\uFF09\n5. \u63D0\u53D6\u6838\u5FC3\u6982\u5FF5 \u2014 \u8BC6\u522B\u6280\u672F\u5173\u952E\u8BCD\n6. \u4FDD\u6301\u5355\u4E3B\u9898 \u2014 \u4E00\u4E2A chunk \u53EA\u63CF\u8FF0\u4E00\u4E2A\u8BA4\u77E5\u4E3B\u9898\n7. \u8F93\u51FA\u7ED3\u6784\u5316 JSON \u2014 \u4E25\u683C\u9075\u5FAA schema\n\n\u7981\u6B62\uFF1A\n1. \u8FC7\u5EA6\u603B\u7ED3\n2. \u5220\u9664\u539F\u6587\n3. \u6539\u5199\u903B\u8F91\n4. \u4E3B\u89C2\u63A8\u65AD\n5. \u5F15\u5165\u4E0D\u5B58\u5728\u7684\u4FE1\u606F\n\n\u4F60\u7684\u89D2\u8272\u662F\uFF1A\"\u8BED\u4E49\u6807\u51C6\u5316\u5668\"\uFF0C\u4E0D\u662F\"\u5185\u5BB9\u4F5C\u8005\"\u3002";
|
|
6
|
+
/** Build the compile prompt for a batch of chunks */
|
|
7
|
+
export declare function buildCompilePrompt(chunks: RawChunk[]): string;
|
|
8
|
+
/**
|
|
9
|
+
* Build the optimal embedding input from a compiled ChunkInfo
|
|
10
|
+
*
|
|
11
|
+
* Format recommended by the blueprint:
|
|
12
|
+
* [TOPIC] + [CONCEPTS] + [ALIASES] + [KEYWORDS] + [NORMALIZED] + [RAW]
|
|
13
|
+
*
|
|
14
|
+
* Rationale: reinforces implicit semantics so the vector model matches more stably at retrieval time
|
|
15
|
+
*/
|
|
16
|
+
export declare function buildEmbeddingText(topic: string, normalizedText: string, concepts: string[], aliases: string[], keywords: string[], contentClass: string, temporalAnchor: string | undefined, rawText: string): string;
|
|
17
|
+
/** Try to extract a CompiledChunk array from an LLM response */
|
|
18
|
+
export declare function parseCompiledResult(text: string): CompiledChunk[] | null;
|
|
19
|
+
/** File-level compile system prompt (asks the LLM to do only 4 things) */
|
|
20
|
+
export declare const FILE_COMPILE_SYSTEM_PROMPT = "\u4F60\u662F\u4E00\u4E2A\"\u77E5\u8BC6\u8BED\u4E49\u7F16\u8BD1\u5668\"\u3002\n\n\u4F60\u7684\u4EFB\u52A1: \u5C06\u6574\u7BC7\u7B14\u8BB0\u8F6C\u6362\u4E3A\u7ED3\u6784\u5316\u7684\u8BED\u4E49\u77E5\u8BC6\u5355\u5143\u3002\n\n\u4F60\u9700\u8981\u505A\u7684 4 \u4EF6\u4E8B:\n1. \u81EA\u884C\u5224\u65AD\u8BED\u4E49\u8FB9\u754C \u2014 \u5C06\u6587\u4EF6\u5206\u6210\u82E5\u5E72\u8FDE\u7EED\u7684\u8BED\u4E49\u7247\u6BB5\uFF08segments\uFF09\n2. \u4E3A\u6BCF\u4E2A\u7247\u6BB5\u5199\u51FA topic\uFF08\u6838\u5FC3\u4E3B\u9898\uFF0C\u4E00\u53E5\u8BDD\uFF09\n3. \u4E3A\u6BCF\u4E2A\u7247\u6BB5\u5199\u51FA normalizedText\uFF08\u89C4\u8303\u5316\u6587\u672C\uFF1A\u8865\u5168\u7701\u7565\u3001\u7EDF\u4E00\u672F\u8BED\u3001\u4FDD\u7559\u6240\u6709\u6280\u672F\u7EC6\u8282\uFF09\n4. \u4E3A\u6BCF\u4E2A\u7247\u6BB5\u63D0\u53D6 concepts\uFF08\u6280\u672F\u6982\u5FF5\uFF09\u548C aliases\uFF08\u540C\u4E49\u8868\u8FBE\uFF0C\u683C\u5F0F \"\u4E2D\u6587 \u2194 English\"\uFF09\n\n\u6838\u5FC3\u539F\u5219:\n- \u4FDD\u7559\u539F\u59CB\u4FE1\u606F \u2014 \u4E0D\u5220\u6280\u672F\u7EC6\u8282\uFF08API \u540D\u3001\u53C2\u6570\u3001\u9519\u8BEF\u4FE1\u606F\u3001\u7F29\u5199\uFF09\n- \u4E0D\u6539\u53D8\u539F\u610F \u2014 \u53EA\u89C4\u8303\u5316\u8868\u8FBE\n- \u8BED\u4E49\u8FB9\u754C = \u540C\u4E00\u8BA4\u77E5\u4E3B\u9898\u7684\u81EA\u7136\u6BB5\u6216\u8FDE\u7EED\u6BB5\u843D\n- \u5982\u679C\u6574\u4E2A\u6587\u4EF6\u662F\u5355\u4E00\u4E3B\u9898\uFF0C\u53EA\u8F93\u51FA 1 \u4E2A segment\n\n\u7981\u6B62: \u8FC7\u5EA6\u603B\u7ED3\u3001\u5220\u9664\u539F\u6587\u3001\u6539\u5199\u903B\u8F91\u3001\u4E3B\u89C2\u63A8\u65AD\u3001\u5F15\u5165\u4E0D\u5B58\u5728\u7684\u4FE1\u606F\u3002";
|
|
21
|
+
/**
|
|
22
|
+
* Build the prompt for file-level compilation
|
|
23
|
+
* @param relPath file path
|
|
24
|
+
* @param fullText full text of the file
|
|
25
|
+
* @param preprocessed preprocessor output (shown to the LLM for reference only)
|
|
26
|
+
*/
|
|
27
|
+
export declare function buildFileCompilePrompt(relPath: string, fullText: string, preprocessed: PreprocessedChunk[]): string;
|
|
28
|
+
/** Extract a FileSegment array from an LLM response */
|
|
29
|
+
export declare function parseFileSegments(text: string): FileSegment[] | null;
|
|
30
|
+
/** v5.4 file-level system prompt (minimal version) */
|
|
31
|
+
export declare const FILE_LLM_SYSTEM_PROMPT = "\u4F60\u662F\u4E00\u4E2A\"\u77E5\u8BC6\u8BED\u4E49\u7F16\u8BD1\u5668\"\u3002\n\n\u4F60\u7684\u4EFB\u52A1: \u5C06\u6574\u7BC7\u7B14\u8BB0\u8F6C\u6362\u4E3A\u7ED3\u6784\u5316\u7684\u8BED\u4E49\u5143\u6570\u636E\uFF0C\u7528\u4E8E\u589E\u5F3A\u8BED\u4E49\u641C\u7D22\u3002\n\n\u4F60\u9700\u8981\u8F93\u51FA\u7684 4 \u4E2A\u5B57\u6BB5:\n1. topic \u2014 \u6838\u5FC3\u4E3B\u9898\uFF08\u4E00\u53E5\u8BDD\u6982\u62EC\u5168\u6587\uFF09\n2. normalizedText \u2014 \u89C4\u8303\u5316\u6587\u672C\uFF08\u8865\u5168\u7701\u7565\u4E3B\u8BED\u3001\u7EDF\u4E00\u672F\u8BED\u3001\u4FDD\u7559\u6240\u6709\u6280\u672F\u7EC6\u8282\uFF09\n3. concepts \u2014 \u6280\u672F\u6982\u5FF5\u5217\u8868\uFF083-8 \u4E2A\u6838\u5FC3\u6982\u5FF5\uFF09\n4. aliases \u2014 \u540C\u4E49\u8868\u8FBE\uFF08\u683C\u5F0F \"\u4E2D\u6587 \u2194 English\"\uFF0C2-5 \u7EC4\uFF09\n\n\u6838\u5FC3\u539F\u5219:\n- \u4FDD\u7559\u6240\u6709\u6280\u672F\u7EC6\u8282\uFF08API \u540D\u3001\u53C2\u6570\u3001\u9519\u8BEF\u4FE1\u606F\u3001\u7F29\u5199\u3001\u7248\u672C\u53F7\uFF09\n- \u4E0D\u6539\u53D8\u539F\u610F\uFF0C\u53EA\u89C4\u8303\u5316\u8868\u8FBE\n- concepts \u63D0\u53D6\u6280\u672F\u5173\u952E\u8BCD\uFF0C\u4E0D\u662F\u6458\u8981\n- aliases \u8986\u76D6\u4E2D\u82F1\u5BF9\u7167\u548C\u7F29\u5199\u5C55\u5F00\n\n\u7981\u6B62: \u8FC7\u5EA6\u603B\u7ED3\u3001\u5220\u9664\u539F\u6587\u3001\u6539\u5199\u903B\u8F91\u3001\u4E3B\u89C2\u63A8\u65AD\u3001\u5F15\u5165\u4E0D\u5B58\u5728\u7684\u4FE1\u606F\u3002";
|
|
32
|
+
/**
|
|
33
|
+
* v5.4: build the simplified file-level compile prompt
|
|
34
|
+
*/
|
|
35
|
+
export declare function buildFileLLMPrompt(relPath: string, fullText: string): string;
|
|
36
|
+
/**
|
|
37
|
+
* v5.4: parse the file-level LLM response (a single object, not a segments array)
|
|
38
|
+
*/
|
|
39
|
+
export declare function parseFileLLMResult(text: string): import("./types.js").FileLLMData | null;
|
|
40
|
+
/**
|
|
41
|
+
* v5.4: build the embedding text for the file-level LLM vector
|
|
42
|
+
*/
|
|
43
|
+
export declare function buildFileLLMEmbeddingText(data: import("./types.js").FileLLMData, relPath?: string, maxEmbedLen?: number): string;
|
|
44
|
+
//# sourceMappingURL=semantic-compiler.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"semantic-compiler.d.ts","sourceRoot":"","sources":["../src/semantic-compiler.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EAAE,QAAQ,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AACvE,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAM3D,kCAAkC;AAClC,eAAO,MAAM,UAAU,KAAK,CAAC;AAM7B,eAAO,MAAM,qBAAqB,siDAyBT,CAAC;AAM1B,4BAA4B;AAC5B,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,CAkG7D;AAMD;;;;;;;GAOG;AACH,wBAAgB,kBAAkB,CAChC,KAAK,EAAE,MAAM,EACb,cAAc,EAAE,MAAM,EACtB,QAAQ,EAAE,MAAM,EAAE,EAClB,OAAO,EAAE,MAAM,EAAE,EACjB,QAAQ,EAAE,MAAM,EAAE,EAClB,YAAY,EAAE,MAAM,EACpB,cAAc,EAAE,MAAM,GAAG,SAAS,EAClC,OAAO,EAAE,MAAM,GACd,MAAM,CAWR;AAMD,qCAAqC;AACrC,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,MAAM,GACX,aAAa,EAAE,GAAG,IAAI,CAiBxB;AAMD,0CAA0C;AAC1C,eAAO,MAAM,0BAA0B,koDAgBL,CAAC;AAEnC;;;;;GAKG;AACH,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,EAChB,YAAY,EAAE,iBAAiB,EAAE,GAChC,MAAM,CAwCR;AAED,iCAAiC;AACjC,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,WAAW,EAAE,GAAG,IAAI,CAgBpE;AAMD,kCAAkC;AAClC,eAAO,MAAM,sBAAsB,u7CAgBD,CAAC;AAEnC;;GAEG;AACH,wBAAgB,kBAAkB,CAChC,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,GACf,MAAM,CAmBR;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAChC,IAAI,EAAE,MAAM,GACX,OAAO,YAAY,EAAE,WAAW,GAAG,IAAI,CAyBzC;AAED;;GAEG;AACH,wBAAgB,yBAAyB,CAAC,IAAI,EAAE,OAAO,YAAY,EAAE,WAAW,EAAE,OAAO,CAAC,EAAE,MAAM,EAAE,WAAW,SAAO,GAAG,MAAM,CAY9H"}
|