@llangtop/pwiki-core 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -39
- package/dist/WikiEngine.d.ts.map +1 -1
- package/dist/WikiEngine.js +16 -1
- package/dist/WikiEngine.js.map +1 -1
- package/dist/config.d.ts +1 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +3 -0
- package/dist/config.js.map +1 -1
- package/dist/index.d.ts +4 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -1
- package/dist/index.js.map +1 -1
- package/dist/lib/bm25.d.ts +30 -0
- package/dist/lib/bm25.d.ts.map +1 -0
- package/dist/lib/bm25.js +155 -0
- package/dist/lib/bm25.js.map +1 -0
- package/dist/lib/indexer.d.ts +1 -0
- package/dist/lib/indexer.d.ts.map +1 -1
- package/dist/lib/indexer.js +1 -0
- package/dist/lib/indexer.js.map +1 -1
- package/dist/lib/search.d.ts +3 -0
- package/dist/lib/search.d.ts.map +1 -1
- package/dist/lib/search.js +128 -20
- package/dist/lib/search.js.map +1 -1
- package/dist/lib/semantic-compiler.js +165 -165
- package/dist/lib/store-index.d.ts +3 -0
- package/dist/lib/store-index.d.ts.map +1 -1
- package/dist/lib/store-index.js +14 -1
- package/dist/lib/store-index.js.map +1 -1
- package/dist/lib/store.d.ts +1 -1
- package/dist/lib/store.d.ts.map +1 -1
- package/dist/lib/store.js +1 -1
- package/dist/lib/store.js.map +1 -1
- package/dist/lib/tokenizer.d.ts +10 -0
- package/dist/lib/tokenizer.d.ts.map +1 -0
- package/dist/lib/tokenizer.js +87 -0
- package/dist/lib/tokenizer.js.map +1 -0
- package/package.json +34 -34
- package/dist/WikiEngine.d.ts +0 -73
- package/dist/lib/ast-chunker.d.ts +0 -23
- package/dist/lib/content-cache.d.ts +0 -13
- package/dist/lib/embedder.d.ts +0 -22
- package/dist/lib/file-manifest.d.ts +0 -36
- package/dist/lib/indexer-compile.d.ts +0 -18
- package/dist/lib/indexer-embed.d.ts +0 -21
- package/dist/lib/indexer-scan.d.ts +0 -4
- package/dist/lib/model-registry.d.ts +0 -18
- package/dist/lib/parser.d.ts +0 -9
- package/dist/lib/preprocessor.d.ts +0 -36
- package/dist/lib/semantic-compiler.d.ts +0 -44
- package/dist/lib/semantic-search.d.ts +0 -4
- package/dist/lib/store-config.d.ts +0 -25
- package/dist/lib/store-vectors.d.ts +0 -17
- package/dist/lib/types.d.ts +0 -108
package/dist/lib/store.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"store.d.ts","sourceRoot":"","sources":["../../src/lib/store.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,UAAU,EAAE,SAAS,EAAE,YAAY,EACnC,kBAAkB,EAAE,kBAAkB,EACtC,WAAW,EAAE,YAAY,EACzB,YAAY,EAAE,WAAW,GAC1B,MAAM,mBAAmB,CAAC;AAE3B,OAAO,EACL,QAAQ,EAAE,UAAU,EAAE,WAAW,EAAE,eAAe,EAAE,QAAQ,EAC5D,qBAAqB,EAAE,UAAU,
|
|
1
|
+
{"version":3,"file":"store.d.ts","sourceRoot":"","sources":["../../src/lib/store.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,UAAU,EAAE,SAAS,EAAE,YAAY,EACnC,kBAAkB,EAAE,kBAAkB,EACtC,WAAW,EAAE,YAAY,EACzB,YAAY,EAAE,WAAW,GAC1B,MAAM,mBAAmB,CAAC;AAE3B,OAAO,EACL,QAAQ,EAAE,UAAU,EAAE,WAAW,EAAE,eAAe,EAAE,QAAQ,EAC5D,qBAAqB,EAAE,UAAU,EACjC,aAAa,EAAE,cAAc,GAC9B,MAAM,kBAAkB,CAAC;AAE1B,OAAO,EACL,aAAa,EAAE,aAAa,EAC5B,YAAY,EAAE,YAAY,EAC1B,eAAe,EAAE,iBAAiB,EAAE,eAAe,EACnD,YAAY,GACb,MAAM,oBAAoB,CAAC;AAO5B,wBAAgB,KAAK;;;;;;EAWpB"}
|
package/dist/lib/store.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
// store.ts — 数据层 barrel
|
|
2
2
|
// 聚合 store-config + store-index + store-vectors
|
|
3
3
|
export { getSources, addSource, removeSource, getSemanticEnabled, setSemanticEnabled, readModelId, writeModelId, getWikiModel, configStats, } from "./store-config.js";
|
|
4
|
-
export { getIndex, mergeIndex, removeEntry, updateEntryPath, getEntry, removeEntriesBySource, indexStats, } from "./store-index.js";
|
|
4
|
+
export { getIndex, mergeIndex, removeEntry, updateEntryPath, getEntry, removeEntriesBySource, indexStats, readBm25Stats, writeBm25Stats, } from "./store-index.js";
|
|
5
5
|
export { getEmbeddings, setEmbeddings, getChunkInfo, setChunkInfo, removeEmbedding, getEmbeddingModel, getEmbeddingDim, vectorsStats, } from "./store-vectors.js";
|
|
6
6
|
import { configStats } from "./store-config.js";
|
|
7
7
|
import { indexStats } from "./store-index.js";
|
package/dist/lib/store.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"store.js","sourceRoot":"","sources":["../../src/lib/store.ts"],"names":[],"mappings":"AAAA,wBAAwB;AACxB,gDAAgD;AAEhD,OAAO,EACL,UAAU,EAAE,SAAS,EAAE,YAAY,EACnC,kBAAkB,EAAE,kBAAkB,EACtC,WAAW,EAAE,YAAY,EACzB,YAAY,EAAE,WAAW,GAC1B,MAAM,mBAAmB,CAAC;AAE3B,OAAO,EACL,QAAQ,EAAE,UAAU,EAAE,WAAW,EAAE,eAAe,EAAE,QAAQ,EAC5D,qBAAqB,EAAE,UAAU,
|
|
1
|
+
{"version":3,"file":"store.js","sourceRoot":"","sources":["../../src/lib/store.ts"],"names":[],"mappings":"AAAA,wBAAwB;AACxB,gDAAgD;AAEhD,OAAO,EACL,UAAU,EAAE,SAAS,EAAE,YAAY,EACnC,kBAAkB,EAAE,kBAAkB,EACtC,WAAW,EAAE,YAAY,EACzB,YAAY,EAAE,WAAW,GAC1B,MAAM,mBAAmB,CAAC;AAE3B,OAAO,EACL,QAAQ,EAAE,UAAU,EAAE,WAAW,EAAE,eAAe,EAAE,QAAQ,EAC5D,qBAAqB,EAAE,UAAU,EACjC,aAAa,EAAE,cAAc,GAC9B,MAAM,kBAAkB,CAAC;AAE1B,OAAO,EACL,aAAa,EAAE,aAAa,EAC5B,YAAY,EAAE,YAAY,EAC1B,eAAe,EAAE,iBAAiB,EAAE,eAAe,EACnD,YAAY,GACb,MAAM,oBAAoB,CAAC;AAE5B,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAC9C,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AAEvD,MAAM,UAAU,KAAK;IACnB,MAAM,CAAC,GAAG,WAAW,EAAE,CAAC;IACxB,MAAM,CAAC,GAAG,UAAU,EAAE,CAAC;IACvB,MAAM,CAAC,GAAG,YAAY,EAAE,CAAC;IACzB,OAAO;QACL,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,KAAK,EAAE,CAAC,CAAC,KAAK;QACd,QAAQ,EAAE,CAAC,CAAC,QAAQ;QACpB,eAAe,EAAE,kBAAkB,EAAE;QACrC,UAAU,EAAE,CAAC,CAAC,UAAU;KACzB,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* 对一段文本进行分词
|
|
3
|
+
* @returns 小写化的 token 列表,不包含单字符 ASCII
|
|
4
|
+
*
|
|
5
|
+
* @example
|
|
6
|
+
* tokenize("变压器故障排查,check transformer oil 温度")
|
|
7
|
+
* // → ["变压","压器","器故","故障","障排","排查","check","transformer","oil","温度"]
|
|
8
|
+
*/
|
|
9
|
+
export declare function tokenize(text: string): string[];
|
|
10
|
+
//# sourceMappingURL=tokenizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../../src/lib/tokenizer.ts"],"names":[],"mappings":"AA6BA;;;;;;;GAOG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAqD/C"}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
// tokenizer.ts — 中文 2-gram + ASCII 切词分词器
|
|
2
|
+
//
|
|
3
|
+
// 零外部依赖。状态机按字符 Unicode 属性分类:
|
|
4
|
+
// CJK 字符 → 收集连续序列 → 输出二元组 (2-gram)
|
|
5
|
+
// ASCII 字母/数字 → 按非字母数字切分,保留 ≥2 字符的词
|
|
6
|
+
// 其他(标点、空白)→ 丢弃,作分隔符
|
|
7
|
+
//
|
|
8
|
+
// 查询和文档使用同一个 tokenize(),确保 BM25 token 对齐。
|
|
9
|
+
/**
 * Report whether a code point is a CJK ideograph, i.e. a character that
 * tokenize() collects into runs and emits as overlapping 2-grams.
 *
 * Covers the BMP blocks plus the supplementary-plane extensions. Extensions
 * C through I were added next to the original Extension B range so that all
 * CJK Unified Ideographs are classified the same way.
 *
 * @param {number} cp Unicode code point (not a UTF-16 code unit).
 * @returns {boolean} true when cp lies in one of the listed ideograph blocks.
 */
function isCJK(cp) {
    return (cp >= 0x4E00 && cp <= 0x9FFF) // CJK Unified Ideographs
        || (cp >= 0x3400 && cp <= 0x4DBF) // CJK Extension A
        || (cp >= 0x20000 && cp <= 0x2A6DF) // CJK Extension B
        || (cp >= 0x2A700 && cp <= 0x2EE5F) // CJK Extensions C, D, E, F, I
        || (cp >= 0x30000 && cp <= 0x323AF) // CJK Extensions G, H
        || (cp >= 0xF900 && cp <= 0xFAFF) // CJK Compatibility Ideographs
        || (cp >= 0x2F800 && cp <= 0x2FA1F); // CJK Compatibility Supplement
}
/**
 * Report whether a code point is an ASCII letter or digit. Only these
 * characters are accumulated into ASCII word tokens.
 *
 * @param {number} cp Unicode code point.
 * @returns {boolean} true for a-z, A-Z and 0-9.
 */
function isASCII(cp) {
    return (cp >= 0x61 && cp <= 0x7A) // a-z
        || (cp >= 0x41 && cp <= 0x5A) // A-Z
        || (cp >= 0x30 && cp <= 0x39); // 0-9
}
/**
 * Split text into search tokens.
 *
 * - A run of consecutive CJK ideographs becomes every adjacent pair (2-gram);
 *   a run of exactly one ideograph is kept as a single-character token.
 * - A run of consecutive ASCII letters/digits becomes one lowercased token,
 *   but only when it is at least two characters long.
 * - Every other character (punctuation, whitespace, emoji, ...) is dropped
 *   and acts as a separator.
 *
 * @param {string} text Input text.
 * @returns {string[]} Tokens in order of appearance.
 *
 * @example
 * tokenize("变压器故障排查,check transformer oil 温度")
 * // → ["变压","压器","器故","故障","障排","排查","check","transformer","oil","温度"]
 */
export function tokenize(text) {
    const tokens = [];
    const cjkBuf = []; // pending run of CJK characters (one entry per code point)
    const asciiBuf = []; // pending run of ASCII letters/digits
    /**
     * Emit the pending CJK run as 2-gram tokens and clear the buffer.
     */
    function flushCJK() {
        if (cjkBuf.length === 1) {
            // A lone ideograph cannot form a pair; keep it as its own token.
            tokens.push(cjkBuf[0]);
        }
        else {
            // Sliding window over adjacent pairs (no iterations for an empty run).
            for (let i = 0; i < cjkBuf.length - 1; i++) {
                tokens.push(cjkBuf[i] + cjkBuf[i + 1]);
            }
        }
        cjkBuf.length = 0;
    }
    /**
     * Emit the pending ASCII run as one lowercased word (if >= 2 chars) and
     * clear the buffer.
     */
    function flushASCII() {
        if (asciiBuf.length >= 2) {
            tokens.push(asciiBuf.join("").toLowerCase());
        }
        asciiBuf.length = 0;
    }
    // Iterate by code point, not by UTF-16 code unit: indexing text[i] would
    // hand supplementary-plane ideographs to isCJK() as two lone surrogates,
    // which never match any range and would be treated as separators.
    for (const ch of text) {
        const cp = ch.codePointAt(0);
        if (isCJK(cp)) {
            flushASCII(); // script switch: finish the ASCII word first
            cjkBuf.push(ch);
        }
        else if (isASCII(cp)) {
            flushCJK(); // script switch: finish the CJK run first
            asciiBuf.push(ch);
        }
        else {
            // Separator: close whichever run is open.
            flushCJK();
            flushASCII();
        }
    }
    // End of text: emit whatever is still buffered.
    flushCJK();
    flushASCII();
    return tokens;
}
|
|
87
|
+
//# sourceMappingURL=tokenizer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../../src/lib/tokenizer.ts"],"names":[],"mappings":"AAAA,yCAAyC;AACzC,EAAE;AACF,6BAA6B;AAC7B,qCAAqC;AACrC,sCAAsC;AACtC,uBAAuB;AACvB,EAAE;AACF,0CAA0C;AAE1C;;GAEG;AACH,SAAS,KAAK,CAAC,EAAU;IACvB,OAAO,CAAC,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,CAAG,yBAAyB;WAC1D,CAAC,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,CAAG,kBAAkB;WACnD,CAAC,EAAE,IAAI,OAAO,IAAI,EAAE,IAAI,OAAO,CAAC,CAAC,kBAAkB;WACnD,CAAC,EAAE,IAAI,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,CAAG,+BAA+B;WAChE,CAAC,EAAE,IAAI,OAAO,IAAI,EAAE,IAAI,OAAO,CAAC,CAAC,CAAA,+BAA+B;AACzE,CAAC;AAED;;GAEG;AACH,SAAS,OAAO,CAAC,EAAU;IACzB,OAAO,CAAC,EAAE,IAAI,IAAI,IAAI,EAAE,IAAI,IAAI,CAAC,CAAE,MAAM;WAClC,CAAC,EAAE,IAAI,IAAI,IAAI,EAAE,IAAI,IAAI,CAAC,CAAE,MAAM;WAClC,CAAC,EAAE,IAAI,IAAI,IAAI,EAAE,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM;AAC3C,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,QAAQ,CAAC,IAAY;IACnC,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,MAAM,GAAa,EAAE,CAAC,CAAG,eAAe;IAC9C,MAAM,QAAQ,GAAa,EAAE,CAAC,CAAC,iBAAiB;IAEhD;;OAEG;IACH,SAAS,QAAQ;QACf,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,aAAa;YACb,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;gBAAE,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAClD,CAAC;aAAM,CAAC;YACN,gBAAgB;YAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC3C,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACzC,CAAC;QACH,CAAC;QACD,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC;IACpB,CAAC;IAED;;OAEG;IACH,SAAS,UAAU;QACjB,IAAI,QAAQ,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YACzB,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;QAC/C,CAAC;QACD,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC;IACtB,CAAC;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACnB,MAAM,EAAE,GAAG,EAAE,CAAC,WAAW,CAAC,CAAC,CAAE,CAAC;QAE9B,IAAI,KAAK,CAAC,EAAE,CAAC,EAAE,CAAC;YACd,UAAU,EAAE,CAAC,CAAS,sBAAsB;YAC5C,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAClB,CAAC;aAAM,IAAI,OAAO,CAAC,EAAE,CAAC,
EAAE,CAAC;YACvB,QAAQ,EAAE,CAAC,CAAW,oBAAoB;YAC1C,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACpB,CAAC;aAAM,CAAC;YACN,8BAA8B;YAC9B,QAAQ,EAAE,CAAC;YACX,UAAU,EAAE,CAAC;QACf,CAAC;IACH,CAAC;IAED,YAAY;IACZ,QAAQ,EAAE,CAAC;IACX,UAAU,EAAE,CAAC;IAEb,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,34 +1,34 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "@llangtop/pwiki-core",
|
|
3
|
-
"version": "1.1.0",
|
|
4
|
-
"description": "Wiki knowledge base engine — keyword/semantic/hybrid search with local ONNX embeddings",
|
|
5
|
-
"type": "module",
|
|
6
|
-
"main": "./dist/index.js",
|
|
7
|
-
"types": "./dist/index.d.ts",
|
|
8
|
-
"exports": {
|
|
9
|
-
".": {
|
|
10
|
-
"types": "./dist/index.d.ts",
|
|
11
|
-
"import": "./dist/index.js"
|
|
12
|
-
}
|
|
13
|
-
},
|
|
14
|
-
"files": ["dist", "README.md"],
|
|
15
|
-
"scripts": {
|
|
16
|
-
"build": "tsc",
|
|
17
|
-
"start": "node dist/index.js"
|
|
18
|
-
},
|
|
19
|
-
"dependencies": {
|
|
20
|
-
"@huggingface/transformers": "^3.0.0",
|
|
21
|
-
"remark-parse": "^11.0.0",
|
|
22
|
-
"unified": "^11.0.0",
|
|
23
|
-
"unist-util-visit": "^5.0.0"
|
|
24
|
-
},
|
|
25
|
-
"devDependencies": {
|
|
26
|
-
"@types/mdast": "^4.0.0",
|
|
27
|
-
"@types/node": "^22.0.0",
|
|
28
|
-
"typescript": "^5.5.0"
|
|
29
|
-
},
|
|
30
|
-
"engines": {
|
|
31
|
-
"node": ">=18"
|
|
32
|
-
},
|
|
33
|
-
"license": "MIT"
|
|
34
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"name": "@llangtop/pwiki-core",
|
|
3
|
+
"version": "1.2.0",
|
|
4
|
+
"description": "Wiki knowledge base engine — keyword/semantic/hybrid search with local ONNX embeddings",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./dist/index.js",
|
|
7
|
+
"types": "./dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"import": "./dist/index.js"
|
|
12
|
+
}
|
|
13
|
+
},
|
|
14
|
+
"files": ["dist", "README.md"],
|
|
15
|
+
"scripts": {
|
|
16
|
+
"build": "tsc",
|
|
17
|
+
"start": "node dist/index.js"
|
|
18
|
+
},
|
|
19
|
+
"dependencies": {
|
|
20
|
+
"@huggingface/transformers": "^3.0.0",
|
|
21
|
+
"remark-parse": "^11.0.0",
|
|
22
|
+
"unified": "^11.0.0",
|
|
23
|
+
"unist-util-visit": "^5.0.0"
|
|
24
|
+
},
|
|
25
|
+
"devDependencies": {
|
|
26
|
+
"@types/mdast": "^4.0.0",
|
|
27
|
+
"@types/node": "^22.0.0",
|
|
28
|
+
"typescript": "^5.5.0"
|
|
29
|
+
},
|
|
30
|
+
"engines": {
|
|
31
|
+
"node": ">=18"
|
|
32
|
+
},
|
|
33
|
+
"license": "MIT"
|
|
34
|
+
}
|
package/dist/WikiEngine.d.ts
DELETED
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
import type { ModelInfo } from "./lib/model-registry.js";
|
|
2
|
-
import type { SearchMode, SearchHit, FileEntry, FileLLMData, WikiStatus } from "./lib/types.js";
|
|
3
|
-
export interface EngineConfig {
|
|
4
|
-
basePath?: string;
|
|
5
|
-
modelId?: string;
|
|
6
|
-
}
|
|
7
|
-
export declare class WikiEngine {
|
|
8
|
-
constructor(config?: EngineConfig);
|
|
9
|
-
get sources(): string[];
|
|
10
|
-
addSource(absPath: string): boolean;
|
|
11
|
-
removeSource(target: string): string | null;
|
|
12
|
-
loadSource(absPath: string): Promise<number>;
|
|
13
|
-
load(): Promise<{
|
|
14
|
-
files: number;
|
|
15
|
-
sources: number;
|
|
16
|
-
}>;
|
|
17
|
-
search(query: string, mode?: SearchMode): Promise<SearchHit[]>;
|
|
18
|
-
readEntry(pathOrRelPath: string): {
|
|
19
|
-
entry: FileEntry;
|
|
20
|
-
content: string;
|
|
21
|
-
} | null;
|
|
22
|
-
createEntry(sourceDir: string, relPath: string, title?: string, tags?: string[], content?: string): string;
|
|
23
|
-
renameEntry(relPath: string, newTitle: string): boolean;
|
|
24
|
-
moveEntry(relPath: string, newRelPath: string): boolean;
|
|
25
|
-
modifyEntry(sourceDir: string, relPath: string, content: string): boolean;
|
|
26
|
-
get semanticEnabled(): boolean;
|
|
27
|
-
enableSemantic(modelId?: string): Promise<{
|
|
28
|
-
ok: boolean;
|
|
29
|
-
msg: string;
|
|
30
|
-
}>;
|
|
31
|
-
disableSemantic(): void;
|
|
32
|
-
generateEmbeddings(sourceDir?: string): Promise<{
|
|
33
|
-
embedded: number;
|
|
34
|
-
}>;
|
|
35
|
-
downloadModel(modelId?: string): Promise<{
|
|
36
|
-
ok: boolean;
|
|
37
|
-
msg: string;
|
|
38
|
-
}>;
|
|
39
|
-
listModels(): ModelInfo[];
|
|
40
|
-
get llmInfo(): {
|
|
41
|
-
apiBase: string;
|
|
42
|
-
model: string;
|
|
43
|
-
hasKey: boolean;
|
|
44
|
-
};
|
|
45
|
-
compileStatus(sourceDir?: string): {
|
|
46
|
-
total: number;
|
|
47
|
-
compiled: number;
|
|
48
|
-
uncompiled: string[];
|
|
49
|
-
};
|
|
50
|
-
getCompilePrompt(relPath: string): {
|
|
51
|
-
system: string;
|
|
52
|
-
user: string;
|
|
53
|
-
} | null;
|
|
54
|
-
storeCompiled(relPath: string, data: FileLLMData): boolean;
|
|
55
|
-
compileFile(relPath: string, opts?: {
|
|
56
|
-
model?: string;
|
|
57
|
-
force?: boolean;
|
|
58
|
-
}): Promise<{
|
|
59
|
-
ok: boolean;
|
|
60
|
-
msg: string;
|
|
61
|
-
}>;
|
|
62
|
-
compileAll(sourceDir?: string, limit?: number, opts?: {
|
|
63
|
-
model?: string;
|
|
64
|
-
force?: boolean;
|
|
65
|
-
}): Promise<{
|
|
66
|
-
compiled: number;
|
|
67
|
-
skipped: number;
|
|
68
|
-
failed: number;
|
|
69
|
-
msgs: string[];
|
|
70
|
-
}>;
|
|
71
|
-
status(): WikiStatus;
|
|
72
|
-
}
|
|
73
|
-
//# sourceMappingURL=WikiEngine.d.ts.map
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
/** AST 分块结果(兼容旧 extractChunks 接口 + 新字段) */
|
|
2
|
-
export interface ChunkResult {
|
|
3
|
-
key: string;
|
|
4
|
-
heading: string;
|
|
5
|
-
level: number;
|
|
6
|
-
embedText: string;
|
|
7
|
-
rawText: string;
|
|
8
|
-
headingPath: string[];
|
|
9
|
-
chunkTypeHint: string;
|
|
10
|
-
wikilinks: string[];
|
|
11
|
-
startLine: number;
|
|
12
|
-
endLine: number;
|
|
13
|
-
}
|
|
14
|
-
/**
|
|
15
|
-
* 使用 unified + remark-parse 解析 markdown,按标题切分为语义块。
|
|
16
|
-
*
|
|
17
|
-
* @param raw 原始 markdown 文本
|
|
18
|
-
* @param relPath 文件相对路径(用于 key 生成)
|
|
19
|
-
* @param defaultTitle 无标题时的默认标题
|
|
20
|
-
* @returns 分块结果数组,AST 解析失败时返回空数组(调用方应降级 regex)
|
|
21
|
-
*/
|
|
22
|
-
export declare function extractChunksAST(raw: string, relPath: string, defaultTitle: string, maxEmbedLen?: number): Promise<ChunkResult[]>;
|
|
23
|
-
//# sourceMappingURL=ast-chunker.d.ts.map
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
/** 存入缓存 */
|
|
2
|
-
export declare function setContent(relPath: string, content: string): void;
|
|
3
|
-
/** 读取缓存 */
|
|
4
|
-
export declare function getContent(relPath: string): string | undefined;
|
|
5
|
-
/** 检查是否已缓存 */
|
|
6
|
-
export declare function hasContent(relPath: string): boolean;
|
|
7
|
-
/** 按 sourceDir 清除缓存条目 */
|
|
8
|
-
export declare function clearSource(sourceDir: string): void;
|
|
9
|
-
/** 清除所有缓存 */
|
|
10
|
-
export declare function clearAll(): void;
|
|
11
|
-
/** 缓存大小 */
|
|
12
|
-
export declare function cacheSize(): number;
|
|
13
|
-
//# sourceMappingURL=content-cache.d.ts.map
|
package/dist/lib/embedder.d.ts
DELETED
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
export interface LocalModelInfo {
|
|
2
|
-
path: string;
|
|
3
|
-
variant: "fp32" | "int8" | "none";
|
|
4
|
-
onnxSize: number;
|
|
5
|
-
otherSize: number;
|
|
6
|
-
}
|
|
7
|
-
export declare function getLocalModelInfo(): LocalModelInfo | null;
|
|
8
|
-
export declare function initialize(): Promise<boolean>;
|
|
9
|
-
export declare function isAvailable(): boolean;
|
|
10
|
-
export declare function downloadModel(modelId?: string): {
|
|
11
|
-
ok: boolean;
|
|
12
|
-
msg: string;
|
|
13
|
-
};
|
|
14
|
-
export declare function getInitError(): string | null;
|
|
15
|
-
export declare function getModelName(): string;
|
|
16
|
-
export declare function getModelRepo(): string;
|
|
17
|
-
export declare function getModelSource(): string;
|
|
18
|
-
export declare function getLoadedVariant(): string;
|
|
19
|
-
export declare function embed(text: string): Promise<number[]>;
|
|
20
|
-
export declare function embedBatch(texts: string[]): Promise<number[][]>;
|
|
21
|
-
export declare function cosineSimilarity(a: number[], b: number[]): number;
|
|
22
|
-
//# sourceMappingURL=embedder.d.ts.map
|
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
export interface FileManifestEntry {
|
|
2
|
-
md5: string;
|
|
3
|
-
fileSize: number;
|
|
4
|
-
astChunkCount: number;
|
|
5
|
-
astIndexedAt: string;
|
|
6
|
-
llmCompiled: boolean;
|
|
7
|
-
llmCompiledAt?: string;
|
|
8
|
-
compilingSince?: string;
|
|
9
|
-
hasSemanticVectors: boolean;
|
|
10
|
-
contentClass?: string;
|
|
11
|
-
deleted?: boolean;
|
|
12
|
-
}
|
|
13
|
-
export interface FileManifest {
|
|
14
|
-
version: 1;
|
|
15
|
-
files: Record<string, FileManifestEntry>;
|
|
16
|
-
}
|
|
17
|
-
export declare function getManifestStats(): {
|
|
18
|
-
total: number;
|
|
19
|
-
compiled: number;
|
|
20
|
-
withVectors: number;
|
|
21
|
-
};
|
|
22
|
-
export declare function getManifest(): FileManifest;
|
|
23
|
-
export declare function getFileState(relPath: string): FileManifestEntry | undefined;
|
|
24
|
-
export declare function updateFileState(relPath: string, patch: Partial<FileManifestEntry>): void;
|
|
25
|
-
export declare function removeFileState(relPath: string): void;
|
|
26
|
-
export declare function computeMD5(content: string): string;
|
|
27
|
-
export declare function isFileChanged(relPath: string, currentMD5: string): boolean;
|
|
28
|
-
export declare function isCompilationStale(relPath: string, currentMD5: string): boolean;
|
|
29
|
-
export declare function ensureCompiledDir(): void;
|
|
30
|
-
export declare function getCompiledFilePath(relPath: string): string;
|
|
31
|
-
export declare function manifestStats(): {
|
|
32
|
-
total: number;
|
|
33
|
-
compiled: number;
|
|
34
|
-
stale: number;
|
|
35
|
-
};
|
|
36
|
-
//# sourceMappingURL=file-manifest.d.ts.map
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
import type { CompiledChunk, RawChunk, FileSegment, FileLLMData } from "./types.js";
|
|
2
|
-
import type { PreprocessedChunk } from "./preprocessor.js";
|
|
3
|
-
/**
|
|
4
|
-
* 获取所有已索引文件的原始块(供 AI 编译) */
|
|
5
|
-
export declare function getRawChunks(sourceDir?: string, uncompiledOnly?: boolean): Promise<RawChunk[]>;
|
|
6
|
-
/**
|
|
7
|
-
* 存储块级编译结果并重建 embedding (v5.1 遗留)
|
|
8
|
-
*/
|
|
9
|
-
export declare function storeCompiledChunks(compiled: CompiledChunk[]): Promise<number>;
|
|
10
|
-
/**
|
|
11
|
-
* 存储文件级编译结果并重建 embedding (v5.2 → v5.4 过渡)
|
|
12
|
-
* v5.4 TODO: 替换为 storeFileLLMVector — 只挂 1 个 ###llm 向量,不删 AST chunks
|
|
13
|
-
*/
|
|
14
|
-
export declare function storeFileSegments(relPath: string, segments: FileSegment[], preprocessed: PreprocessedChunk[]): Promise<number>;
|
|
15
|
-
/**
|
|
16
|
-
* 存储文件级 LLM 编译结果:挂载 1 个 ###llm 向量,不删除 AST chunks。 */
|
|
17
|
-
export declare function storeFileLLMVector(sourceDir: string, relPath: string, llmData: FileLLMData, llmModel?: string): Promise<boolean>;
|
|
18
|
-
//# sourceMappingURL=indexer-compile.d.ts.map
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
import type { FileEntry } from "./types.js";
|
|
2
|
-
/**
|
|
3
|
-
* 按标题将文件分割为多个块。v5.3: 优先 AST 解析,失败降级 regex。 */
|
|
4
|
-
export declare function extractChunks(filePath: string, relPath: string, defaultTitle: string, maxEmbedLen?: number): Promise<{
|
|
5
|
-
key: string;
|
|
6
|
-
heading: string;
|
|
7
|
-
level: number;
|
|
8
|
-
embedText: string;
|
|
9
|
-
rawText: string;
|
|
10
|
-
}[]>;
|
|
11
|
-
/**
|
|
12
|
-
* 批量生成 embedding 并持久化到 vectors.json
|
|
13
|
-
*/
|
|
14
|
-
export declare function generateEmbeddings(sourceDir: string, entries: FileEntry[]): Promise<number>;
|
|
15
|
-
/**
|
|
16
|
-
* 为单个文件生成/更新 embedding
|
|
17
|
-
*/
|
|
18
|
-
export declare function embedSingleFile(sourceDir: string, relPath: string, title: string): Promise<boolean>;
|
|
19
|
-
/** 计算全部向量的均值(噪声基底),供语义搜索降噪 */
|
|
20
|
-
export declare function recomputeCentroid(): void;
|
|
21
|
-
//# sourceMappingURL=indexer-embed.d.ts.map
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
export interface ModelInfo {
|
|
2
|
-
id: string;
|
|
3
|
-
name: string;
|
|
4
|
-
hfRepo: string;
|
|
5
|
-
dim: number;
|
|
6
|
-
description: string;
|
|
7
|
-
languages: string[];
|
|
8
|
-
maxTokens: number;
|
|
9
|
-
int8Size: number;
|
|
10
|
-
fp32Size: number;
|
|
11
|
-
}
|
|
12
|
-
export declare const BUILTIN_MODELS: ModelInfo[];
|
|
13
|
-
export declare function getBuiltinModels(): ModelInfo[];
|
|
14
|
-
export declare function findModel(id: string): ModelInfo | undefined;
|
|
15
|
-
export declare function getCurrentModel(): ModelInfo;
|
|
16
|
-
export declare function selectModel(id: string): ModelInfo | null;
|
|
17
|
-
export declare function getDefaultModelId(): string;
|
|
18
|
-
//# sourceMappingURL=model-registry.d.ts.map
|
package/dist/lib/parser.d.ts
DELETED
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
import type { FileEntry } from "./types.js";
|
|
2
|
-
/**
|
|
3
|
-
* 解析单个 .md 文件为 FileEntry
|
|
4
|
-
* @param root 数据源根目录
|
|
5
|
-
* @param filePath 文件绝对路径
|
|
6
|
-
* @param mtime 文件修改时间(可选;不传则通过 statSync 自动获取)
|
|
7
|
-
*/
|
|
8
|
-
export declare function parseFileEntry(root: string, filePath: string, mtime?: string): FileEntry | null;
|
|
9
|
-
//# sourceMappingURL=parser.d.ts.map
|
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
/** chunkType: markdown 特征推断 */
|
|
2
|
-
export declare function inferChunkType(text: string, heading: string): string;
|
|
3
|
-
/** contentClass: 文件路径推断 */
|
|
4
|
-
export declare function inferContentClass(relPath: string): string;
|
|
5
|
-
/** importance: 启发式打分 (0.1-1.0) */
|
|
6
|
-
export declare function inferImportance(text: string, heading: string): number;
|
|
7
|
-
/** temporalAnchor: 正则提取第一个 YYYY-MM-DD */
|
|
8
|
-
export declare function inferTemporalAnchor(text: string): string | undefined;
|
|
9
|
-
/** confidence: 基于文本长度的预设置信度 */
|
|
10
|
-
export declare function inferConfidence(text: string): number;
|
|
11
|
-
/** summary: 清洗后取前 30 字 */
|
|
12
|
-
export declare function inferSummary(text: string): string;
|
|
13
|
-
/** keywords: 英文标识符 + 中文高频词预提取 */
|
|
14
|
-
export declare function inferKeywords(text: string): string[];
|
|
15
|
-
/** 预处理器对文件全文的输出 */
|
|
16
|
-
export interface PreprocessedChunk {
|
|
17
|
-
/** 程序切分的块(按标题简单切,仅作兜底) */
|
|
18
|
-
heading: string;
|
|
19
|
-
level: number;
|
|
20
|
-
text: string;
|
|
21
|
-
/** 程序提取的元数据 */
|
|
22
|
-
chunkType: string;
|
|
23
|
-
contentClass: string;
|
|
24
|
-
importance: number;
|
|
25
|
-
temporalAnchor?: string;
|
|
26
|
-
confidence: number;
|
|
27
|
-
summary: string;
|
|
28
|
-
keywords: string[];
|
|
29
|
-
}
|
|
30
|
-
/**
|
|
31
|
-
* 对文件全文做预处理: AST 分块 + 提取每段元数据。
|
|
32
|
-
* v5.3: 复用 ast-chunker 替代 regex 逐行扫描。
|
|
33
|
-
* 这是兜底分块 — LLM 在文件级编译时可能输出不同的 segments。
|
|
34
|
-
*/
|
|
35
|
-
export declare function preprocessFile(relPath: string, fullText: string, defaultTitle: string): Promise<PreprocessedChunk[]>;
|
|
36
|
-
//# sourceMappingURL=preprocessor.d.ts.map
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
import type { RawChunk, CompiledChunk, FileSegment } from "./types.js";
|
|
2
|
-
import type { PreprocessedChunk } from "./preprocessor.js";
|
|
3
|
-
/** 推荐每批处理的 chunk 数(平衡上下文大小和效率) */
|
|
4
|
-
export declare const BATCH_SIZE = 25;
|
|
5
|
-
export declare const COMPILE_SYSTEM_PROMPT = "\u4F60\u662F\u4E00\u4E2A\"\u77E5\u8BC6\u8BED\u4E49\u7F16\u8BD1\u5668\"\u3002\n\n\u4F60\u7684\u4EFB\u52A1\u4E0D\u662F\u603B\u7ED3\u5185\u5BB9\u3002\n\n\u4F60\u7684\u4EFB\u52A1\u662F\uFF1A\n\u5C06\u4EBA\u7C7B\u968F\u624B\u8BB0\u5F55\u7684\u975E\u7ED3\u6784\u5316\u7B14\u8BB0\uFF0C\n\u8F6C\u6362\u4E3A\u9002\u5408\u673A\u5668\u8BED\u4E49\u7D22\u5F15\u3001\u6982\u5FF5\u68C0\u7D22\u3001\n\u77E5\u8BC6\u805A\u7C7B\u3001\u957F\u671F\u6F14\u5316\u7684\"\u8BA4\u77E5\u77E5\u8BC6\u5355\u5143\"\u3002\n\n\u6838\u5FC3\u539F\u5219\uFF1A\n1. \u4FDD\u7559\u539F\u59CB\u4FE1\u606F \u2014 \u4E0D\u5220\u6280\u672F\u7EC6\u8282\n2. \u4E0D\u6539\u53D8\u539F\u610F \u2014 \u53EA\u89C4\u8303\u5316\u8868\u8FBE\n3. \u8865\u5168\u9690\u5F0F\u8868\u8FBE \u2014 \u8865\u5145\u7701\u7565\u7684\u4E3B\u8BED\u3001\u5C55\u5F00\u7F29\u5199\n4. \u7EDF\u4E00\u672F\u8BED \u2014 \u5C06\u540C\u4E49\u8868\u8FBE\u5F52\u4E00\uFF08\u5982 \"\u72B6\u6001\u6C61\u67D3\" \u2194 \"stale closure\"\uFF09\n5. \u63D0\u53D6\u6838\u5FC3\u6982\u5FF5 \u2014 \u8BC6\u522B\u6280\u672F\u5173\u952E\u8BCD\n6. \u4FDD\u6301\u5355\u4E3B\u9898 \u2014 \u4E00\u4E2A chunk \u53EA\u63CF\u8FF0\u4E00\u4E2A\u8BA4\u77E5\u4E3B\u9898\n7. \u8F93\u51FA\u7ED3\u6784\u5316 JSON \u2014 \u4E25\u683C\u9075\u5FAA schema\n\n\u7981\u6B62\uFF1A\n1. \u8FC7\u5EA6\u603B\u7ED3\n2. \u5220\u9664\u539F\u6587\n3. \u6539\u5199\u903B\u8F91\n4. \u4E3B\u89C2\u63A8\u65AD\n5. \u5F15\u5165\u4E0D\u5B58\u5728\u7684\u4FE1\u606F\n\n\u4F60\u7684\u89D2\u8272\u662F\uFF1A\"\u8BED\u4E49\u6807\u51C6\u5316\u5668\"\uFF0C\u4E0D\u662F\"\u5185\u5BB9\u4F5C\u8005\"\u3002";
|
|
6
|
-
/** 为一批 chunk 构建编译 prompt */
|
|
7
|
-
export declare function buildCompilePrompt(chunks: RawChunk[]): string;
|
|
8
|
-
/**
|
|
9
|
-
* 从编译后的 ChunkInfo 构建最优 embedding 输入
|
|
10
|
-
*
|
|
11
|
-
* 蓝图推荐格式:
|
|
12
|
-
* [TOPIC] + [CONCEPTS] + [ALIASES] + [KEYWORDS] + [NORMALIZED] + [RAW]
|
|
13
|
-
*
|
|
14
|
-
* 原因: 增强隐式语义,让向量模型在检索时更稳定地匹配
|
|
15
|
-
*/
|
|
16
|
-
export declare function buildEmbeddingText(topic: string, normalizedText: string, concepts: string[], aliases: string[], keywords: string[], contentClass: string, temporalAnchor: string | undefined, rawText: string): string;
|
|
17
|
-
/** 尝试从 LLM 响应中提取 CompiledChunk 数组 */
|
|
18
|
-
export declare function parseCompiledResult(text: string): CompiledChunk[] | null;
|
|
19
|
-
/** 文件级编译 System Prompt(只要求 LLM 做 4 件事) */
|
|
20
|
-
export declare const FILE_COMPILE_SYSTEM_PROMPT = "\u4F60\u662F\u4E00\u4E2A\"\u77E5\u8BC6\u8BED\u4E49\u7F16\u8BD1\u5668\"\u3002\n\n\u4F60\u7684\u4EFB\u52A1: \u5C06\u6574\u7BC7\u7B14\u8BB0\u8F6C\u6362\u4E3A\u7ED3\u6784\u5316\u7684\u8BED\u4E49\u77E5\u8BC6\u5355\u5143\u3002\n\n\u4F60\u9700\u8981\u505A\u7684 4 \u4EF6\u4E8B:\n1. \u81EA\u884C\u5224\u65AD\u8BED\u4E49\u8FB9\u754C \u2014 \u5C06\u6587\u4EF6\u5206\u6210\u82E5\u5E72\u8FDE\u7EED\u7684\u8BED\u4E49\u7247\u6BB5\uFF08segments\uFF09\n2. \u4E3A\u6BCF\u4E2A\u7247\u6BB5\u5199\u51FA topic\uFF08\u6838\u5FC3\u4E3B\u9898\uFF0C\u4E00\u53E5\u8BDD\uFF09\n3. \u4E3A\u6BCF\u4E2A\u7247\u6BB5\u5199\u51FA normalizedText\uFF08\u89C4\u8303\u5316\u6587\u672C\uFF1A\u8865\u5168\u7701\u7565\u3001\u7EDF\u4E00\u672F\u8BED\u3001\u4FDD\u7559\u6240\u6709\u6280\u672F\u7EC6\u8282\uFF09\n4. \u4E3A\u6BCF\u4E2A\u7247\u6BB5\u63D0\u53D6 concepts\uFF08\u6280\u672F\u6982\u5FF5\uFF09\u548C aliases\uFF08\u540C\u4E49\u8868\u8FBE\uFF0C\u683C\u5F0F \"\u4E2D\u6587 \u2194 English\"\uFF09\n\n\u6838\u5FC3\u539F\u5219:\n- \u4FDD\u7559\u539F\u59CB\u4FE1\u606F \u2014 \u4E0D\u5220\u6280\u672F\u7EC6\u8282\uFF08API \u540D\u3001\u53C2\u6570\u3001\u9519\u8BEF\u4FE1\u606F\u3001\u7F29\u5199\uFF09\n- \u4E0D\u6539\u53D8\u539F\u610F \u2014 \u53EA\u89C4\u8303\u5316\u8868\u8FBE\n- \u8BED\u4E49\u8FB9\u754C = \u540C\u4E00\u8BA4\u77E5\u4E3B\u9898\u7684\u81EA\u7136\u6BB5\u6216\u8FDE\u7EED\u6BB5\u843D\n- \u5982\u679C\u6574\u4E2A\u6587\u4EF6\u662F\u5355\u4E00\u4E3B\u9898\uFF0C\u53EA\u8F93\u51FA 1 \u4E2A segment\n\n\u7981\u6B62: \u8FC7\u5EA6\u603B\u7ED3\u3001\u5220\u9664\u539F\u6587\u3001\u6539\u5199\u903B\u8F91\u3001\u4E3B\u89C2\u63A8\u65AD\u3001\u5F15\u5165\u4E0D\u5B58\u5728\u7684\u4FE1\u606F\u3002";
|
|
21
|
-
/**
|
|
22
|
-
* 为文件级编译构建 prompt
|
|
23
|
-
* @param relPath 文件路径
|
|
24
|
-
* @param fullText 文件全文
|
|
25
|
-
* @param preprocessed 预处理器输出(仅展示给 LLM 参考)
|
|
26
|
-
*/
|
|
27
|
-
export declare function buildFileCompilePrompt(relPath: string, fullText: string, preprocessed: PreprocessedChunk[]): string;
|
|
28
|
-
/** 从 LLM 响应中提取 FileSegment 数组 */
|
|
29
|
-
export declare function parseFileSegments(text: string): FileSegment[] | null;
|
|
30
|
-
/** v5.4 文件级 System Prompt(极简版) */
|
|
31
|
-
export declare const FILE_LLM_SYSTEM_PROMPT = "\u4F60\u662F\u4E00\u4E2A\"\u77E5\u8BC6\u8BED\u4E49\u7F16\u8BD1\u5668\"\u3002\n\n\u4F60\u7684\u4EFB\u52A1: \u5C06\u6574\u7BC7\u7B14\u8BB0\u8F6C\u6362\u4E3A\u7ED3\u6784\u5316\u7684\u8BED\u4E49\u5143\u6570\u636E\uFF0C\u7528\u4E8E\u589E\u5F3A\u8BED\u4E49\u641C\u7D22\u3002\n\n\u4F60\u9700\u8981\u8F93\u51FA\u7684 4 \u4E2A\u5B57\u6BB5:\n1. topic \u2014 \u6838\u5FC3\u4E3B\u9898\uFF08\u4E00\u53E5\u8BDD\u6982\u62EC\u5168\u6587\uFF09\n2. normalizedText \u2014 \u89C4\u8303\u5316\u6587\u672C\uFF08\u8865\u5168\u7701\u7565\u4E3B\u8BED\u3001\u7EDF\u4E00\u672F\u8BED\u3001\u4FDD\u7559\u6240\u6709\u6280\u672F\u7EC6\u8282\uFF09\n3. concepts \u2014 \u6280\u672F\u6982\u5FF5\u5217\u8868\uFF083-8 \u4E2A\u6838\u5FC3\u6982\u5FF5\uFF09\n4. aliases \u2014 \u540C\u4E49\u8868\u8FBE\uFF08\u683C\u5F0F \"\u4E2D\u6587 \u2194 English\"\uFF0C2-5 \u7EC4\uFF09\n\n\u6838\u5FC3\u539F\u5219:\n- \u4FDD\u7559\u6240\u6709\u6280\u672F\u7EC6\u8282\uFF08API \u540D\u3001\u53C2\u6570\u3001\u9519\u8BEF\u4FE1\u606F\u3001\u7F29\u5199\u3001\u7248\u672C\u53F7\uFF09\n- \u4E0D\u6539\u53D8\u539F\u610F\uFF0C\u53EA\u89C4\u8303\u5316\u8868\u8FBE\n- concepts \u63D0\u53D6\u6280\u672F\u5173\u952E\u8BCD\uFF0C\u4E0D\u662F\u6458\u8981\n- aliases \u8986\u76D6\u4E2D\u82F1\u5BF9\u7167\u548C\u7F29\u5199\u5C55\u5F00\n\n\u7981\u6B62: \u8FC7\u5EA6\u603B\u7ED3\u3001\u5220\u9664\u539F\u6587\u3001\u6539\u5199\u903B\u8F91\u3001\u4E3B\u89C2\u63A8\u65AD\u3001\u5F15\u5165\u4E0D\u5B58\u5728\u7684\u4FE1\u606F\u3002";
|
|
32
|
-
/**
|
|
33
|
-
* v5.4 构建简化文件级编译 prompt
|
|
34
|
-
*/
|
|
35
|
-
export declare function buildFileLLMPrompt(relPath: string, fullText: string): string;
|
|
36
|
-
/**
|
|
37
|
-
* v5.4 解析文件级 LLM 响应(单对象,非 segments 数组)
|
|
38
|
-
*/
|
|
39
|
-
export declare function parseFileLLMResult(text: string): import("../lib/types.js").FileLLMData | null;
|
|
40
|
-
/**
|
|
41
|
-
* v5.4 构建文件级 LLM 向量的 embedding 文本
|
|
42
|
-
*/
|
|
43
|
-
export declare function buildFileLLMEmbeddingText(data: import("../lib/types.js").FileLLMData, relPath?: string, maxEmbedLen?: number): string;
|
|
44
|
-
//# sourceMappingURL=semantic-compiler.d.ts.map
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
interface WikiConfig {
|
|
2
|
-
sources: string[];
|
|
3
|
-
semanticEnabled: boolean;
|
|
4
|
-
currentModelId: string;
|
|
5
|
-
lastScan: string;
|
|
6
|
-
}
|
|
7
|
-
export declare function readConfig(): WikiConfig;
|
|
8
|
-
export declare function writeConfig(c: WikiConfig): void;
|
|
9
|
-
export declare function getSources(): string[];
|
|
10
|
-
export declare function addSource(absPath: string): boolean;
|
|
11
|
-
/** 移除数据源 — 返回被移除的路径,未找到返回 null */
|
|
12
|
-
export declare function removeSource(target: string): string | null;
|
|
13
|
-
export declare function getLastScan(): string;
|
|
14
|
-
export declare function setLastScan(iso: string): void;
|
|
15
|
-
export declare function getSemanticEnabled(): boolean;
|
|
16
|
-
export declare function setSemanticEnabled(enabled: boolean): void;
|
|
17
|
-
export declare function readModelId(): string;
|
|
18
|
-
export declare function writeModelId(id: string): void;
|
|
19
|
-
export declare function getWikiModel(): string;
|
|
20
|
-
export declare function configStats(): {
|
|
21
|
-
sources: number;
|
|
22
|
-
lastScan: string;
|
|
23
|
-
};
|
|
24
|
-
export {};
|
|
25
|
-
//# sourceMappingURL=store-config.d.ts.map
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
export declare function getEmbeddings(): Record<string, number[]>;
|
|
2
|
-
export declare function setEmbeddings(entries: Record<string, number[]>, model?: string, dim?: number): void;
|
|
3
|
-
export declare function getChunkInfo(): Record<string, import("./types.js").ChunkInfo>;
|
|
4
|
-
export declare function setChunkInfo(chunkInfo: Record<string, import("./types.js").ChunkInfo>): void;
|
|
5
|
-
export declare function removeEmbedding(relPath: string): void;
|
|
6
|
-
export declare function getEmbeddingModel(): string | undefined;
|
|
7
|
-
export declare function getEmbeddingDim(): number | undefined;
|
|
8
|
-
export declare function getCentroid(): number[] | null;
|
|
9
|
-
export declare function setCentroid(centroid: number[]): void;
|
|
10
|
-
export declare function clearCentroid(): void;
|
|
11
|
-
export declare function vectorsStats(): {
|
|
12
|
-
embeddings: number;
|
|
13
|
-
centroid: boolean;
|
|
14
|
-
model?: string;
|
|
15
|
-
dim?: number;
|
|
16
|
-
};
|
|
17
|
-
//# sourceMappingURL=store-vectors.d.ts.map
|