@llangtop/pwiki-core 0.3.4 → 0.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/WikiEngine.d.ts +25 -37
- package/dist/WikiEngine.d.ts.map +1 -1
- package/dist/WikiEngine.js +157 -298
- package/dist/WikiEngine.js.map +1 -1
- package/dist/ast-chunker.d.ts +23 -0
- package/dist/ast-chunker.d.ts.map +1 -0
- package/dist/ast-chunker.js +434 -0
- package/dist/ast-chunker.js.map +1 -0
- package/dist/content-cache.d.ts +13 -0
- package/dist/content-cache.d.ts.map +1 -0
- package/dist/content-cache.js +33 -0
- package/dist/content-cache.js.map +1 -0
- package/dist/embedder.d.ts +38 -0
- package/dist/embedder.d.ts.map +1 -0
- package/dist/embedder.js +267 -0
- package/dist/embedder.js.map +1 -0
- package/dist/file-manifest.d.ts +46 -0
- package/dist/file-manifest.d.ts.map +1 -0
- package/dist/file-manifest.js +121 -0
- package/dist/file-manifest.js.map +1 -0
- package/dist/index.d.ts +18 -8
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +25 -7
- package/dist/index.js.map +1 -1
- package/dist/indexer-compile.d.ts +20 -0
- package/dist/indexer-compile.d.ts.map +1 -0
- package/dist/indexer-compile.js +198 -0
- package/dist/indexer-compile.js.map +1 -0
- package/dist/indexer-embed.d.ts +21 -0
- package/dist/indexer-embed.d.ts.map +1 -0
- package/dist/indexer-embed.js +248 -0
- package/dist/indexer-embed.js.map +1 -0
- package/dist/indexer-scan.d.ts +4 -0
- package/dist/indexer-scan.d.ts.map +1 -0
- package/dist/indexer-scan.js +51 -0
- package/dist/indexer-scan.js.map +1 -0
- package/dist/indexer.d.ts +4 -0
- package/dist/indexer.d.ts.map +1 -0
- package/dist/indexer.js +7 -0
- package/dist/indexer.js.map +1 -0
- package/dist/model-registry.d.ts +32 -0
- package/dist/model-registry.d.ts.map +1 -0
- package/dist/model-registry.js +82 -0
- package/dist/model-registry.js.map +1 -0
- package/dist/parser.d.ts +9 -0
- package/dist/parser.d.ts.map +1 -0
- package/dist/parser.js +54 -0
- package/dist/parser.js.map +1 -0
- package/dist/preprocessor.d.ts +36 -0
- package/dist/preprocessor.d.ts.map +1 -0
- package/dist/preprocessor.js +209 -0
- package/dist/preprocessor.js.map +1 -0
- package/dist/search.d.ts +6 -0
- package/dist/search.d.ts.map +1 -0
- package/dist/search.js +91 -0
- package/dist/search.js.map +1 -0
- package/dist/semantic-compiler.d.ts +44 -0
- package/dist/semantic-compiler.d.ts.map +1 -0
- package/dist/semantic-compiler.js +376 -0
- package/dist/semantic-compiler.js.map +1 -0
- package/dist/semantic-search.d.ts +11 -0
- package/dist/semantic-search.d.ts.map +1 -0
- package/dist/semantic-search.js +217 -0
- package/dist/semantic-search.js.map +1 -0
- package/dist/store-settings.d.ts +32 -0
- package/dist/store-settings.d.ts.map +1 -0
- package/dist/store-settings.js +138 -0
- package/dist/store-settings.js.map +1 -0
- package/dist/store-vectors.d.ts +13 -0
- package/dist/store-vectors.d.ts.map +1 -0
- package/dist/store-vectors.js +101 -0
- package/dist/store-vectors.js.map +1 -0
- package/dist/store.d.ts +11 -0
- package/dist/store.d.ts.map +1 -0
- package/dist/store.js +28 -0
- package/dist/store.js.map +1 -0
- package/dist/types.d.ts +75 -92
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +1 -1
- package/dist/types.js.map +1 -1
- package/dist/wiki-paths.d.ts +3 -0
- package/dist/wiki-paths.d.ts.map +1 -0
- package/dist/wiki-paths.js +13 -0
- package/dist/wiki-paths.js.map +1 -0
- package/package.json +38 -38
- package/dist/compile/compiler.d.ts +0 -39
- package/dist/compile/compiler.d.ts.map +0 -1
- package/dist/compile/compiler.js +0 -227
- package/dist/compile/compiler.js.map +0 -1
- package/dist/compile/index.d.ts +0 -3
- package/dist/compile/index.d.ts.map +0 -1
- package/dist/compile/index.js +0 -2
- package/dist/compile/index.js.map +0 -1
- package/dist/embed/WikiEmbedder.d.ts +0 -28
- package/dist/embed/WikiEmbedder.d.ts.map +0 -1
- package/dist/embed/WikiEmbedder.js +0 -147
- package/dist/embed/WikiEmbedder.js.map +0 -1
- package/dist/embed/index.d.ts +0 -2
- package/dist/embed/index.d.ts.map +0 -1
- package/dist/embed/index.js +0 -2
- package/dist/embed/index.js.map +0 -1
- package/dist/llm/WikiLLM.d.ts +0 -24
- package/dist/llm/WikiLLM.d.ts.map +0 -1
- package/dist/llm/WikiLLM.js +0 -46
- package/dist/llm/WikiLLM.js.map +0 -1
- package/dist/llm/index.d.ts +0 -3
- package/dist/llm/index.d.ts.map +0 -1
- package/dist/llm/index.js +0 -2
- package/dist/llm/index.js.map +0 -1
- package/dist/models.d.ts +0 -5
- package/dist/models.d.ts.map +0 -1
- package/dist/models.js +0 -54
- package/dist/models.js.map +0 -1
- package/dist/search/WikiSearch.d.ts +0 -14
- package/dist/search/WikiSearch.d.ts.map +0 -1
- package/dist/search/WikiSearch.js +0 -223
- package/dist/search/WikiSearch.js.map +0 -1
- package/dist/search/index.d.ts +0 -2
- package/dist/search/index.d.ts.map +0 -1
- package/dist/search/index.js +0 -2
- package/dist/search/index.js.map +0 -1
- package/dist/store/WikiStore.d.ts +0 -47
- package/dist/store/WikiStore.d.ts.map +0 -1
- package/dist/store/WikiStore.js +0 -301
- package/dist/store/WikiStore.js.map +0 -1
- package/dist/store/index.d.ts +0 -2
- package/dist/store/index.d.ts.map +0 -1
- package/dist/store/index.js +0 -2
- package/dist/store/index.js.map +0 -1
- package/dist/util/fs.d.ts +0 -7
- package/dist/util/fs.d.ts.map +0 -1
- package/dist/util/fs.js +0 -36
- package/dist/util/fs.js.map +0 -1
- package/dist/util/index.d.ts +0 -3
- package/dist/util/index.d.ts.map +0 -1
- package/dist/util/index.js +0 -3
- package/dist/util/index.js.map +0 -1
- package/dist/util/paths.d.ts +0 -17
- package/dist/util/paths.d.ts.map +0 -1
- package/dist/util/paths.js +0 -31
- package/dist/util/paths.js.map +0 -1
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
// indexer-compile.ts — 语义编译存储 (v5.4)
|
|
2
|
+
//
|
|
3
|
+
// getRawChunks / storeCompiledChunks (v5.1 块级)
|
|
4
|
+
// storeFileSegments (v5.2 文件级 segments → 待 v5.4 替换为 storeFileLLMVector)
|
|
5
|
+
import { existsSync, readFileSync, writeFileSync } from "node:fs";
|
|
6
|
+
import { resolve } from "node:path";
|
|
7
|
+
import { getIndex, getEmbeddings, setEmbeddings, getChunkInfo, setChunkInfo } from "./store.js";
|
|
8
|
+
import { getCurrentModel } from "./model-registry.js";
|
|
9
|
+
import { initialize, isAvailable, embed } from "./embedder.js";
|
|
10
|
+
import { extractChunks } from "./indexer-embed.js";
|
|
11
|
+
import { buildEmbeddingText, buildFileLLMEmbeddingText } from "./semantic-compiler.js";
|
|
12
|
+
import { updateFileState, computeMD5, getCompiledFilePath, ensureCompiledDir } from "./file-manifest.js";
|
|
13
|
+
/**
 * Collects the raw chunks of every indexed file so they can be handed to the
 * AI compilation step.
 *
 * @param sourceDir - When truthy, only index entries whose `sourceDir` equals
 *   this value are scanned; otherwise every entry is scanned.
 * @param uncompiledOnly - When true (the default), chunks whose stored chunk
 *   info already carries a non-empty `normalizedText` are left out.
 * @returns One `{ key, relPath, heading, rawText, compiled }` record per chunk.
 *   Entries whose file no longer exists on disk are skipped.
 */
export async function getRawChunks(sourceDir, uncompiledOnly = true) {
    const index = getIndex();
    const infoByKey = getChunkInfo();
    const wanted = Object.values(index).filter((file) => {
        if (!sourceDir) {
            return true;
        }
        return file.sourceDir === sourceDir;
    });
    const collected = [];
    for (const file of wanted) {
        const absPath = resolve(file.sourceDir, file.relPath);
        if (existsSync(absPath)) {
            const fileChunks = await extractChunks(absPath, file.relPath, file.title);
            for (const chunk of fileChunks) {
                // A chunk counts as compiled once its info has a normalizedText.
                const compiled = Boolean(infoByKey[chunk.key]?.normalizedText);
                if (!(uncompiledOnly && compiled)) {
                    collected.push({
                        key: chunk.key,
                        relPath: file.relPath,
                        heading: chunk.heading,
                        rawText: chunk.rawText,
                        compiled,
                    });
                }
            }
        }
    }
    return collected;
}
|
|
42
|
+
/**
 * Stores chunk-level compilation results and rebuilds the embeddings of the
 * affected chunks (v5.1 legacy path).
 *
 * @param compiled - Compiled chunks; each item provides `key`, `topic`,
 *   `normalizedText`, `concepts` and `aliases`. Items whose key has no stored
 *   chunk info are ignored.
 * @returns How many chunks were re-embedded. Returns 0 when the embedder is
 *   unavailable and cannot be initialized. Nothing is persisted when the
 *   count is 0.
 */
export async function storeCompiledChunks(compiled) {
    if (!isAvailable() && !(await initialize())) {
        return 0;
    }
    const infoByKey = getChunkInfo();
    const vectors = getEmbeddings();
    // Raw text of every chunk (compiled or not), looked up by chunk key.
    const rawByKey = new Map();
    for (const raw of await getRawChunks(undefined, false)) {
        rawByKey.set(raw.key, raw);
    }
    let stored = 0;
    for (const item of compiled) {
        const info = infoByKey[item.key];
        if (info) {
            Object.assign(info, {
                topic: item.topic,
                normalizedText: item.normalizedText,
                concepts: item.concepts,
                aliases: item.aliases,
            });
            const sourceText = rawByKey.get(item.key)?.rawText ?? "";
            const text = buildEmbeddingText(item.topic, item.normalizedText, item.concepts, item.aliases, info.keywords ?? [], info.contentClass ?? "reference", info.temporalAnchor, sourceText);
            try {
                vectors[item.key] = await embed(text);
                stored += 1;
            }
            catch {
                /* embedding failed for this chunk: skip it */
            }
        }
    }
    if (stored > 0) {
        const { hfRepo, dim } = getCurrentModel();
        setEmbeddings(vectors, hfRepo, dim);
        setChunkInfo(infoByKey);
    }
    return stored;
}
|
|
85
|
+
/**
 * Stores file-level compilation results and rebuilds the file's embeddings
 * (v5.2 → v5.4 transition).
 * v5.4 TODO: replace with storeFileLLMVector — attach a single ###llm vector
 * and do not delete the AST chunks.
 *
 * All existing entries of this file (the legacy file-level key `relPath` and
 * every `relPath###…` chunk key) are removed, then one entry
 * `${relPath}###${i}` is written per segment.
 *
 * @param relPath - Relative path of the file; used as the key prefix.
 * @param segments - Compiled segments; each provides `topic`,
 *   `normalizedText`, `concepts`, `aliases` and `text`.
 * @param preprocessed - Per-segment metadata aligned by index with
 *   `segments`; when there is no item at index `i` the first item is used,
 *   and when there is none at all the built-in defaults apply.
 * @returns How many segments were embedded. Returns 0 when the embedder is
 *   unavailable and cannot be initialized. Nothing is persisted when the
 *   count is 0.
 */
export async function storeFileSegments(relPath, segments, preprocessed) {
    if (!isAvailable()) {
        const ok = await initialize();
        if (!ok)
            return 0;
    }
    const chunkInfo = getChunkInfo();
    const existing = getEmbeddings();
    // Remove this file's old keys. Match the exact file-level key or the
    // `relPath###` chunk prefix: a bare startsWith(relPath) would also wipe
    // the entries of other files whose path merely begins with the same
    // characters (e.g. "notes/a.md" vs "notes/a.md.bak.md").
    const chunkKeyPrefix = `${relPath}###`;
    const oldKeys = Object.keys(chunkInfo).filter((k) => k === relPath || k.startsWith(chunkKeyPrefix));
    for (const key of oldKeys) {
        delete chunkInfo[key];
        delete existing[key];
    }
    let updated = 0;
    for (let i = 0; i < segments.length; i++) {
        const seg = segments[i];
        const key = `${relPath}###${i}`;
        // Tolerate a missing metadata array; every field below has a default.
        const pp = preprocessed?.[i] ?? preprocessed?.[0];
        const ci = {
            heading: pp?.heading ?? "",
            level: pp?.level ?? 0,
            topic: seg.topic,
            normalizedText: seg.normalizedText,
            concepts: seg.concepts,
            aliases: seg.aliases,
            chunkType: pp?.chunkType ?? "note",
            contentClass: pp?.contentClass ?? "reference",
            importance: pp?.importance ?? 0.5,
            temporalAnchor: pp?.temporalAnchor,
            confidence: pp?.confidence ?? 0.85,
            summary: pp?.summary,
            keywords: pp?.keywords,
        };
        chunkInfo[key] = ci;
        const embeddingText = buildEmbeddingText(seg.topic, seg.normalizedText, seg.concepts, seg.aliases, pp?.keywords ?? [], pp?.contentClass ?? "reference", pp?.temporalAnchor, seg.text);
        try {
            const vec = await embed(embeddingText);
            existing[key] = vec;
            updated++;
        }
        catch {
            /* embedding failed for this segment: skip it */
        }
    }
    if (updated > 0) {
        const model = getCurrentModel();
        setEmbeddings(existing, model.hfRepo, model.dim);
        setChunkInfo(chunkInfo);
    }
    return updated;
}
|
|
141
|
+
// ---- v5.4 文件级 LLM 向量存储 ----
|
|
142
|
+
/**
 * Stores a file-level LLM compilation result: attaches a single `###llm`
 * vector for the file and leaves the AST chunks in place.
 *
 * Besides the vector and its chunk info, a JSON record is written to the
 * file's compiled path and the file manifest entry is updated.
 *
 * @param sourceDir - Directory that `relPath` is resolved against.
 * @param relPath - Relative path of the source file; also the key prefix.
 * @param llmData - LLM result providing `topic`, `normalizedText`,
 *   `concepts` and `aliases`.
 * @param llmModel - Name of the LLM that produced the result; recorded as
 *   "unknown" when falsy.
 * @returns true on success; false when the embedder cannot be initialized,
 *   the source file cannot be read, or embedding fails.
 */
export async function storeFileLLMVector(sourceDir, relPath, llmData, llmModel) {
    if (!isAvailable() && !(await initialize())) {
        return false;
    }
    const absPath = resolve(sourceDir, relPath);
    let sourceMD5;
    try {
        sourceMD5 = computeMD5(readFileSync(absPath, "utf-8"));
    }
    catch {
        return false;
    }
    const model = getCurrentModel();
    // Character budget derived from the model's token limit.
    const charBudget = Math.floor(model.maxTokens * 2);
    const embeddingText = buildFileLLMEmbeddingText(llmData, relPath, charBudget);
    let vector;
    try {
        vector = await embed(embeddingText);
    }
    catch {
        return false;
    }
    const vectorKey = `${relPath}###llm`;
    const vectors = getEmbeddings();
    vectors[vectorKey] = vector;
    const infoByKey = getChunkInfo();
    infoByKey[vectorKey] = {
        heading: llmData.topic,
        level: 0,
        topic: llmData.topic,
        normalizedText: llmData.normalizedText,
        concepts: llmData.concepts,
        aliases: llmData.aliases,
        chunkType: "llm_summary",
        contentClass: "knowledge",
        importance: 0.8,
        confidence: 0.85,
    };
    // Persist the compiled record next to the other compiled files.
    ensureCompiledDir();
    const compiledFile = getCompiledFilePath(relPath);
    const record = {
        relPath,
        compiledAt: new Date().toISOString(),
        sourceMD5,
        model: llmModel || "unknown",
        result: llmData,
        embeddingText,
        vectorKey,
    };
    writeFileSync(compiledFile, JSON.stringify(record, null, 2), "utf-8");
    // The AST chunks are only counted here for the manifest; they are not re-embedded.
    const astChunks = await extractChunks(absPath, relPath, "", charBudget);
    updateFileState(relPath, {
        md5: sourceMD5,
        astChunkCount: astChunks.length,
        astIndexedAt: new Date().toISOString(),
        llmCompiled: true,
        llmCompiledAt: new Date().toISOString(),
    });
    setEmbeddings(vectors, model.hfRepo, model.dim);
    setChunkInfo(infoByKey);
    return true;
}
|
|
198
|
+
//# sourceMappingURL=indexer-compile.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"indexer-compile.js","sourceRoot":"","sources":["../src/indexer-compile.ts"],"names":[],"mappings":"AAAA,qCAAqC;AACrC,EAAE;AACF,+CAA+C;AAC/C,wEAAwE;AAExE,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAClE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,aAAa,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAChG,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACtD,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,KAAK,EAAE,MAAM,eAAe,CAAC;AAC/D,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,kBAAkB,EAAE,yBAAyB,EAAE,MAAM,wBAAwB,CAAC;AAGvF,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,mBAAmB,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAEzG;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,SAAkB,EAClB,cAAc,GAAG,IAAI;IAErB,MAAM,GAAG,GAAG,QAAQ,EAAE,CAAC;IACvB,MAAM,SAAS,GAAG,YAAY,EAAE,CAAC;IACjC,MAAM,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CACvC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC,SAAS,KAAK,SAAS,CAC/C,CAAC;IAEF,MAAM,MAAM,GAAe,EAAE,CAAC;IAE9B,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;QACzD,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC;YAAE,SAAS;QAEpC,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,QAAQ,EAAE,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC;QAEzE,KAAK,MAAM,EAAE,IAAI,MAAM,EAAE,CAAC;YACxB,MAAM,EAAE,GAAG,SAAS,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC;YAC7B,MAAM,QAAQ,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,cAAc,CAAC,CAAC;YACxC,IAAI,cAAc,IAAI,QAAQ;gBAAE,SAAS;YAEzC,MAAM,CAAC,IAAI,CAAC;gBACV,GAAG,EAAE,EAAE,CAAC,GAAG;gBACX,OAAO,EAAE,KAAK,CAAC,OAAO;gBACtB,OAAO,EAAE,EAAE,CAAC,OAAO;gBACnB,OAAO,EAAE,EAAE,CAAC,OAAO;gBACnB,QAAQ;aACT,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,QAAyB;IAEzB,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC;QACnB,MAAM,EAAE,GAAG,MAAM,UAAU,EAAE,CAAC;QAC9B,IAAI,CAAC,EAAE;YAAE,OAAO,CAAC,CAAC;IACpB,CAAC;IAED,MAAM,SAAS,GAAG,YAAY,EAAE,CAAC;IACjC,MAAM,QAAQ,GAAG,aAAa,EAAE,CAAC;IACjC,MAAM,SAAS,GAAG,MAAM,YAAY,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IACvD,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,GAAG,CA
AC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAEzD,IAAI,OAAO,GAAG,CAAC,CAAC;IAEhB,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;QAC1B,MAAM,EAAE,GAAG,SAAS,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC;QAC7B,IAAI,CAAC,EAAE;YAAE,SAAS;QAElB,MAAM,CAAC,MAAM,CAAC,EAAE,EAAE;YAChB,KAAK,EAAE,EAAE,CAAC,KAAK;YACf,cAAc,EAAE,EAAE,CAAC,cAAc;YACjC,QAAQ,EAAE,EAAE,CAAC,QAAQ;YACrB,OAAO,EAAE,EAAE,CAAC,OAAO;SACpB,CAAC,CAAC;QAEH,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC;QAC/B,MAAM,OAAO,GAAG,GAAG,EAAE,OAAO,IAAI,EAAE,CAAC;QACnC,MAAM,aAAa,GAAG,kBAAkB,CACtC,EAAE,CAAC,KAAK,EACR,EAAE,CAAC,cAAc,EACjB,EAAE,CAAC,QAAQ,EACX,EAAE,CAAC,OAAO,EACV,EAAE,CAAC,QAAQ,IAAI,EAAE,EACjB,EAAE,CAAC,YAAY,IAAI,WAAW,EAC9B,EAAE,CAAC,cAAc,EACjB,OAAO,CACR,CAAC;QAEF,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,aAAa,CAAC,CAAC;YACvC,QAAQ,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;YACvB,OAAO,EAAE,CAAC;QACZ,CAAC;QAAC,MAAM,CAAC;YACP,UAAU;QACZ,CAAC;IACH,CAAC;IAED,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;QAChB,MAAM,KAAK,GAAG,eAAe,EAAE,CAAC;QAChC,aAAa,CAAC,QAAQ,EAAE,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC;QACjD,YAAY,CAAC,SAAS,CAAC,CAAC;IAC1B,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,OAAe,EACf,QAAuB,EACvB,YAAiC;IAEjC,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC;QACnB,MAAM,EAAE,GAAG,MAAM,UAAU,EAAE,CAAC;QAC9B,IAAI,CAAC,EAAE;YAAE,OAAO,CAAC,CAAC;IACpB,CAAC;IAED,MAAM,SAAS,GAAG,YAAY,EAAE,CAAC;IACjC,MAAM,QAAQ,GAAG,aAAa,EAAE,CAAC;IAEjC,qBAAqB;IACrB,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC;IAC5E,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;QAC1B,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC;QACtB,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC;IACvB,CAAC;IAED,IAAI,OAAO,GAAG,CAAC,CAAC;IAEhB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACzC,MAAM,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QACxB,MAAM,GAAG,GAAG,GAAG,OAAO,MAAM,CAAC,EAAE,CAAC;QAChC,MAAM,EAAE,GAAG,YAAY,CAAC,CAAC,CAAC,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC;QAE9C,MAAM,EAAE,GAAc;Y
ACpB,OAAO,EAAE,EAAE,EAAE,OAAO,IAAI,EAAE;YAC1B,KAAK,EAAE,EAAE,EAAE,KAAK,IAAI,CAAC;YACrB,KAAK,EAAE,GAAG,CAAC,KAAK;YAChB,cAAc,EAAE,GAAG,CAAC,cAAc;YAClC,QAAQ,EAAE,GAAG,CAAC,QAAQ;YACtB,OAAO,EAAE,GAAG,CAAC,OAAO;YACpB,SAAS,EAAE,EAAE,EAAE,SAAS,IAAI,MAAM;YAClC,YAAY,EAAE,EAAE,EAAE,YAAY,IAAI,WAAW;YAC7C,UAAU,EAAE,EAAE,EAAE,UAAU,IAAI,GAAG;YACjC,cAAc,EAAE,EAAE,EAAE,cAAc;YAClC,UAAU,EAAE,EAAE,EAAE,UAAU,IAAI,IAAI;YAClC,OAAO,EAAE,EAAE,EAAE,OAAO;YACpB,QAAQ,EAAE,EAAE,EAAE,QAAQ;SACvB,CAAC;QAEF,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC;QAEpB,MAAM,aAAa,GAAG,kBAAkB,CACtC,GAAG,CAAC,KAAK,EACT,GAAG,CAAC,cAAc,EAClB,GAAG,CAAC,QAAQ,EACZ,GAAG,CAAC,OAAO,EACX,EAAE,EAAE,QAAQ,IAAI,EAAE,EAClB,EAAE,EAAE,YAAY,IAAI,WAAW,EAC/B,EAAE,EAAE,cAAc,EAClB,GAAG,CAAC,IAAI,CACT,CAAC;QAEF,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,aAAa,CAAC,CAAC;YACvC,QAAQ,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;YACpB,OAAO,EAAE,CAAC;QACZ,CAAC;QAAC,MAAM,CAAC;YACP,UAAU;QACZ,CAAC;IACH,CAAC;IAED,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;QAChB,MAAM,KAAK,GAAG,eAAe,EAAE,CAAC;QAChC,aAAa,CAAC,QAAQ,EAAE,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC;QACjD,YAAY,CAAC,SAAS,CAAC,CAAC;IAC1B,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,8BAA8B;AAE9B;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,SAAiB,EACjB,OAAe,EACf,OAAoB,EACpB,QAAiB;IAEjB,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC;QACnB,MAAM,EAAE,GAAG,MAAM,UAAU,EAAE,CAAC;QAC9B,IAAI,CAAC,EAAE;YAAE,OAAO,KAAK,CAAC;IACxB,CAAC;IAED,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;IAC7C,IAAI,UAAU,GAAG,EAAE,CAAC;IACpB,IAAI,CAAC;QAAC,UAAU,GAAG,UAAU,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;IAAC,CAAC;IAAC,MAAM,CAAC;QAAC,OAAO,KAAK,CAAC;IAAC,CAAC;IAEzF,MAAM,KAAK,GAAG,eAAe,EAAE,CAAC;IAChC,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;IACpD,MAAM,aAAa,GAAG,yBAAyB,CAAC,OAAO,EAAE,OAAO,EAAE,WAAW,CAAC,CAAC;IAC/E,IAAI,GAAa,CAAC;IAClB,IAAI,CAAC;QAAC,GAAG,GAAG,MAAM,KAAK,CAAC,aAAa,CAAC,CAAC;IAAC,CAAC;IAAC,MAAM,CAAC;QAAC,OAAO,KAAK,CAAC;IAAC,CAAC;IAEjE,MAAM,SAAS,GAAG,GAAG,OAAO,QAAQ,CAAC;IACrC,MAAM,QAAQ,GAAG,aAAa,EAAE,CAAC;IACjC,QAAQ,CAAC,SAAS
,CAAC,GAAG,GAAG,CAAC;IAE1B,MAAM,SAAS,GAAG,YAAY,EAAE,CAAC;IACjC,SAAS,CAAC,SAAS,CAAC,GAAG;QACrB,OAAO,EAAE,OAAO,CAAC,KAAK,EAAE,KAAK,EAAE,CAAC;QAChC,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,cAAc,EAAE,OAAO,CAAC,cAAc;QAC5D,QAAQ,EAAE,OAAO,CAAC,QAAQ,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO;QACpD,SAAS,EAAE,aAAa,EAAE,YAAY,EAAE,WAAW;QACnD,UAAU,EAAE,GAAG,EAAE,UAAU,EAAE,IAAI;KAClC,CAAC;IAEF,iBAAiB,EAAE,CAAC;IACpB,MAAM,YAAY,GAAG,mBAAmB,CAAC,OAAO,CAAC,CAAC;IAClD,MAAM,MAAM,GAAuB;QACjC,OAAO,EAAE,UAAU,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,SAAS,EAAE,UAAU;QACpE,KAAK,EAAE,QAAQ,IAAI,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,aAAa,EAAE,SAAS;KACxE,CAAC;IACF,aAAa,CAAC,YAAY,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;IAEtE,MAAM,SAAS,GAAG,MAAM,aAAa,CAAC,QAAQ,EAAE,OAAO,EAAE,EAAE,EAAE,WAAW,CAAC,CAAC;IAC1E,eAAe,CAAC,OAAO,EAAE;QACvB,GAAG,EAAE,UAAU,EAAE,aAAa,EAAE,SAAS,CAAC,MAAM;QAChD,YAAY,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACtC,WAAW,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KAC3D,CAAC,CAAC;IAEH,+BAA+B;IAC/B,aAAa,CAAC,QAAQ,EAAE,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC;IACjD,YAAY,CAAC,SAAS,CAAC,CAAC;IACxB,OAAO,IAAI,CAAC;AACd,CAAC"}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import type { FileEntry } from "./types.js";
|
|
2
|
+
/**
 * Splits a file into multiple chunks by heading.
 * v5.3: AST parsing is preferred; on failure it degrades to regex.
 *
 * @param filePath - Path of the file to read.
 * @param relPath - Relative path of the file; used to build chunk keys.
 * @param defaultTitle - Title used when a chunk has no heading of its own.
 * @param maxEmbedLen - Optional maximum length of the text placed in `embedText`.
 * @returns The chunks, each with its key, heading, heading level, text to embed and raw text.
 */
|
|
6
|
+
export declare function extractChunks(filePath: string, relPath: string, defaultTitle: string, maxEmbedLen?: number): Promise<{
|
|
7
|
+
key: string;
|
|
8
|
+
heading: string;
|
|
9
|
+
level: number;
|
|
10
|
+
embedText: string;
|
|
11
|
+
rawText: string;
|
|
12
|
+
}[]>;
|
|
13
|
+
/**
 * Generates embeddings in batch and persists them to vectors.json.
 *
 * @param sourceDir - Directory that each entry's relative path is resolved against.
 * @param entries - Indexed file entries to process.
 * @returns The number of chunk embeddings generated.
 */
|
|
16
|
+
export declare function generateEmbeddings(sourceDir: string, entries: FileEntry[]): Promise<number>;
|
|
17
|
+
/**
 * Generates or updates the embeddings of a single file.
 *
 * @param sourceDir - Directory that `relPath` is resolved against.
 * @param relPath - Relative path of the file.
 * @param title - Title of the file, passed on to chunk extraction.
 * @returns Whether at least one chunk was embedded and stored.
 */
|
|
20
|
+
export declare function embedSingleFile(sourceDir: string, relPath: string, title: string): Promise<boolean>;
|
|
21
|
+
//# sourceMappingURL=indexer-embed.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"indexer-embed.d.ts","sourceRoot":"","sources":["../src/indexer-embed.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAmB5C;;;GAGG;AACH,wBAAsB,aAAa,CACjC,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,YAAY,EAAE,MAAM,EACpB,WAAW,SAAM,GAChB,OAAO,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,EAAE,CAAC,CA0FhG;AAED;;GAEG;AACH,wBAAsB,kBAAkB,CACtC,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,SAAS,EAAE,GACnB,OAAO,CAAC,MAAM,CAAC,CA8FjB;AAED;;GAEG;AACH,wBAAsB,eAAe,CACnC,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,OAAO,CAAC,CAoClB"}
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
// indexer-embed.ts — 向量生成 (v5.4)
|
|
2
|
+
//
|
|
3
|
+
// extractChunks (委托 ast-chunker) + generateEmbeddings + embedSingleFile
|
|
4
|
+
import { readFileSync, existsSync, statSync } from "node:fs";
|
|
5
|
+
import { resolve } from "node:path";
|
|
6
|
+
import { getSemanticEnabled, getEmbeddings, setEmbeddings, getChunkInfo, setChunkInfo } from "./store.js";
|
|
7
|
+
import { getCurrentModel } from "./model-registry.js";
|
|
8
|
+
import { initialize, isAvailable, embed } from "./embedder.js";
|
|
9
|
+
import { extractChunksAST } from "./ast-chunker.js";
|
|
10
|
+
import { updateFileState, computeMD5, getFileState } from "./file-manifest.js";
|
|
11
|
+
/** Heading-line regex (used only by the fallback path): 1 to 4 leading '#' characters followed by a space. */
|
|
12
|
+
const HEADING_RE = /^#{1,4} /;
|
|
13
|
+
/**
 * Strips common Markdown markup from `text` and truncates the result.
 *
 * Removed or unwrapped, in order: heading markers, emphasis / inline-code /
 * strikethrough markers (every `*`, `_`, backtick and `~~`, wherever they
 * occur), images (replaced by their alt text), links (replaced by their
 * label), unordered and ordered list markers. Runs of blank lines are then
 * collapsed and the text is trimmed.
 *
 * @param text - Markdown source.
 * @param maxLen - Maximum number of characters to keep.
 * @returns The plain text, at most `maxLen` characters long.
 */
function plainText(text, maxLen) {
    return text
        .replace(/^#{1,6}\s+/gm, "")
        .replace(/\*\*|__|\*|_|`|~~/g, "")
        // Images must be unwrapped before links: the link pattern also matches
        // the "[alt](url)" tail of "![alt](url)", which would leave a stray "!"
        // behind and make the image pattern unreachable.
        .replace(/!\[([^\]]*)\]\([^)]*\)/g, "$1")
        .replace(/\[([^\]]*)\]\([^)]*\)/g, "$1")
        .replace(/^\s*[-*+]\s+/gm, "")
        .replace(/^\s*\d+\.\s+/gm, "")
        .replace(/\n{2,}/g, "\n")
        .trim()
        .slice(0, maxLen);
}
|
|
26
|
+
/**
 * Splits a file into multiple chunks by heading.
 * v5.3: AST parsing is tried first; the regex splitter is the fallback.
 *
 * The file is read once and the same content is given to both strategies.
 *
 * @param filePath - Path of the file to read (UTF-8).
 * @param relPath - Relative path of the file; used for chunk keys and for the
 *   path context placed at the top of each fallback `embedText`.
 * @param defaultTitle - Title used when no better heading is available.
 * @param maxEmbedLen - Maximum length of the plain text put into `embedText`
 *   (default 800).
 * @returns Chunks of shape `{ key, heading, level, embedText, rawText }`.
 *   When the file cannot be read or the regex fallback throws, a single
 *   placeholder chunk keyed by `relPath` and filled with `defaultTitle` is
 *   returned.
 */
export async function extractChunks(filePath, relPath, defaultTitle, maxEmbedLen = 800) {
    const placeholder = () => [
        { key: relPath, heading: defaultTitle, level: 0, embedText: defaultTitle, rawText: defaultTitle },
    ];
    let raw;
    try {
        raw = readFileSync(filePath, "utf-8");
    }
    catch {
        return placeholder();
    }
    // Preferred: AST-based chunking.
    try {
        const astChunks = await extractChunksAST(raw, relPath, defaultTitle, maxEmbedLen);
        if (astChunks.length > 0) {
            return astChunks.map((c) => ({
                key: c.key,
                heading: c.heading,
                level: c.level,
                embedText: c.embedText,
                rawText: c.rawText,
            }));
        }
    }
    catch {
        /* fall through to regex */
    }
    // Fallback: regex-based chunking.
    try {
        return splitByHeadingRegex(raw, relPath, defaultTitle, maxEmbedLen);
    }
    catch {
        return placeholder();
    }
}
/** Regex fallback for extractChunks: splits `raw` at heading lines matched by HEADING_RE. */
function splitByHeadingRegex(raw, relPath, defaultTitle, maxEmbedLen) {
    const sections = [];
    for (const line of raw.split("\n")) {
        if (HEADING_RE.test(line)) {
            const heading = line.trim();
            sections.push({ heading, level: heading.match(/^#+/)[0].length, lines: [] });
            continue;
        }
        // Text before the first heading goes into a heading-less preamble section.
        if (sections.length === 0) {
            sections.push({ heading: "", level: 0, lines: [] });
        }
        sections[sections.length - 1].lines.push(line);
    }
    // Front-matter title, if any; accepts LF and CRLF line endings.
    let fmTitle = "";
    const fmMatch = raw.match(/^---\r?\n([\s\S]*?)\r?\n---/);
    if (fmMatch) {
        for (const fmLine of fmMatch[1].split("\n")) {
            const colon = fmLine.indexOf(":");
            if (colon < 0)
                continue;
            if (fmLine.slice(0, colon).trim() === "title") {
                fmTitle = fmLine.slice(colon + 1).trim().replace(/['"]/g, "");
            }
        }
    }
    const normalizedPath = relPath.replace(/\\/g, "/");
    const pathContext = normalizedPath.replace(/\//g, " > ").replace(/\.md$/i, "");
    return sections.map((section, i) => {
        // The first section always represents the document as a whole: it is
        // titled from the front matter (or the default title) at level 0.
        const isFirst = i === 0;
        const heading = isFirst ? (fmTitle || defaultTitle) : section.heading;
        const level = isFirst ? 0 : section.level;
        const body = section.lines.join("\n");
        const headingClean = heading.replace(/^#+\s*/, "");
        return {
            key: `${normalizedPath}###${i}`,
            heading,
            level,
            embedText: `[${pathContext}]\n${headingClean}\n${plainText(body, maxEmbedLen)}`,
            rawText: heading ? `${heading}\n${body}` : body,
        };
    });
}
|
|
120
|
+
/**
 * Generates embeddings in batch and persists them to vectors.json.
 *
 * A file is skipped when it already has a vector (file-level key or `###0`
 * chunk key) and its mtime equals the entry's recorded mtime, or when it
 * cannot be read or stat'ed.
 *
 * @param sourceDir - Directory that each entry's `relPath` is resolved against.
 * @param entries - Indexed file entries (`relPath`, `title`, `mtime`).
 * @returns The number of chunk embeddings generated. Returns 0 when semantic
 *   search is disabled or the embedder cannot be initialized.
 */
export async function generateEmbeddings(sourceDir, entries) {
    if (!getSemanticEnabled()) {
        return 0;
    }
    if (!isAvailable() && !(await initialize())) {
        return 0;
    }
    // Estimated character limit (conservative value for mixed CJK/Latin text).
    const charBudget = Math.floor(getCurrentModel().maxTokens * 2);
    const vectors = getEmbeddings();
    const infoByKey = getChunkInfo();
    const pending = [];
    for (const entry of entries) {
        const absPath = resolve(sourceDir, entry.relPath);
        let md5;
        let upToDate;
        try {
            // v5.4: compute the MD5 of the current content.
            md5 = computeMD5(readFileSync(absPath, "utf-8"));
            const mtime = statSync(absPath).mtime.toISOString();
            const hasVector = Boolean(vectors[entry.relPath] || vectors[`${entry.relPath}###0`]);
            upToDate = hasVector && mtime === entry.mtime;
        }
        catch {
            continue;
        }
        if (upToDate) {
            continue;
        }
        const chunks = await extractChunks(absPath, entry.relPath, entry.title, charBudget);
        // Update the manifest: record the MD5 and the AST chunk count.
        updateFileState(entry.relPath, {
            md5,
            astChunkCount: chunks.length,
            astIndexedAt: new Date().toISOString(),
        });
        for (const { key, heading, level, embedText } of chunks) {
            pending.push({ key, heading, level, text: embedText });
        }
    }
    let generated = 0;
    for (const job of pending) {
        try {
            vectors[job.key] = await embed(job.text);
            infoByKey[job.key] = { heading: job.heading, level: job.level };
            generated += 1;
        }
        catch {
            /* skip */
        }
    }
    // Drop legacy file-level keys once chunk-level keys exist for the file.
    for (const { relPath } of entries) {
        if (vectors[relPath] && vectors[`${relPath}###0`]) {
            delete vectors[relPath];
            delete infoByKey[relPath];
        }
    }
    if (generated > 0) {
        const { hfRepo, dim } = getCurrentModel();
        setEmbeddings(vectors, hfRepo, dim);
        setChunkInfo(infoByKey);
    }
    // v5.4: backfill the manifest for files that still have no record
    // (including files that were not re-embedded above).
    for (const entry of entries) {
        if (getFileState(entry.relPath)) {
            continue;
        }
        try {
            const absPath = resolve(sourceDir, entry.relPath);
            const md5 = computeMD5(readFileSync(absPath, "utf-8"));
            const chunks = await extractChunks(absPath, entry.relPath, entry.title, charBudget);
            updateFileState(entry.relPath, {
                md5,
                astChunkCount: chunks.length,
                astIndexedAt: new Date().toISOString(),
            });
        }
        catch {
            /* skip */
        }
    }
    return generated;
}
|
|
210
|
+
/**
 * Generates or updates the embeddings of a single file.
 *
 * @param sourceDir - Directory that `relPath` is resolved against.
 * @param relPath - Relative path of the file.
 * @param title - Title passed on to chunk extraction.
 * @returns true when at least one chunk was embedded and the result was
 *   stored; false when semantic search is disabled, the embedder cannot be
 *   initialized, the file does not exist, or any step throws.
 */
export async function embedSingleFile(sourceDir, relPath, title) {
    if (!getSemanticEnabled()) {
        return false;
    }
    if (!isAvailable() && !(await initialize())) {
        return false;
    }
    const charBudget = Math.floor(getCurrentModel().maxTokens * 2);
    const absPath = resolve(sourceDir, relPath);
    if (!existsSync(absPath)) {
        return false;
    }
    try {
        const chunks = await extractChunks(absPath, relPath, title, charBudget);
        const vectors = getEmbeddings();
        const infoByKey = getChunkInfo();
        let embeddedAny = false;
        for (const { key, heading, level, embedText } of chunks) {
            vectors[key] = await embed(embedText);
            infoByKey[key] = { heading, level };
            embeddedAny = true;
        }
        if (!embeddedAny) {
            return false;
        }
        const { hfRepo, dim } = getCurrentModel();
        setEmbeddings(vectors, hfRepo, dim);
        setChunkInfo(infoByKey);
        return true;
    }
    catch {
        return false;
    }
}
|
|
248
|
+
//# sourceMappingURL=indexer-embed.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"indexer-embed.js","sourceRoot":"","sources":["../src/indexer-embed.ts"],"names":[],"mappings":"AAAA,iCAAiC;AACjC,EAAE;AACF,wEAAwE;AAExE,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AAC7D,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,kBAAkB,EAAE,aAAa,EAAE,aAAa,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAC1G,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACtD,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,KAAK,EAAE,MAAM,eAAe,CAAC;AAC/D,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AACpD,OAAO,EAAE,eAAe,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAG/E,0BAA0B;AAC1B,MAAM,UAAU,GAAG,UAAU,CAAC;AAE9B,wBAAwB;AACxB,SAAS,SAAS,CAAC,IAAY,EAAE,MAAc;IAC7C,OAAO,IAAI;SACR,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC;SAC3B,OAAO,CAAC,oBAAoB,EAAE,EAAE,CAAC;SACjC,OAAO,CAAC,wBAAwB,EAAE,IAAI,CAAC;SACvC,OAAO,CAAC,yBAAyB,EAAE,IAAI,CAAC;SACxC,OAAO,CAAC,gBAAgB,EAAE,EAAE,CAAC;SAC7B,OAAO,CAAC,gBAAgB,EAAE,EAAE,CAAC;SAC7B,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC;SACxB,IAAI,EAAE;SACN,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;AACtB,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,QAAgB,EAChB,OAAe,EACf,YAAoB,EACpB,WAAW,GAAG,GAAG;IAEjB,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAC5C,SAAS;QACT,MAAM,SAAS,GAAG,MAAM,gBAAgB,CAAC,GAAG,EAAE,OAAO,EAAE,YAAY,EAAE,WAAW,CAAC,CAAC;QAClF,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBAC3B,GAAG,EAAE,CAAC,CAAC,GAAG;gBACV,OAAO,EAAE,CAAC,CAAC,OAAO;gBAClB,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,SAAS,EAAE,CAAC,CAAC,SAAS;gBACtB,OAAO,EAAE,CAAC,CAAC,OAAO;aACnB,CAAC,CAAC,CAAC;QACN,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,2BAA2B;IAC7B,CAAC;IAED,kBAAkB;IAClB,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAC5C,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC9B,MAAM,MAAM,GAA0D,EAAE,CAAC;QAEzE,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;YACjC,IAAI,CAAC,EAAE,CAAC;gBACN,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;gBAC5B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAE,CAAC,CAAC,CAAC,CAA
C,MAAM,CAAC;gBAC9C,MAAM,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;YAC7C,CAAC;iBAAM,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC7B,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC7C,CAAC;iBAAM,CAAC;gBACN,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,OAAO,KAAK,EAAE,EAAE,CAAC;oBAC/D,MAAM,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;gBACpD,CAAC;gBACD,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC7C,CAAC;QACH,CAAC;QAED,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,MAAM,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,YAAY,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;QAC1D,CAAC;QAED,IAAI,OAAO,GAAG,EAAE,CAAC;QACjB,MAAM,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,uBAAuB,CAAC,CAAC;QACnD,IAAI,OAAO,EAAE,CAAC;YACZ,KAAK,MAAM,EAAE,IAAI,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;gBACxC,MAAM,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;gBAC3B,IAAI,EAAE,GAAG,CAAC;oBAAE,SAAS;gBACrB,MAAM,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;gBACjC,IAAI,CAAC,KAAK,OAAO;oBAAE,OAAO,GAAG,EAAE,CAAC,KAAK,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;YAC5E,CAAC;QACH,CAAC;QAED,MAAM,MAAM,GAA0F,EAAE,CAAC;QACzG,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YACrB,IAAI,OAAe,CAAC;YACpB,IAAI,KAAa,CAAC;YAElB,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,OAAO,KAAK,EAAE,IAAI,EAAE,CAAC,KAAK,KAAK,CAAC,EAAE,CAAC;gBACnD,OAAO,GAAG,OAAO,IAAI,YAAY,CAAC;gBAClC,KAAK,GAAG,CAAC,CAAC;YACZ,CAAC;iBAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC;gBACnC,OAAO,GAAG,OAAO,IAAI,YAAY,CAAC;gBAClC,KAAK,GAAG,CAAC,CAAC;YACZ,CAAC;iBAAM,CAAC;gBACN,OAAO,GAAG,EAAE,CAAC,OAAO,CAAC;gBACrB,KAAK,GAAG,EAAE,CAAC,KAAK,CAAC;YACnB,CAAC;YAED,MAAM,YAAY,GAAG,OAAO,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;YACnD,MAAM,OAAO,GAAG,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,KAAK,EAAE,
CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACrF,MAAM,WAAW,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;YAC5F,MAAM,SAAS,GAAG,IAAI,WAAW,MAAM,YAAY,KAAK,SAAS,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,WAAW,CAAC,EAAE,CAAC;YAEtG,MAAM,CAAC,IAAI,CAAC;gBACV,GAAG,EAAE,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE;gBAC5C,OAAO;gBACP,KAAK;gBACL,SAAS;gBACT,OAAO;aACR,CAAC,CAAC;QACL,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,CAAC,EAAE,GAAG,EAAE,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,KAAK,EAAE,CAAC,EAAE,SAAS,EAAE,YAAY,EAAE,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC;IAC7G,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,SAAiB,EACjB,OAAoB;IAEpB,IAAI,CAAC,kBAAkB,EAAE;QAAE,OAAO,CAAC,CAAC;IAEpC,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC;QACnB,MAAM,EAAE,GAAG,MAAM,UAAU,EAAE,CAAC;QAC9B,IAAI,CAAC,EAAE;YAAE,OAAO,CAAC,CAAC;IACpB,CAAC;IAED,MAAM,KAAK,GAAG,eAAe,EAAE,CAAC;IAChC,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,kBAAkB;IAEvE,MAAM,QAAQ,GAAG,aAAa,EAAE,CAAC;IACjC,MAAM,SAAS,GAAG,YAAY,EAAE,CAAC;IAEjC,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,MAAM,OAAO,GAAoE,EAAE,CAAC;IAEpF,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;QAEnD,eAAe;QACf,IAAI,UAAU,GAAG,EAAE,CAAC;QACpB,IAAI,CAAC;YACH,UAAU,GAAG,UAAU,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;QAC3D,CAAC;QAAC,MAAM,CAAC;YACP,SAAS;QACX,CAAC;QAED,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC;YACzD,IAAI,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,QAAQ,CAAC,GAAG,KAAK,CAAC,OAAO,MAAM,CAAC,EAAE,CAAC;gBAChE,IAAI,SAAS,KAAK,KAAK,CAAC,KAAK;oBAAE,SAAS;YAC1C,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,SAAS;QACX,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,QAAQ,EAAE,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;QAEtF,8BAA8B;QAC9B,eAAe,CAAC,KAAK,CAAC,OAAO,EAAE;YAC7B,GAAG,EAAE,UAAU;YACf,aAAa,EAAE,MAAM,CAAC,MAAM;YAC5B,YAAY,E
AAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACvC,CAAC,CAAC;QAEH,KAAK,MAAM,EAAE,IAAI,MAAM,EAAE,CAAC;YACxB,OAAO,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,EAAE,OAAO,EAAE,EAAE,CAAC,OAAO,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,SAAS,EAAE,CAAC,CAAC;QAC1F,CAAC;IACH,CAAC;IAED,KAAK,MAAM,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,OAAO,EAAE,CAAC;QACpD,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC;YAC9B,QAAQ,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;YACpB,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;YACpC,SAAS,EAAE,CAAC;QACd,CAAC;QAAC,MAAM,CAAC;YACP,UAAU;QACZ,CAAC;IACH,CAAC;IAED,aAAa;IACb,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,IAAI,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,QAAQ,CAAC,GAAG,KAAK,CAAC,OAAO,MAAM,CAAC,EAAE,CAAC;YAChE,OAAO,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAC/B,OAAO,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IAED,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;QAClB,MAAM,KAAK,GAAG,eAAe,EAAE,CAAC;QAChC,aAAa,CAAC,QAAQ,EAAE,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC;QACjD,YAAY,CAAC,SAAS,CAAC,CAAC;IAC1B,CAAC;IAED,8CAA8C;IAC9C,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC;YACjC,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;gBACnD,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;gBAC5C,MAAM,GAAG,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;gBAC5B,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,QAAQ,EAAE,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;gBACtF,eAAe,CAAC,KAAK,CAAC,OAAO,EAAE;oBAC7B,GAAG;oBACH,aAAa,EAAE,MAAM,CAAC,MAAM;oBAC5B,YAAY,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;iBACvC,CAAC,CAAC;YACL,CAAC;YAAC,MAAM,CAAC,CAAC,UAAU,CAAC,CAAC;QACxB,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,SAAiB,EACjB,OAAe,EACf,KAAa;IAEb,IAAI,CAAC,kBAAkB,EAAE;QAAE,OAAO,KAAK,CAAC;IAExC,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC;QACnB,MAAM,EAAE,GAAG,MAAM,UAAU,EAAE,CAAC;QAC9B,IAAI,CAAC,EAAE;YAAE,OAAO,KAAK,CAAC;IACxB,CAAC;IAED,MAAM,KAAK,GAAG,eAAe,EAAE,CAAC;IAChC,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CA
AC,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;IAEpD,MAAM,QAAQ,GAAG,OAAO,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;IAC7C,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC;QAAE,OAAO,KAAK,CAAC;IAExC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,CAAC,CAAC;QAC1E,MAAM,QAAQ,GAAG,aAAa,EAAE,CAAC;QACjC,MAAM,SAAS,GAAG,YAAY,EAAE,CAAC;QAEjC,IAAI,EAAE,GAAG,KAAK,CAAC;QACf,KAAK,MAAM,EAAE,IAAI,MAAM,EAAE,CAAC;YACxB,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,EAAE,CAAC,SAAS,CAAC,CAAC;YACtC,QAAQ,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;YACvB,SAAS,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,EAAE,OAAO,EAAE,EAAE,CAAC,OAAO,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,EAAE,CAAC;YAC7D,EAAE,GAAG,IAAI,CAAC;QACZ,CAAC;QAED,IAAI,EAAE,EAAE,CAAC;YACP,MAAM,KAAK,GAAG,eAAe,EAAE,CAAC;YAChC,aAAa,CAAC,QAAQ,EAAE,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC;YACjD,YAAY,CAAC,SAAS,CAAC,CAAC;QAC1B,CAAC;QACD,OAAO,EAAE,CAAC;IACZ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"indexer-scan.d.ts","sourceRoot":"","sources":["../src/indexer-scan.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AAI5C,kCAAkC;AAClC,wBAAsB,OAAO,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC,CAIrE"}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
// indexer-scan.ts — 文件扫描器 (v5.4)
|
|
2
|
+
//
|
|
3
|
+
// 递归扫描目录,提取 .md 文件为 FileEntry
|
|
4
|
+
import { readFileSync } from "node:fs";
|
|
5
|
+
import { readdir, stat } from "node:fs/promises";
|
|
6
|
+
import { resolve } from "node:path";
|
|
7
|
+
import { parseFileEntry } from "./parser.js";
|
|
8
|
+
import { setContent } from "./content-cache.js";
|
|
9
|
+
/**
 * Recursively scan `sourceDir` and collect a FileEntry for every `.md` file.
 *
 * @param sourceDir Root directory of the scan; it is also the root passed to
 *                  `walk` for building each entry.
 * @returns The entries gathered by `walk`, in traversal order.
 */
export async function scanDir(sourceDir) {
    const entries = [];
    // The root and the starting directory are the same for the top-level call.
    await walk(sourceDir, sourceDir, entries);
    return entries;
}
|
|
15
|
+
/**
 * Recursive worker behind `scanDir`.
 *
 * Visits every item of `dir`. Sub-directories are descended into, except
 * those whose name starts with "." or equals "node_modules". Regular files
 * ending in ".md" are passed to `parseFileEntry`; each truthy result is
 * appended to `out` and the file's text is stored with `setContent`.
 *
 * @param root Scan root, forwarded unchanged to `parseFileEntry` and to
 *             nested calls.
 * @param dir  Directory to list in this call.
 * @param out  Array that receives the parsed entries (mutated in place).
 */
async function walk(root, dir, out) {
    let names;
    try {
        names = await readdir(dir);
    }
    catch {
        // Unreadable directory: contributes nothing.
        return;
    }

    for (const name of names) {
        const absPath = resolve(dir, name);

        let info;
        try {
            info = await stat(absPath);
        }
        catch {
            // Item could not be stat'ed: ignore it.
            continue;
        }

        if (info.isDirectory()) {
            const excluded = name === "node_modules" || name.startsWith(".");
            if (!excluded) {
                await walk(root, absPath, out);
            }
            continue;
        }

        if (!info.isFile() || !name.endsWith(".md")) {
            continue;
        }

        const entry = parseFileEntry(root, absPath, info.mtime.toISOString());
        if (!entry) {
            continue;
        }
        out.push(entry);

        // Storing the content is best effort; the entry stays in `out` even if
        // the read fails.
        try {
            setContent(entry.relPath, readFileSync(absPath, "utf-8"));
        }
        catch {
            /* skip */
        }
    }
}
|
|
51
|
+
//# sourceMappingURL=indexer-scan.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"indexer-scan.js","sourceRoot":"","sources":["../src/indexer-scan.ts"],"names":[],"mappings":"AAAA,iCAAiC;AACjC,EAAE;AACF,8BAA8B;AAE9B,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAC;AACjD,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEpC,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAEhD,kCAAkC;AAClC,MAAM,CAAC,KAAK,UAAU,OAAO,CAAC,SAAiB;IAC7C,MAAM,OAAO,GAAgB,EAAE,CAAC;IAChC,MAAM,IAAI,CAAC,SAAS,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;IAC1C,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,KAAK,UAAU,IAAI,CACjB,IAAY,EACZ,GAAW,EACX,GAAgB;IAEhB,IAAI,KAAe,CAAC;IACpB,IAAI,CAAC;QACH,KAAK,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC;IAC7B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;IACT,CAAC;IACD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;QAChC,IAAI,EAAE,CAAC;QACP,IAAI,CAAC;YACH,EAAE,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,CAAC;QACxB,CAAC;QAAC,MAAM,CAAC;YACP,SAAS;QACX,CAAC;QACD,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,CAAC;YACrB,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,KAAK,cAAc;gBAAE,SAAS;YAC9D,MAAM,IAAI,CAAC,IAAI,EAAE,IAAI,EAAE,GAAG,CAAC,CAAC;QAC9B,CAAC;aAAM,IAAI,EAAE,CAAC,MAAM,EAAE,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;YAC/C,MAAM,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC;YACjE,IAAI,KAAK,EAAE,CAAC;gBACV,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBAChB,IAAI,CAAC;oBACH,UAAU,CAAC,KAAK,CAAC,OAAO,EAAE,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC;gBACzD,CAAC;gBAAC,MAAM,CAAC;oBACP,UAAU;gBACZ,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export { scanDir } from "./indexer-scan.js";
|
|
2
|
+
export { extractChunks, generateEmbeddings, embedSingleFile } from "./indexer-embed.js";
|
|
3
|
+
export { getRawChunks, storeCompiledChunks, storeFileSegments, storeFileLLMVector } from "./indexer-compile.js";
|
|
4
|
+
//# sourceMappingURL=indexer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"indexer.d.ts","sourceRoot":"","sources":["../src/indexer.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAC;AAC5C,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACxF,OAAO,EAAE,YAAY,EAAE,mBAAmB,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC"}
|
package/dist/indexer.js
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
// indexer.ts — 索引导入 barrel (v5.4)
|
|
2
|
+
//
|
|
3
|
+
// 聚合 scan / embed / compile 三个子模块。
|
|
4
|
+
export { scanDir } from "./indexer-scan.js";
|
|
5
|
+
export { extractChunks, generateEmbeddings, embedSingleFile } from "./indexer-embed.js";
|
|
6
|
+
export { getRawChunks, storeCompiledChunks, storeFileSegments, storeFileLLMVector } from "./indexer-compile.js";
|
|
7
|
+
//# sourceMappingURL=indexer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"indexer.js","sourceRoot":"","sources":["../src/indexer.ts"],"names":[],"mappings":"AAAA,kCAAkC;AAClC,EAAE;AACF,mCAAmC;AAEnC,OAAO,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAC;AAC5C,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACxF,OAAO,EAAE,YAAY,EAAE,mBAAmB,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/** Metadata record describing one embedding model. */
export interface ModelInfo {
|
|
2
|
+
/** Short identifier, e.g. "bge-base-zh-v1.5" */
|
|
3
|
+
id: string;
|
|
4
|
+
/** Display name */
|
|
5
|
+
name: string;
|
|
6
|
+
/** HuggingFace repository ID (the transformers.js-compatible ONNX conversion) */
|
|
7
|
+
hfRepo: string;
|
|
8
|
+
/** Embedding vector dimension */
|
|
9
|
+
dim: number;
|
|
10
|
+
/** One-line description */
|
|
11
|
+
description: string;
|
|
12
|
+
/** Supported languages */
|
|
13
|
+
languages: string[];
|
|
14
|
+
/** Maximum number of input tokens */
|
|
15
|
+
maxTokens: number;
|
|
16
|
+
/** Approximate size, in bytes, of the INT8-quantized ONNX file */
|
|
17
|
+
int8Size: number;
|
|
18
|
+
/** Approximate size, in bytes, of the full-precision FP32 ONNX file */
|
|
19
|
+
fp32Size: number;
|
|
20
|
+
}
|
|
21
|
+
/** List of ModelInfo records; presumably the built-in model catalogue (contents are not visible in this declaration file). */
export declare const BUILTIN_MODELS: ModelInfo[];
|
|
22
|
+
/** Get all built-in models. */
|
|
23
|
+
export declare function getBuiltinModels(): ModelInfo[];
|
|
24
|
+
/** Look up a model by its id; the result is `undefined` when no model is returned for that id. */
|
|
25
|
+
export declare function findModel(id: string | undefined): ModelInfo | undefined;
|
|
26
|
+
/** Get the metadata of the currently selected model. */
|
|
27
|
+
export declare function getCurrentModel(): ModelInfo;
|
|
28
|
+
/** Switch the selected model. Returns the new model's info, or `null` if `id` does not exist. */
|
|
29
|
+
export declare function selectModel(id: string): ModelInfo | null;
|
|
30
|
+
/** Get the id of the default model. */
|
|
31
|
+
export declare function getDefaultModelId(): string;
|
|
32
|
+
//# sourceMappingURL=model-registry.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"model-registry.d.ts","sourceRoot":"","sources":["../src/model-registry.ts"],"names":[],"mappings":"AAYA,MAAM,WAAW,SAAS;IACxB,+BAA+B;IAC/B,EAAE,EAAE,MAAM,CAAC;IACX,UAAU;IACV,IAAI,EAAE,MAAM,CAAC;IACb,sDAAsD;IACtD,MAAM,EAAE,MAAM,CAAC;IACf,WAAW;IACX,GAAG,EAAE,MAAM,CAAC;IACZ,WAAW;IACX,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW;IACX,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,mBAAmB;IACnB,SAAS,EAAE,MAAM,CAAC;IAClB,oCAAoC;IACpC,QAAQ,EAAE,MAAM,CAAC;IACjB,oCAAoC;IACpC,QAAQ,EAAE,MAAM,CAAC;CAClB;AAID,eAAO,MAAM,cAAc,EAAE,SAAS,EA6CrC,CAAC;AAIF,eAAe;AACf,wBAAgB,gBAAgB,IAAI,SAAS,EAAE,CAE9C;AAED,gBAAgB;AAChB,wBAAgB,SAAS,CAAC,EAAE,EAAE,MAAM,GAAG,SAAS,GAAG,SAAS,GAAG,SAAS,CAEvE;AAED,mBAAmB;AACnB,wBAAgB,eAAe,IAAI,SAAS,CAG3C;AAED,qCAAqC;AACrC,wBAAgB,WAAW,CAAC,EAAE,EAAE,MAAM,GAAG,SAAS,GAAG,IAAI,CAKxD;AAED,cAAc;AACd,wBAAgB,iBAAiB,IAAI,MAAM,CAE1C"}
|