@hsingjui/contextweaver 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/dist/{SearchService-CMHVTNSM.js → SearchService-DYGJT2DZ.js} +269 -241
- package/dist/chunk-C7XDGBT5.js +172 -0
- package/dist/{chunk-4XOWK7YZ.js → chunk-O3HDM3CF.js} +103 -72
- package/dist/{chunk-NHQE2IOY.js → chunk-QWQ64TBE.js} +266 -275
- package/dist/{chunk-ESTPBFCW.js → chunk-SKBAE26T.js} +130 -28
- package/dist/{chunk-AVBQ77MN.js → chunk-VW5RACJC.js} +119 -225
- package/dist/{chunk-RU4OUBZS.js → chunk-WWYSLCNZ.js} +131 -45
- package/dist/{codebaseRetrieval-NLSYDBF4.js → codebaseRetrieval-AV4GK6FT.js} +4 -3
- package/dist/{config-BQCFTZVY.js → config-LCOJHTCF.js} +1 -3
- package/dist/index.js +47 -32
- package/dist/lock-PX2BX2YN.js +106 -0
- package/dist/scanner-HYP3L57R.js +10 -0
- package/dist/{server-LC4NWEGV.js → server-7PYHHTOM.js} +7 -8
- package/package.json +5 -3
- package/dist/scanner-CXUYLK56.js +0 -9
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
// src/config.ts
|
|
2
|
-
import dotenv from "dotenv";
|
|
3
|
-
import path from "path";
|
|
4
|
-
import os from "os";
|
|
5
2
|
import fs from "fs";
|
|
3
|
+
import os from "os";
|
|
4
|
+
import path from "path";
|
|
5
|
+
import dotenv from "dotenv";
|
|
6
6
|
var isDev = process.env.NODE_ENV === "dev";
|
|
7
7
|
var isMcpMode = process.argv.includes("mcp");
|
|
8
8
|
function loadEnv() {
|
|
@@ -79,8 +79,8 @@ function getEmbeddingConfig() {
|
|
|
79
79
|
apiKey,
|
|
80
80
|
baseUrl,
|
|
81
81
|
model,
|
|
82
|
-
maxConcurrency: isNaN(maxConcurrency) ? 4 : maxConcurrency,
|
|
83
|
-
dimensions: isNaN(dimensions) ? 1024 : dimensions
|
|
82
|
+
maxConcurrency: Number.isNaN(maxConcurrency) ? 4 : maxConcurrency,
|
|
83
|
+
dimensions: Number.isNaN(dimensions) ? 1024 : dimensions
|
|
84
84
|
};
|
|
85
85
|
}
|
|
86
86
|
function getRerankerConfig() {
|
|
@@ -101,20 +101,63 @@ function getRerankerConfig() {
|
|
|
101
101
|
apiKey,
|
|
102
102
|
baseUrl,
|
|
103
103
|
model,
|
|
104
|
-
topN: isNaN(topN) ? 10 : topN
|
|
104
|
+
topN: Number.isNaN(topN) ? 10 : topN
|
|
105
105
|
};
|
|
106
106
|
}
|
|
107
107
|
var DEFAULT_EXCLUDE_PATTERNS = [
|
|
108
|
-
//
|
|
108
|
+
// --- 1. 依赖与环境 (绝对黑名单) ---
|
|
109
|
+
"node_modules",
|
|
110
|
+
"bower_components",
|
|
109
111
|
".venv",
|
|
110
112
|
"venv",
|
|
111
|
-
|
|
112
|
-
"env",
|
|
113
|
-
|
|
114
|
-
//
|
|
113
|
+
// Python 虚拟环境
|
|
114
|
+
".env.*",
|
|
115
|
+
// 环境变量文件 (.env.local, .env.production 等)
|
|
116
|
+
// --- 2. 锁文件 (Token 杀手,且语义密度极低) ---
|
|
117
|
+
"package-lock.json",
|
|
118
|
+
"yarn.lock",
|
|
119
|
+
"pnpm-lock.yaml",
|
|
120
|
+
"bun.lockb",
|
|
121
|
+
"poetry.lock",
|
|
122
|
+
"Gemfile.lock",
|
|
123
|
+
"composer.lock",
|
|
124
|
+
"Cargo.lock",
|
|
125
|
+
// --- 3. 版本控制与 IDE ---
|
|
115
126
|
".git",
|
|
116
127
|
".svn",
|
|
117
128
|
".hg",
|
|
129
|
+
".idea",
|
|
130
|
+
".vscode",
|
|
131
|
+
".vs",
|
|
132
|
+
// --- 4. 构建产物与缓存 ---
|
|
133
|
+
// 通用构建输出
|
|
134
|
+
"dist",
|
|
135
|
+
"build",
|
|
136
|
+
"out",
|
|
137
|
+
"target",
|
|
138
|
+
// 编译产物
|
|
139
|
+
"*.pyc",
|
|
140
|
+
"*.pyo",
|
|
141
|
+
"*.pyd",
|
|
142
|
+
"*.so",
|
|
143
|
+
"*.dll",
|
|
144
|
+
"*.exe",
|
|
145
|
+
"*.bin",
|
|
146
|
+
"*.wasm",
|
|
147
|
+
// 现代前端框架产物
|
|
148
|
+
".next",
|
|
149
|
+
".nuxt",
|
|
150
|
+
".output",
|
|
151
|
+
".svelte-kit",
|
|
152
|
+
// Bundler 缓存
|
|
153
|
+
".turbo",
|
|
154
|
+
".parcel-cache",
|
|
155
|
+
".webpack",
|
|
156
|
+
".esbuild",
|
|
157
|
+
".rollup.cache",
|
|
158
|
+
// 测试覆盖率
|
|
159
|
+
"coverage",
|
|
160
|
+
".nyc_output",
|
|
118
161
|
// Python 缓存
|
|
119
162
|
"__pycache__",
|
|
120
163
|
".pytest_cache",
|
|
@@ -122,24 +165,84 @@ var DEFAULT_EXCLUDE_PATTERNS = [
|
|
|
122
165
|
".tox",
|
|
123
166
|
".eggs",
|
|
124
167
|
"*.egg-info",
|
|
125
|
-
//
|
|
126
|
-
|
|
127
|
-
"
|
|
128
|
-
"
|
|
129
|
-
"
|
|
130
|
-
//
|
|
131
|
-
"
|
|
132
|
-
"
|
|
133
|
-
"
|
|
134
|
-
|
|
168
|
+
// --- 5. 纯噪音文件 (无文本语义) ---
|
|
169
|
+
// 压缩文件与 SourceMap
|
|
170
|
+
"*.min.js",
|
|
171
|
+
"*.min.css",
|
|
172
|
+
"*.map",
|
|
173
|
+
// 图片与多媒体
|
|
174
|
+
"*.svg",
|
|
175
|
+
"*.png",
|
|
176
|
+
"*.jpg",
|
|
177
|
+
"*.jpeg",
|
|
178
|
+
"*.gif",
|
|
179
|
+
"*.ico",
|
|
180
|
+
"*.webp",
|
|
181
|
+
"*.bmp",
|
|
182
|
+
"*.pdf",
|
|
183
|
+
"*.mp3",
|
|
184
|
+
"*.mp4",
|
|
185
|
+
"*.wav",
|
|
186
|
+
"*.webm",
|
|
187
|
+
"*.ogg",
|
|
188
|
+
"*.flac",
|
|
189
|
+
// 字体文件
|
|
190
|
+
"*.woff",
|
|
191
|
+
"*.woff2",
|
|
192
|
+
"*.ttf",
|
|
193
|
+
"*.eot",
|
|
194
|
+
"*.otf",
|
|
195
|
+
// 压缩包
|
|
196
|
+
"*.zip",
|
|
197
|
+
"*.tar",
|
|
198
|
+
"*.gz",
|
|
199
|
+
"*.rar",
|
|
200
|
+
"*.7z",
|
|
201
|
+
// 系统垃圾
|
|
135
202
|
".DS_Store",
|
|
136
203
|
"Thumbs.db",
|
|
137
|
-
//
|
|
138
|
-
|
|
139
|
-
"
|
|
140
|
-
"*.
|
|
141
|
-
|
|
142
|
-
"
|
|
204
|
+
// --- 6. 测试噪音 (保留 *.test.ts,但剔除这些) ---
|
|
205
|
+
// Jest 快照
|
|
206
|
+
"__snapshots__",
|
|
207
|
+
"*.snap",
|
|
208
|
+
// 测试夹具与数据
|
|
209
|
+
"test/fixtures",
|
|
210
|
+
"tests/fixtures",
|
|
211
|
+
"__fixtures__",
|
|
212
|
+
"test/data",
|
|
213
|
+
"tests/data",
|
|
214
|
+
"testdata",
|
|
215
|
+
"test-data",
|
|
216
|
+
"testutils",
|
|
217
|
+
// Mock 数据
|
|
218
|
+
"mock",
|
|
219
|
+
"mocks",
|
|
220
|
+
"__mocks__",
|
|
221
|
+
"stub",
|
|
222
|
+
"stubs",
|
|
223
|
+
// --- 7. 第三方与生成文件 ---
|
|
224
|
+
// 第三方依赖目录
|
|
225
|
+
"vendor",
|
|
226
|
+
"vendors",
|
|
227
|
+
"third_party",
|
|
228
|
+
"thirdparty",
|
|
229
|
+
"3rdparty",
|
|
230
|
+
"external",
|
|
231
|
+
"externals",
|
|
232
|
+
// 生成文件
|
|
233
|
+
"generated",
|
|
234
|
+
"gen",
|
|
235
|
+
"auto-generated",
|
|
236
|
+
"*.generated.ts",
|
|
237
|
+
"*.generated.js",
|
|
238
|
+
"*.pb.go",
|
|
239
|
+
"*.pb.ts",
|
|
240
|
+
// protobuf 生成
|
|
241
|
+
// --- 8. 日志与临时文件 ---
|
|
242
|
+
"*.log",
|
|
243
|
+
".cache",
|
|
244
|
+
".tmp",
|
|
245
|
+
"tmp"
|
|
143
246
|
];
|
|
144
247
|
function getExcludePatterns() {
|
|
145
248
|
const envPatterns = process.env.IGNORE_PATTERNS;
|
|
@@ -158,6 +261,5 @@ export {
|
|
|
158
261
|
checkRerankerEnv,
|
|
159
262
|
getEmbeddingConfig,
|
|
160
263
|
getRerankerConfig,
|
|
161
|
-
DEFAULT_EXCLUDE_PATTERNS,
|
|
162
264
|
getExcludePatterns
|
|
163
265
|
};
|
|
@@ -1,172 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
} from "./chunk-ESTPBFCW.js";
|
|
5
|
-
|
|
6
|
-
// src/utils/logger.ts
|
|
7
|
-
import pino from "pino";
|
|
8
|
-
import path from "path";
|
|
9
|
-
import os from "os";
|
|
10
|
-
import fs from "fs";
|
|
11
|
-
import { Writable } from "stream";
|
|
12
|
-
var logLevel = isDev ? "debug" : "info";
|
|
13
|
-
var logDir = path.join(os.homedir(), ".contextweaver", "logs");
|
|
14
|
-
var LOG_RETENTION_DAYS = 7;
|
|
15
|
-
function ensureLogDir(dir) {
|
|
16
|
-
if (!fs.existsSync(dir)) {
|
|
17
|
-
fs.mkdirSync(dir, { recursive: true });
|
|
18
|
-
}
|
|
19
|
-
}
|
|
20
|
-
function getLogFileName() {
|
|
21
|
-
const now = /* @__PURE__ */ new Date();
|
|
22
|
-
const dateStr = now.toISOString().split("T")[0];
|
|
23
|
-
return `app.${dateStr}.log`;
|
|
24
|
-
}
|
|
25
|
-
function formatTime() {
|
|
26
|
-
const now = /* @__PURE__ */ new Date();
|
|
27
|
-
const pad = (n) => n.toString().padStart(2, "0");
|
|
28
|
-
return `${now.getFullYear()}-${pad(now.getMonth() + 1)}-${pad(now.getDate())} ${pad(now.getHours())}:${pad(now.getMinutes())}:${pad(now.getSeconds())}`;
|
|
29
|
-
}
|
|
30
|
-
function getLevelLabel(level) {
|
|
31
|
-
const labels = {
|
|
32
|
-
10: "TRACE",
|
|
33
|
-
20: "DEBUG",
|
|
34
|
-
30: "INFO",
|
|
35
|
-
40: "WARN",
|
|
36
|
-
50: "ERROR",
|
|
37
|
-
60: "FATAL"
|
|
38
|
-
};
|
|
39
|
-
return labels[level] || "INFO";
|
|
40
|
-
}
|
|
41
|
-
function cleanupOldLogs(dir) {
|
|
42
|
-
try {
|
|
43
|
-
if (!fs.existsSync(dir)) return;
|
|
44
|
-
const files = fs.readdirSync(dir);
|
|
45
|
-
const now = Date.now();
|
|
46
|
-
const maxAge = LOG_RETENTION_DAYS * 24 * 60 * 60 * 1e3;
|
|
47
|
-
const logPattern = /^app\.(\d{4}-\d{2}-\d{2})\.log$/;
|
|
48
|
-
for (const file of files) {
|
|
49
|
-
const match = file.match(logPattern);
|
|
50
|
-
if (!match) continue;
|
|
51
|
-
const dateStr = match[1];
|
|
52
|
-
const fileDate = new Date(dateStr).getTime();
|
|
53
|
-
if (isNaN(fileDate)) continue;
|
|
54
|
-
if (now - fileDate > maxAge) {
|
|
55
|
-
const filePath = path.join(dir, file);
|
|
56
|
-
try {
|
|
57
|
-
fs.unlinkSync(filePath);
|
|
58
|
-
console.error(`[Logger] \u6E05\u7406\u8FC7\u671F\u65E5\u5FD7: ${file}`);
|
|
59
|
-
} catch {
|
|
60
|
-
}
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
} catch {
|
|
64
|
-
}
|
|
65
|
-
}
|
|
66
|
-
function createFormattedStream(filePath) {
|
|
67
|
-
const writeStream = fs.createWriteStream(filePath, { flags: "a" });
|
|
68
|
-
return new Writable({
|
|
69
|
-
write(chunk, encoding, callback) {
|
|
70
|
-
try {
|
|
71
|
-
const log = JSON.parse(chunk.toString());
|
|
72
|
-
const time = formatTime();
|
|
73
|
-
const level = getLevelLabel(log.level);
|
|
74
|
-
const msg = log.msg || "";
|
|
75
|
-
const { level: _l, time: _t, pid: _p, hostname: _h, name: _n, msg: _m, ...extra } = log;
|
|
76
|
-
let line = `${time} [${level}] ${msg}`;
|
|
77
|
-
if (Object.keys(extra).length > 0) {
|
|
78
|
-
const extraLines = JSON.stringify(extra, null, 2).split("\n").map((l, i) => i === 0 ? l : " " + l).join("\n");
|
|
79
|
-
line += `
|
|
80
|
-
${extraLines}`;
|
|
81
|
-
}
|
|
82
|
-
writeStream.write(line + "\n", callback);
|
|
83
|
-
} catch {
|
|
84
|
-
writeStream.write(chunk.toString(), callback);
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
});
|
|
88
|
-
}
|
|
89
|
-
function createConsoleStream() {
|
|
90
|
-
const colors = {
|
|
91
|
-
10: "\x1B[90m",
|
|
92
|
-
// TRACE - 灰色
|
|
93
|
-
20: "\x1B[36m",
|
|
94
|
-
// DEBUG - 青色
|
|
95
|
-
30: "\x1B[32m",
|
|
96
|
-
// INFO - 绿色
|
|
97
|
-
40: "\x1B[33m",
|
|
98
|
-
// WARN - 黄色
|
|
99
|
-
50: "\x1B[31m",
|
|
100
|
-
// ERROR - 红色
|
|
101
|
-
60: "\x1B[35m"
|
|
102
|
-
// FATAL - 品红
|
|
103
|
-
};
|
|
104
|
-
const reset = "\x1B[0m";
|
|
105
|
-
return new Writable({
|
|
106
|
-
write(chunk, encoding, callback) {
|
|
107
|
-
try {
|
|
108
|
-
const log = JSON.parse(chunk.toString());
|
|
109
|
-
const time = formatTime();
|
|
110
|
-
const level = getLevelLabel(log.level);
|
|
111
|
-
const color = colors[log.level] || "";
|
|
112
|
-
const msg = log.msg || "";
|
|
113
|
-
const { level: _l, time: _t, pid: _p, hostname: _h, name: _n, msg: _m, ...extra } = log;
|
|
114
|
-
let line = `${color}${time} [${level}]${reset} ${msg}`;
|
|
115
|
-
if (Object.keys(extra).length > 0) {
|
|
116
|
-
const extraStr = JSON.stringify(extra);
|
|
117
|
-
line += ` ${color}${extraStr}${reset}`;
|
|
118
|
-
}
|
|
119
|
-
process.stdout.write(line + "\n", callback);
|
|
120
|
-
} catch {
|
|
121
|
-
process.stdout.write(chunk.toString(), callback);
|
|
122
|
-
}
|
|
123
|
-
}
|
|
124
|
-
});
|
|
125
|
-
}
|
|
126
|
-
function createDevLogger() {
|
|
127
|
-
ensureLogDir(logDir);
|
|
128
|
-
cleanupOldLogs(logDir);
|
|
129
|
-
const logPath = path.join(logDir, getLogFileName());
|
|
130
|
-
const logStream = createFormattedStream(logPath);
|
|
131
|
-
const consoleStream = createConsoleStream();
|
|
132
|
-
return pino(
|
|
133
|
-
{
|
|
134
|
-
level: logLevel,
|
|
135
|
-
name: "contextweaver"
|
|
136
|
-
},
|
|
137
|
-
// MCP 模式下禁用控制台输出,避免污染 STDIO 协议流
|
|
138
|
-
isMcpMode ? logStream : pino.multistream([
|
|
139
|
-
{ stream: logStream, level: logLevel },
|
|
140
|
-
{ stream: consoleStream, level: logLevel }
|
|
141
|
-
])
|
|
142
|
-
);
|
|
143
|
-
}
|
|
144
|
-
function createProdLogger() {
|
|
145
|
-
ensureLogDir(logDir);
|
|
146
|
-
cleanupOldLogs(logDir);
|
|
147
|
-
const logPath = path.join(logDir, getLogFileName());
|
|
148
|
-
const logStream = createFormattedStream(logPath);
|
|
149
|
-
const consoleStream = createConsoleStream();
|
|
150
|
-
return pino(
|
|
151
|
-
{
|
|
152
|
-
level: logLevel,
|
|
153
|
-
name: "contextweaver"
|
|
154
|
-
},
|
|
155
|
-
// MCP 模式下禁用控制台输出,避免污染 STDIO 协议流
|
|
156
|
-
isMcpMode ? logStream : pino.multistream([
|
|
157
|
-
{ stream: logStream, level: logLevel },
|
|
158
|
-
{ stream: consoleStream, level: logLevel }
|
|
159
|
-
])
|
|
160
|
-
);
|
|
161
|
-
}
|
|
162
|
-
var logger = isDev ? createDevLogger() : createProdLogger();
|
|
163
|
-
var info = logger.info.bind(logger);
|
|
164
|
-
var warn = logger.warn.bind(logger);
|
|
165
|
-
var error = logger.error.bind(logger);
|
|
166
|
-
var debug = logger.debug.bind(logger);
|
|
167
|
-
function isDebugEnabled() {
|
|
168
|
-
return logger.isLevelEnabled("debug");
|
|
169
|
-
}
|
|
2
|
+
logger
|
|
3
|
+
} from "./chunk-C7XDGBT5.js";
|
|
170
4
|
|
|
171
5
|
// src/search/fts.ts
|
|
172
6
|
var tokenizerCache = /* @__PURE__ */ new WeakMap();
|
|
@@ -181,7 +15,7 @@ function detectFtsTokenizer(db) {
|
|
|
181
15
|
`);
|
|
182
16
|
tokenizer = "trigram";
|
|
183
17
|
logger.debug("FTS tokenizer: trigram \u53EF\u7528");
|
|
184
|
-
} catch (
|
|
18
|
+
} catch (_err) {
|
|
185
19
|
tokenizer = "unicode61";
|
|
186
20
|
logger.debug("FTS tokenizer: \u964D\u7EA7\u5230 unicode61");
|
|
187
21
|
}
|
|
@@ -274,10 +108,13 @@ function searchChunksFts(db, query, limit) {
|
|
|
274
108
|
logger.debug("Chunk FTS \u5206\u8BCD\u540E\u65E0\u6709\u6548 token\uFF0C\u8DF3\u8FC7\u641C\u7D22");
|
|
275
109
|
return [];
|
|
276
110
|
}
|
|
277
|
-
logger.debug(
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
111
|
+
logger.debug(
|
|
112
|
+
{
|
|
113
|
+
rawQuery: query,
|
|
114
|
+
tokens
|
|
115
|
+
},
|
|
116
|
+
"Chunk FTS \u5206\u8BCD\u7ED3\u679C"
|
|
117
|
+
);
|
|
281
118
|
const runQuery = (qStr, queryLimit) => {
|
|
282
119
|
try {
|
|
283
120
|
const rows = db.prepare(`
|
|
@@ -299,7 +136,7 @@ function searchChunksFts(db, query, limit) {
|
|
|
299
136
|
}
|
|
300
137
|
};
|
|
301
138
|
const strictQuery = tokens.map((t) => `"${t.replace(/"/g, "")}"`).join(" AND ");
|
|
302
|
-
|
|
139
|
+
const results = runQuery(strictQuery, limit);
|
|
303
140
|
logger.debug({ type: "strict", count: results.length, query: strictQuery }, "Chunk FTS \u7CBE\u51C6\u641C\u7D22");
|
|
304
141
|
if (results.length < limit && tokens.length > 1) {
|
|
305
142
|
const beforeCount = results.length;
|
|
@@ -314,16 +151,22 @@ function searchChunksFts(db, query, limit) {
|
|
|
314
151
|
existingIds.add(row.chunkId);
|
|
315
152
|
}
|
|
316
153
|
}
|
|
317
|
-
logger.debug(
|
|
154
|
+
logger.debug(
|
|
155
|
+
{ type: "relaxed", added: results.length - beforeCount, query: relaxedQuery },
|
|
156
|
+
"Chunk FTS \u5BBD\u5BB9\u641C\u7D22\u8865\u5F55"
|
|
157
|
+
);
|
|
318
158
|
}
|
|
319
|
-
logger.debug(
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
159
|
+
logger.debug(
|
|
160
|
+
{
|
|
161
|
+
chunkCount: results.length,
|
|
162
|
+
topChunks: results.slice(0, 5).map((r) => ({
|
|
163
|
+
path: r.filePath.split("/").slice(-2).join("/"),
|
|
164
|
+
chunkIndex: r.chunkIndex,
|
|
165
|
+
bm25: r.score.toFixed(3)
|
|
166
|
+
}))
|
|
167
|
+
},
|
|
168
|
+
"Chunk FTS \u53EC\u56DE\u7ED3\u679C"
|
|
169
|
+
);
|
|
327
170
|
return results.sort((a, b) => b.score - a.score);
|
|
328
171
|
}
|
|
329
172
|
function batchUpsertFileFts(db, files) {
|
|
@@ -340,14 +183,14 @@ function batchUpsertFileFts(db, files) {
|
|
|
340
183
|
function batchDeleteFileFts(db, paths) {
|
|
341
184
|
const stmt = db.prepare("DELETE FROM files_fts WHERE path = ?");
|
|
342
185
|
const transaction = db.transaction((items) => {
|
|
343
|
-
for (const
|
|
344
|
-
stmt.run(
|
|
186
|
+
for (const path2 of items) {
|
|
187
|
+
stmt.run(path2);
|
|
345
188
|
}
|
|
346
189
|
});
|
|
347
190
|
transaction(paths);
|
|
348
191
|
}
|
|
349
192
|
function sanitizeQuery(query) {
|
|
350
|
-
return query.replace(/[():"
|
|
193
|
+
return query.replace(/[():"*^./\\:@#$%&=+[\]{}<>|~`!?,;]/g, " ").replace(/\b(AND|OR|NOT|NEAR)\b/gi, " ").replace(/\s+/g, " ").trim();
|
|
351
194
|
}
|
|
352
195
|
var zhSegmenter = null;
|
|
353
196
|
function getZhSegmenter() {
|
|
@@ -360,29 +203,68 @@ function getZhSegmenter() {
|
|
|
360
203
|
}
|
|
361
204
|
return zhSegmenter;
|
|
362
205
|
}
|
|
206
|
+
function toSnakeCase(str) {
|
|
207
|
+
return str.replace(/([a-z])([A-Z])/g, "$1_$2").replace(/([A-Z]+)([A-Z][a-z])/g, "$1_$2").toLowerCase();
|
|
208
|
+
}
|
|
209
|
+
function toCamelCase(str) {
|
|
210
|
+
return str.toLowerCase().replace(/_([a-z])/g, (_, c) => c.toUpperCase());
|
|
211
|
+
}
|
|
212
|
+
function generateVariants(token) {
|
|
213
|
+
const variants = [token.toLowerCase()];
|
|
214
|
+
const stripped = token.replace(/[._-]/g, "").toLowerCase();
|
|
215
|
+
if (stripped !== token.toLowerCase() && stripped.length > 0) {
|
|
216
|
+
variants.push(stripped);
|
|
217
|
+
}
|
|
218
|
+
if (/[a-z][A-Z]/.test(token)) {
|
|
219
|
+
const snake = toSnakeCase(token);
|
|
220
|
+
if (!variants.includes(snake)) {
|
|
221
|
+
variants.push(snake);
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
if (/_/.test(token)) {
|
|
225
|
+
const camel = toCamelCase(token);
|
|
226
|
+
if (!variants.includes(camel)) {
|
|
227
|
+
variants.push(camel);
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
return variants;
|
|
231
|
+
}
|
|
363
232
|
function segmentQuery(query) {
|
|
364
233
|
const uniqueTokens = /* @__PURE__ */ new Set();
|
|
365
234
|
const cleanRaw = sanitizeQuery(query);
|
|
366
235
|
if (!cleanRaw) return [];
|
|
367
|
-
query.split(/\s+/)
|
|
368
|
-
if (/[._
|
|
369
|
-
|
|
236
|
+
for (const t of query.split(/\s+/)) {
|
|
237
|
+
if (/[._/]/.test(t) || /[a-z][A-Z]/.test(t)) {
|
|
238
|
+
const variants = generateVariants(t);
|
|
239
|
+
for (const v of variants) {
|
|
240
|
+
uniqueTokens.add(v);
|
|
241
|
+
}
|
|
370
242
|
}
|
|
371
|
-
}
|
|
243
|
+
}
|
|
372
244
|
const segmenter = getZhSegmenter();
|
|
373
245
|
if (segmenter) {
|
|
374
246
|
const segments = segmenter.segment(cleanRaw);
|
|
375
247
|
for (const seg of segments) {
|
|
376
248
|
if (seg.isWordLike) {
|
|
377
249
|
const t = seg.segment.toLowerCase();
|
|
378
|
-
if (t.trim().length > 0)
|
|
250
|
+
if (t.trim().length > 0) {
|
|
251
|
+
const variants = generateVariants(seg.segment);
|
|
252
|
+
for (const v of variants) {
|
|
253
|
+
uniqueTokens.add(v);
|
|
254
|
+
}
|
|
255
|
+
}
|
|
379
256
|
}
|
|
380
257
|
}
|
|
381
258
|
} else {
|
|
382
259
|
logger.warn("Intl.Segmenter \u4E0D\u53EF\u7528\uFF0C\u4E2D\u6587\u641C\u7D22\u5C06\u9000\u5316\u4E3A\u7CBE\u786E\u5339\u914D");
|
|
383
|
-
cleanRaw.split(/[\s\p{P}]+/u)
|
|
384
|
-
if (t.length > 0)
|
|
385
|
-
|
|
260
|
+
for (const t of cleanRaw.split(/[\s\p{P}]+/u)) {
|
|
261
|
+
if (t.length > 0) {
|
|
262
|
+
const variants = generateVariants(t);
|
|
263
|
+
for (const v of variants) {
|
|
264
|
+
uniqueTokens.add(v);
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
}
|
|
386
268
|
}
|
|
387
269
|
return Array.from(uniqueTokens);
|
|
388
270
|
}
|
|
@@ -392,10 +274,13 @@ function searchFilesFts(db, query, limit) {
|
|
|
392
274
|
logger.debug("FTS \u5206\u8BCD\u540E\u65E0\u6709\u6548 token\uFF0C\u8DF3\u8FC7\u641C\u7D22");
|
|
393
275
|
return [];
|
|
394
276
|
}
|
|
395
|
-
logger.debug(
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
277
|
+
logger.debug(
|
|
278
|
+
{
|
|
279
|
+
rawQuery: query,
|
|
280
|
+
tokens
|
|
281
|
+
},
|
|
282
|
+
"FTS \u5206\u8BCD\u7ED3\u679C"
|
|
283
|
+
);
|
|
399
284
|
const runQuery = (qStr, queryLimit) => {
|
|
400
285
|
try {
|
|
401
286
|
const rows = db.prepare(`
|
|
@@ -406,12 +291,12 @@ function searchFilesFts(db, query, limit) {
|
|
|
406
291
|
LIMIT ?
|
|
407
292
|
`).all(qStr, queryLimit);
|
|
408
293
|
return rows.map((r) => ({ path: r.path, score: -r.score }));
|
|
409
|
-
} catch (
|
|
294
|
+
} catch (_e) {
|
|
410
295
|
return [];
|
|
411
296
|
}
|
|
412
297
|
};
|
|
413
298
|
const strictQuery = tokens.map((t) => `"${t.replace(/"/g, "")}"`).join(" AND ");
|
|
414
|
-
|
|
299
|
+
const results = runQuery(strictQuery, limit);
|
|
415
300
|
logger.debug({ type: "strict", count: results.length, query: strictQuery }, "FTS \u7CBE\u51C6\u641C\u7D22");
|
|
416
301
|
if (results.length < limit && tokens.length > 1) {
|
|
417
302
|
const beforeCount = results.length;
|
|
@@ -426,15 +311,21 @@ function searchFilesFts(db, query, limit) {
|
|
|
426
311
|
existingPaths.add(row.path);
|
|
427
312
|
}
|
|
428
313
|
}
|
|
429
|
-
logger.debug(
|
|
314
|
+
logger.debug(
|
|
315
|
+
{ type: "relaxed", added: results.length - beforeCount, query: relaxedQuery },
|
|
316
|
+
"FTS \u5BBD\u5BB9\u641C\u7D22\u8865\u5F55"
|
|
317
|
+
);
|
|
430
318
|
}
|
|
431
|
-
logger.debug(
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
319
|
+
logger.debug(
|
|
320
|
+
{
|
|
321
|
+
fileCount: results.length,
|
|
322
|
+
topFiles: results.slice(0, 5).map((r) => ({
|
|
323
|
+
path: r.path.split("/").slice(-2).join("/"),
|
|
324
|
+
bm25: r.score.toFixed(3)
|
|
325
|
+
}))
|
|
326
|
+
},
|
|
327
|
+
"FTS \u53EC\u56DE\u7ED3\u679C"
|
|
328
|
+
);
|
|
438
329
|
return results.sort((a, b) => b.score - a.score);
|
|
439
330
|
}
|
|
440
331
|
function isFtsInitialized(db) {
|
|
@@ -446,23 +337,23 @@ function isFtsInitialized(db) {
|
|
|
446
337
|
}
|
|
447
338
|
|
|
448
339
|
// src/db/index.ts
|
|
449
|
-
import Database from "better-sqlite3";
|
|
450
|
-
import path2 from "path";
|
|
451
|
-
import os2 from "os";
|
|
452
|
-
import fs2 from "fs";
|
|
453
340
|
import crypto from "crypto";
|
|
454
|
-
|
|
341
|
+
import fs from "fs";
|
|
342
|
+
import os from "os";
|
|
343
|
+
import path from "path";
|
|
344
|
+
import Database from "better-sqlite3";
|
|
345
|
+
var BASE_DIR = path.join(os.homedir(), ".contextweaver");
|
|
455
346
|
function getDirectoryBirthtime(projectPath) {
|
|
456
|
-
const gitDir =
|
|
347
|
+
const gitDir = path.join(projectPath, ".git");
|
|
457
348
|
try {
|
|
458
|
-
const gitStats =
|
|
349
|
+
const gitStats = fs.statSync(gitDir);
|
|
459
350
|
if (gitStats.isDirectory() && gitStats.birthtimeMs) {
|
|
460
351
|
return Math.floor(gitStats.birthtimeMs);
|
|
461
352
|
}
|
|
462
353
|
} catch {
|
|
463
354
|
}
|
|
464
355
|
try {
|
|
465
|
-
const rootStats =
|
|
356
|
+
const rootStats = fs.statSync(projectPath);
|
|
466
357
|
if (rootStats.birthtimeMs) {
|
|
467
358
|
return Math.floor(rootStats.birthtimeMs);
|
|
468
359
|
}
|
|
@@ -476,11 +367,11 @@ function generateProjectId(projectPath) {
|
|
|
476
367
|
return crypto.createHash("md5").update(uniqueKey).digest("hex").slice(0, 10);
|
|
477
368
|
}
|
|
478
369
|
function initDb(projectId) {
|
|
479
|
-
const projectDir =
|
|
480
|
-
if (!
|
|
481
|
-
|
|
370
|
+
const projectDir = path.join(BASE_DIR, projectId);
|
|
371
|
+
if (!fs.existsSync(projectDir)) {
|
|
372
|
+
fs.mkdirSync(projectDir, { recursive: true });
|
|
482
373
|
}
|
|
483
|
-
const dbPath =
|
|
374
|
+
const dbPath = path.join(projectDir, "index.db");
|
|
484
375
|
const db = new Database(dbPath);
|
|
485
376
|
db.pragma("journal_mode = WAL");
|
|
486
377
|
db.exec(`
|
|
@@ -495,7 +386,7 @@ function initDb(projectId) {
|
|
|
495
386
|
)
|
|
496
387
|
`);
|
|
497
388
|
try {
|
|
498
|
-
db.exec(
|
|
389
|
+
db.exec("ALTER TABLE files ADD COLUMN vector_index_hash TEXT");
|
|
499
390
|
} catch {
|
|
500
391
|
}
|
|
501
392
|
db.exec(`
|
|
@@ -567,7 +458,12 @@ function batchUpsert(db, files) {
|
|
|
567
458
|
}
|
|
568
459
|
});
|
|
569
460
|
transaction(files);
|
|
570
|
-
const ftsFiles =
|
|
461
|
+
const ftsFiles = [];
|
|
462
|
+
for (const f of files) {
|
|
463
|
+
if (f.content !== null) {
|
|
464
|
+
ftsFiles.push({ path: f.path, content: f.content });
|
|
465
|
+
}
|
|
466
|
+
}
|
|
571
467
|
if (ftsFiles.length > 0) {
|
|
572
468
|
batchUpsertFileFts(db, ftsFiles);
|
|
573
469
|
}
|
|
@@ -616,15 +512,13 @@ function getStoredEmbeddingDimensions(db) {
|
|
|
616
512
|
const value = getMetadata(db, METADATA_KEY_EMBEDDING_DIMENSIONS);
|
|
617
513
|
if (value === null) return null;
|
|
618
514
|
const parsed = parseInt(value, 10);
|
|
619
|
-
return isNaN(parsed) ? null : parsed;
|
|
515
|
+
return Number.isNaN(parsed) ? null : parsed;
|
|
620
516
|
}
|
|
621
517
|
function setStoredEmbeddingDimensions(db, dimensions) {
|
|
622
518
|
setMetadata(db, METADATA_KEY_EMBEDDING_DIMENSIONS, String(dimensions));
|
|
623
519
|
}
|
|
624
520
|
|
|
625
521
|
export {
|
|
626
|
-
logger,
|
|
627
|
-
isDebugEnabled,
|
|
628
522
|
isChunksFtsInitialized,
|
|
629
523
|
batchUpsertChunkFts,
|
|
630
524
|
batchDeleteFileChunksFts,
|