@hsingjui/contextweaver 0.0.3 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/dist/{SearchService-CMHVTNSM.js → SearchService-SRT2KP6A.js} +269 -241
- package/dist/{chunk-RU4OUBZS.js → chunk-5MZUQNOD.js} +149 -53
- package/dist/chunk-C7XDGBT5.js +172 -0
- package/dist/{chunk-ESTPBFCW.js → chunk-SKBAE26T.js} +130 -28
- package/dist/{chunk-NHQE2IOY.js → chunk-T337P433.js} +278 -281
- package/dist/{chunk-AVBQ77MN.js → chunk-VW5RACJC.js} +119 -225
- package/dist/{chunk-4XOWK7YZ.js → chunk-XHJNV3MK.js} +110 -78
- package/dist/{codebaseRetrieval-NLSYDBF4.js → codebaseRetrieval-7SCFUVKL.js} +4 -3
- package/dist/{config-BQCFTZVY.js → config-LCOJHTCF.js} +1 -3
- package/dist/index.js +55 -34
- package/dist/lock-PX2BX2YN.js +106 -0
- package/dist/scanner-7AZ4CHAR.js +10 -0
- package/dist/{server-LC4NWEGV.js → server-GSXFZX6I.js} +27 -11
- package/package.json +5 -3
- package/dist/scanner-CXUYLK56.js +0 -9
|
@@ -3,18 +3,20 @@ import {
|
|
|
3
3
|
batchUpdateVectorIndexHash,
|
|
4
4
|
batchUpsertChunkFts,
|
|
5
5
|
clearVectorIndexHash,
|
|
6
|
-
isChunksFtsInitialized
|
|
6
|
+
isChunksFtsInitialized
|
|
7
|
+
} from "./chunk-VW5RACJC.js";
|
|
8
|
+
import {
|
|
7
9
|
logger
|
|
8
|
-
} from "./chunk-
|
|
10
|
+
} from "./chunk-C7XDGBT5.js";
|
|
9
11
|
import {
|
|
10
12
|
getEmbeddingConfig
|
|
11
|
-
} from "./chunk-
|
|
13
|
+
} from "./chunk-SKBAE26T.js";
|
|
12
14
|
|
|
13
15
|
// src/vectorStore/index.ts
|
|
14
|
-
import * as lancedb from "@lancedb/lancedb";
|
|
15
|
-
import path from "path";
|
|
16
|
-
import os from "os";
|
|
17
16
|
import fs from "fs";
|
|
17
|
+
import os from "os";
|
|
18
|
+
import path from "path";
|
|
19
|
+
import * as lancedb from "@lancedb/lancedb";
|
|
18
20
|
var BASE_DIR = path.join(os.homedir(), ".contextweaver");
|
|
19
21
|
var VectorStore = class {
|
|
20
22
|
db = null;
|
|
@@ -49,11 +51,14 @@ var VectorStore = class {
|
|
|
49
51
|
if (this.table) return;
|
|
50
52
|
if (!this.db) throw new Error("VectorStore not initialized");
|
|
51
53
|
if (records.length === 0) return;
|
|
52
|
-
this.table = await this.db.createTable(
|
|
54
|
+
this.table = await this.db.createTable(
|
|
55
|
+
"chunks",
|
|
56
|
+
records
|
|
57
|
+
);
|
|
53
58
|
}
|
|
54
59
|
/**
|
|
55
60
|
* 单调版本更新:先插入新版本,再删除旧版本
|
|
56
|
-
*
|
|
61
|
+
*
|
|
57
62
|
* 这保证了:
|
|
58
63
|
* - 最坏情况(崩溃)是新旧版本共存(不缺失)
|
|
59
64
|
* - 正常情况下旧版本被清理
|
|
@@ -70,18 +75,20 @@ var VectorStore = class {
|
|
|
70
75
|
await this.table.add(records);
|
|
71
76
|
}
|
|
72
77
|
if (this.table) {
|
|
73
|
-
await this.table.delete(
|
|
78
|
+
await this.table.delete(
|
|
79
|
+
`file_path = '${this.escapeString(filePath)}' AND file_hash != '${this.escapeString(newHash)}'`
|
|
80
|
+
);
|
|
74
81
|
}
|
|
75
82
|
}
|
|
76
83
|
/**
|
|
77
84
|
* 批量 upsert 多个文件(性能优化版,带分批机制)
|
|
78
|
-
*
|
|
85
|
+
*
|
|
79
86
|
* 流程:
|
|
80
87
|
* 1. 将文件分成小批次(每批最多 BATCH_FILES 个文件或 BATCH_RECORDS 条记录)
|
|
81
88
|
* 2. 每批执行:插入新 records → 删除旧版本
|
|
82
|
-
*
|
|
89
|
+
*
|
|
83
90
|
* 分批是必要的,因为 LanceDB native 模块在处理超大数据时可能崩溃
|
|
84
|
-
*
|
|
91
|
+
*
|
|
85
92
|
* @param files 文件列表,每个包含 path、hash 和 records
|
|
86
93
|
*/
|
|
87
94
|
async batchUpsertFiles(files) {
|
|
@@ -122,7 +129,9 @@ var VectorStore = class {
|
|
|
122
129
|
await this.table.add(batchRecords);
|
|
123
130
|
}
|
|
124
131
|
if (this.table && batch.length > 0) {
|
|
125
|
-
const deleteConditions = batch.map(
|
|
132
|
+
const deleteConditions = batch.map(
|
|
133
|
+
(f) => `(file_path = '${this.escapeString(f.path)}' AND file_hash != '${this.escapeString(f.hash)}')`
|
|
134
|
+
).join(" OR ");
|
|
126
135
|
await this.table.delete(deleteConditions);
|
|
127
136
|
}
|
|
128
137
|
}
|
|
@@ -165,7 +174,7 @@ var VectorStore = class {
|
|
|
165
174
|
}
|
|
166
175
|
/**
|
|
167
176
|
* 批量获取多个文件的 chunks(性能优化:单次查询替代 N 次循环)
|
|
168
|
-
*
|
|
177
|
+
*
|
|
169
178
|
* 适用于 GraphExpander 扩展、词法召回等需要批量获取的场景
|
|
170
179
|
* @returns Map<filePath, ChunkRecord[]>,每个文件的 chunks 已按 chunk_index 排序
|
|
171
180
|
*/
|
|
@@ -242,9 +251,6 @@ async function closeAllVectorStores() {
|
|
|
242
251
|
vectorStores.clear();
|
|
243
252
|
}
|
|
244
253
|
|
|
245
|
-
// src/indexer/index.ts
|
|
246
|
-
import "better-sqlite3";
|
|
247
|
-
|
|
248
254
|
// src/api/embedding.ts
|
|
249
255
|
var ProgressTracker = class {
|
|
250
256
|
completed = 0;
|
|
@@ -253,15 +259,21 @@ var ProgressTracker = class {
|
|
|
253
259
|
startTime;
|
|
254
260
|
lastLogTime = 0;
|
|
255
261
|
logIntervalMs = 2e3;
|
|
256
|
-
// 每
|
|
257
|
-
|
|
262
|
+
// 每 2 秒输出一次
|
|
263
|
+
onProgress;
|
|
264
|
+
/** 是否跳过日志(单批次时跳过,避免与索引日志混淆) */
|
|
265
|
+
skipLogs;
|
|
266
|
+
constructor(total, onProgress) {
|
|
258
267
|
this.total = total;
|
|
259
268
|
this.startTime = Date.now();
|
|
269
|
+
this.onProgress = onProgress;
|
|
270
|
+
this.skipLogs = total <= 1;
|
|
260
271
|
}
|
|
261
272
|
/** 记录一个批次完成 */
|
|
262
273
|
recordBatch(tokens) {
|
|
263
274
|
this.completed++;
|
|
264
275
|
this.totalTokens += tokens;
|
|
276
|
+
this.onProgress?.(this.completed, this.total);
|
|
265
277
|
const now = Date.now();
|
|
266
278
|
if (now - this.lastLogTime >= this.logIntervalMs) {
|
|
267
279
|
this.logProgress();
|
|
@@ -270,6 +282,7 @@ var ProgressTracker = class {
|
|
|
270
282
|
}
|
|
271
283
|
/** 输出进度 */
|
|
272
284
|
logProgress() {
|
|
285
|
+
if (this.skipLogs) return;
|
|
273
286
|
const elapsed = (Date.now() - this.startTime) / 1e3;
|
|
274
287
|
const percent = Math.round(this.completed / this.total * 100);
|
|
275
288
|
const rate = this.completed / elapsed;
|
|
@@ -287,6 +300,7 @@ var ProgressTracker = class {
|
|
|
287
300
|
}
|
|
288
301
|
/** 完成时输出最终统计 */
|
|
289
302
|
complete() {
|
|
303
|
+
if (this.skipLogs) return;
|
|
290
304
|
const elapsed = (Date.now() - this.startTime) / 1e3;
|
|
291
305
|
logger.info(
|
|
292
306
|
{
|
|
@@ -391,20 +405,17 @@ var RateLimitController = class {
|
|
|
391
405
|
},
|
|
392
406
|
"\u901F\u7387\u9650\u5236\uFF1A\u89E6\u53D1 429\uFF0C\u6682\u505C\u6240\u6709\u8BF7\u6C42"
|
|
393
407
|
);
|
|
394
|
-
let resumeResolve
|
|
408
|
+
let resumeResolve = () => {
|
|
409
|
+
};
|
|
395
410
|
this.pausePromise = new Promise((resolve) => {
|
|
396
411
|
resumeResolve = resolve;
|
|
397
412
|
});
|
|
398
413
|
await sleep(this.backoffMs);
|
|
399
414
|
this.backoffMs = Math.min(this.maxBackoffMs, this.backoffMs * 2);
|
|
400
415
|
this.isPaused = false;
|
|
401
|
-
const resolvedPromise = this.pausePromise;
|
|
402
416
|
this.pausePromise = null;
|
|
403
417
|
resumeResolve();
|
|
404
|
-
logger.info(
|
|
405
|
-
{ waitMs: this.backoffMs },
|
|
406
|
-
"\u901F\u7387\u9650\u5236\uFF1A\u6062\u590D\u8BF7\u6C42"
|
|
407
|
-
);
|
|
418
|
+
logger.info({ waitMs: this.backoffMs }, "\u901F\u7387\u9650\u5236\uFF1A\u6062\u590D\u8BF7\u6C42");
|
|
408
419
|
}
|
|
409
420
|
/**
|
|
410
421
|
* 获取当前状态(用于调试)
|
|
@@ -444,8 +455,9 @@ var EmbeddingClient = class {
|
|
|
444
455
|
* 批量获取 Embedding
|
|
445
456
|
* @param texts 待处理的文本数组
|
|
446
457
|
* @param batchSize 每批次发送的文本数量(默认 20)
|
|
458
|
+
* @param onProgress 可选的进度回调 (completed, total) => void
|
|
447
459
|
*/
|
|
448
|
-
async embedBatch(texts, batchSize = 20) {
|
|
460
|
+
async embedBatch(texts, batchSize = 20, onProgress) {
|
|
449
461
|
if (texts.length === 0) {
|
|
450
462
|
return [];
|
|
451
463
|
}
|
|
@@ -453,7 +465,7 @@ var EmbeddingClient = class {
|
|
|
453
465
|
for (let i = 0; i < texts.length; i += batchSize) {
|
|
454
466
|
batches.push(texts.slice(i, i + batchSize));
|
|
455
467
|
}
|
|
456
|
-
const progress = new ProgressTracker(batches.length);
|
|
468
|
+
const progress = new ProgressTracker(batches.length, onProgress);
|
|
457
469
|
const batchResults = await Promise.all(
|
|
458
470
|
batches.map(
|
|
459
471
|
(batch, batchIndex) => this.processWithRateLimit(batch, batchIndex * batchSize, progress)
|
|
@@ -463,10 +475,13 @@ var EmbeddingClient = class {
|
|
|
463
475
|
return batchResults.flat();
|
|
464
476
|
}
|
|
465
477
|
/**
|
|
466
|
-
*
|
|
478
|
+
* 带速率限制和网络错误重试的批次处理
|
|
467
479
|
* 使用循环而非递归,避免栈溢出和槽位泄漏
|
|
468
480
|
*/
|
|
469
481
|
async processWithRateLimit(texts, startIndex, progress) {
|
|
482
|
+
const MAX_NETWORK_RETRIES = 3;
|
|
483
|
+
const INITIAL_RETRY_DELAY_MS = 1e3;
|
|
484
|
+
let networkRetries = 0;
|
|
470
485
|
while (true) {
|
|
471
486
|
await this.rateLimiter.acquire();
|
|
472
487
|
try {
|
|
@@ -474,17 +489,75 @@ var EmbeddingClient = class {
|
|
|
474
489
|
this.rateLimiter.releaseSuccess();
|
|
475
490
|
return result;
|
|
476
491
|
} catch (err) {
|
|
477
|
-
const
|
|
492
|
+
const error = err;
|
|
493
|
+
const errorMessage = error.message || "";
|
|
494
|
+
const isRateLimited = errorMessage.includes("429") || errorMessage.includes("rate");
|
|
495
|
+
const isNetworkError = this.isNetworkError(err);
|
|
478
496
|
if (isRateLimited) {
|
|
479
497
|
this.rateLimiter.releaseForRetry();
|
|
480
498
|
await this.rateLimiter.triggerRateLimit();
|
|
499
|
+
networkRetries = 0;
|
|
500
|
+
} else if (isNetworkError && networkRetries < MAX_NETWORK_RETRIES) {
|
|
501
|
+
networkRetries++;
|
|
502
|
+
const delayMs = INITIAL_RETRY_DELAY_MS * 2 ** (networkRetries - 1);
|
|
503
|
+
logger.warn(
|
|
504
|
+
{
|
|
505
|
+
error: errorMessage,
|
|
506
|
+
retry: networkRetries,
|
|
507
|
+
maxRetries: MAX_NETWORK_RETRIES,
|
|
508
|
+
delayMs
|
|
509
|
+
},
|
|
510
|
+
"\u7F51\u7EDC\u9519\u8BEF\uFF0C\u51C6\u5907\u91CD\u8BD5"
|
|
511
|
+
);
|
|
512
|
+
this.rateLimiter.releaseForRetry();
|
|
513
|
+
await sleep(delayMs);
|
|
481
514
|
} else {
|
|
482
515
|
this.rateLimiter.releaseFailure();
|
|
516
|
+
if (isNetworkError) {
|
|
517
|
+
logger.error({ error: errorMessage, retries: networkRetries }, "\u7F51\u7EDC\u9519\u8BEF\u91CD\u8BD5\u6B21\u6570\u8017\u5C3D");
|
|
518
|
+
}
|
|
483
519
|
throw err;
|
|
484
520
|
}
|
|
485
521
|
}
|
|
486
522
|
}
|
|
487
523
|
}
|
|
524
|
+
/**
|
|
525
|
+
* 判断是否为网络错误
|
|
526
|
+
*
|
|
527
|
+
* 常见网络错误类型:
|
|
528
|
+
* - terminated: 连接被中断(TLS 断开)
|
|
529
|
+
* - ECONNRESET: 连接被远端重置
|
|
530
|
+
* - ETIMEDOUT: 连接超时
|
|
531
|
+
* - ENOTFOUND: DNS 解析失败
|
|
532
|
+
* - fetch failed: 通用 fetch 失败
|
|
533
|
+
* - socket hang up: 套接字意外关闭
|
|
534
|
+
*/
|
|
535
|
+
isNetworkError(err) {
|
|
536
|
+
const error = err;
|
|
537
|
+
const message = (error.message || "").toLowerCase();
|
|
538
|
+
const code = error.code || "";
|
|
539
|
+
const networkErrorPatterns = [
|
|
540
|
+
"terminated",
|
|
541
|
+
"econnreset",
|
|
542
|
+
"etimedout",
|
|
543
|
+
"enotfound",
|
|
544
|
+
"econnrefused",
|
|
545
|
+
"fetch failed",
|
|
546
|
+
"socket hang up",
|
|
547
|
+
"network",
|
|
548
|
+
"aborted"
|
|
549
|
+
];
|
|
550
|
+
for (const pattern of networkErrorPatterns) {
|
|
551
|
+
if (message.includes(pattern)) {
|
|
552
|
+
return true;
|
|
553
|
+
}
|
|
554
|
+
}
|
|
555
|
+
const networkErrorCodes = ["ECONNRESET", "ETIMEDOUT", "ENOTFOUND", "ECONNREFUSED", "EPIPE"];
|
|
556
|
+
if (networkErrorCodes.includes(code)) {
|
|
557
|
+
return true;
|
|
558
|
+
}
|
|
559
|
+
return false;
|
|
560
|
+
}
|
|
488
561
|
/**
|
|
489
562
|
* 处理单个批次(单次请求,不含重试逻辑)
|
|
490
563
|
*/
|
|
@@ -558,11 +631,12 @@ var Indexer = class {
|
|
|
558
631
|
}
|
|
559
632
|
/**
|
|
560
633
|
* 处理扫描结果,更新向量索引
|
|
561
|
-
*
|
|
634
|
+
*
|
|
562
635
|
* @param db SQLite 数据库实例
|
|
563
636
|
* @param results 文件处理结果
|
|
637
|
+
* @param onProgress 可选的进度回调 (indexed, total) => void
|
|
564
638
|
*/
|
|
565
|
-
async indexFiles(db, results) {
|
|
639
|
+
async indexFiles(db, results, onProgress) {
|
|
566
640
|
if (!this.vectorStore) {
|
|
567
641
|
await this.init();
|
|
568
642
|
}
|
|
@@ -606,26 +680,31 @@ var Indexer = class {
|
|
|
606
680
|
stats.deleted = toDelete.length;
|
|
607
681
|
}
|
|
608
682
|
if (toIndex.length > 0) {
|
|
609
|
-
const indexResult = await this.batchIndex(db, toIndex);
|
|
683
|
+
const indexResult = await this.batchIndex(db, toIndex, onProgress);
|
|
610
684
|
stats.indexed = indexResult.success;
|
|
611
685
|
stats.errors = indexResult.errors;
|
|
612
686
|
}
|
|
613
687
|
logger.info(
|
|
614
|
-
{
|
|
688
|
+
{
|
|
689
|
+
indexed: stats.indexed,
|
|
690
|
+
deleted: stats.deleted,
|
|
691
|
+
errors: stats.errors,
|
|
692
|
+
skipped: stats.skipped
|
|
693
|
+
},
|
|
615
694
|
"\u5411\u91CF\u7D22\u5F15\u5B8C\u6210"
|
|
616
695
|
);
|
|
617
696
|
return stats;
|
|
618
697
|
}
|
|
619
698
|
/**
|
|
620
699
|
* 批量索引文件(性能优化版)
|
|
621
|
-
*
|
|
700
|
+
*
|
|
622
701
|
* 优化策略:
|
|
623
702
|
* 1. Embedding 已批量化(原有)
|
|
624
703
|
* 2. LanceDB 写入批量化:N 次 upsertFile → 1 次 batchUpsertFiles
|
|
625
704
|
* 3. FTS 写入批量化:N 次删除+插入 → 1 次批量删除 + 1 次批量插入
|
|
626
705
|
* 4. 日志汇总化:逐文件日志 → 汇总日志
|
|
627
706
|
*/
|
|
628
|
-
async batchIndex(db, files) {
|
|
707
|
+
async batchIndex(db, files, onProgress) {
|
|
629
708
|
if (files.length === 0) {
|
|
630
709
|
return { success: 0, errors: 0 };
|
|
631
710
|
}
|
|
@@ -646,11 +725,15 @@ var Indexer = class {
|
|
|
646
725
|
logger.info({ count: allTexts.length, files: files.length }, "\u5F00\u59CB\u6279\u91CF Embedding");
|
|
647
726
|
let embeddings;
|
|
648
727
|
try {
|
|
649
|
-
const results = await this.embeddingClient.embedBatch(allTexts);
|
|
728
|
+
const results = await this.embeddingClient.embedBatch(allTexts, 20, onProgress);
|
|
650
729
|
embeddings = results.map((r) => r.embedding);
|
|
651
730
|
} catch (err) {
|
|
652
|
-
|
|
653
|
-
|
|
731
|
+
const error = err;
|
|
732
|
+
logger.error({ error: error.message, stack: error.stack }, "Embedding \u5931\u8D25");
|
|
733
|
+
clearVectorIndexHash(
|
|
734
|
+
db,
|
|
735
|
+
files.map((f) => f.path)
|
|
736
|
+
);
|
|
654
737
|
return { success: 0, errors: files.length };
|
|
655
738
|
}
|
|
656
739
|
const filesToUpsert = [];
|
|
@@ -690,23 +773,35 @@ var Indexer = class {
|
|
|
690
773
|
filePath: record.file_path,
|
|
691
774
|
chunkIndex: record.chunk_index,
|
|
692
775
|
breadcrumb: record.breadcrumb,
|
|
693
|
-
content: record.breadcrumb
|
|
776
|
+
content: `${record.breadcrumb}
|
|
777
|
+
${record.display_code}`
|
|
694
778
|
});
|
|
695
779
|
}
|
|
696
780
|
filesToUpsert.push({ path: file.path, hash: file.hash, records });
|
|
697
781
|
successFiles.push({ path: file.path, hash: file.hash });
|
|
698
782
|
} catch (err) {
|
|
699
|
-
|
|
783
|
+
const error = err;
|
|
784
|
+
logger.error(
|
|
785
|
+
{ path: file.path, error: error.message, stack: error.stack },
|
|
786
|
+
"\u7EC4\u88C5 ChunkRecord \u5931\u8D25"
|
|
787
|
+
);
|
|
700
788
|
errorFiles.push(file.path);
|
|
701
789
|
}
|
|
702
790
|
}
|
|
703
791
|
if (filesToUpsert.length > 0) {
|
|
704
792
|
try {
|
|
705
|
-
await this.vectorStore
|
|
706
|
-
logger.info(
|
|
793
|
+
await this.vectorStore?.batchUpsertFiles(filesToUpsert);
|
|
794
|
+
logger.info(
|
|
795
|
+
{ files: filesToUpsert.length, chunks: allFtsChunks.length },
|
|
796
|
+
"LanceDB \u6279\u91CF\u5199\u5165\u5B8C\u6210"
|
|
797
|
+
);
|
|
707
798
|
} catch (err) {
|
|
708
|
-
|
|
709
|
-
|
|
799
|
+
const error = err;
|
|
800
|
+
logger.error({ error: error.message, stack: error.stack }, "LanceDB \u6279\u91CF\u5199\u5165\u5931\u8D25");
|
|
801
|
+
clearVectorIndexHash(
|
|
802
|
+
db,
|
|
803
|
+
files.map((f) => f.path)
|
|
804
|
+
);
|
|
710
805
|
return { success: 0, errors: files.length };
|
|
711
806
|
}
|
|
712
807
|
}
|
|
@@ -715,18 +810,19 @@ var Indexer = class {
|
|
|
715
810
|
const pathsToDelete = filesToUpsert.map((f) => f.path);
|
|
716
811
|
batchDeleteFileChunksFts(db, pathsToDelete);
|
|
717
812
|
batchUpsertChunkFts(db, allFtsChunks);
|
|
718
|
-
logger.info(
|
|
813
|
+
logger.info(
|
|
814
|
+
{ files: pathsToDelete.length, chunks: allFtsChunks.length },
|
|
815
|
+
"FTS \u6279\u91CF\u66F4\u65B0\u5B8C\u6210"
|
|
816
|
+
);
|
|
719
817
|
} catch (err) {
|
|
720
|
-
|
|
818
|
+
const error = err;
|
|
819
|
+
logger.warn({ error: error.message }, "FTS \u6279\u91CF\u66F4\u65B0\u5931\u8D25\uFF08\u5411\u91CF\u7D22\u5F15\u5DF2\u6210\u529F\uFF09");
|
|
721
820
|
}
|
|
722
821
|
}
|
|
723
822
|
if (successFiles.length > 0) {
|
|
724
823
|
batchUpdateVectorIndexHash(db, successFiles);
|
|
725
824
|
}
|
|
726
|
-
logger.info(
|
|
727
|
-
{ success: successFiles.length, errors: errorFiles.length },
|
|
728
|
-
"\u6279\u91CF\u7D22\u5F15\u5B8C\u6210"
|
|
729
|
-
);
|
|
825
|
+
logger.info({ success: successFiles.length, errors: errorFiles.length }, "\u6279\u91CF\u7D22\u5F15\u5B8C\u6210");
|
|
730
826
|
return { success: successFiles.length, errors: errorFiles.length };
|
|
731
827
|
}
|
|
732
828
|
/**
|
|
@@ -747,7 +843,7 @@ var Indexer = class {
|
|
|
747
843
|
if (!this.vectorStore) {
|
|
748
844
|
await this.init();
|
|
749
845
|
}
|
|
750
|
-
return this.vectorStore
|
|
846
|
+
return this.vectorStore?.search(queryVector, limit, filter);
|
|
751
847
|
}
|
|
752
848
|
/**
|
|
753
849
|
* 文本搜索(先 embedding 再向量搜索)
|
|
@@ -763,7 +859,7 @@ var Indexer = class {
|
|
|
763
859
|
if (!this.vectorStore) {
|
|
764
860
|
await this.init();
|
|
765
861
|
}
|
|
766
|
-
await this.vectorStore
|
|
862
|
+
await this.vectorStore?.clear();
|
|
767
863
|
}
|
|
768
864
|
/**
|
|
769
865
|
* 获取索引统计
|
|
@@ -772,7 +868,7 @@ var Indexer = class {
|
|
|
772
868
|
if (!this.vectorStore) {
|
|
773
869
|
await this.init();
|
|
774
870
|
}
|
|
775
|
-
const count = await this.vectorStore
|
|
871
|
+
const count = await this.vectorStore?.count() ?? 0;
|
|
776
872
|
return { totalChunks: count };
|
|
777
873
|
}
|
|
778
874
|
};
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
import {
|
|
2
|
+
isDev,
|
|
3
|
+
isMcpMode
|
|
4
|
+
} from "./chunk-SKBAE26T.js";
|
|
5
|
+
|
|
6
|
+
// src/utils/logger.ts
|
|
7
|
+
import fs from "fs";
|
|
8
|
+
import os from "os";
|
|
9
|
+
import path from "path";
|
|
10
|
+
import { Writable } from "stream";
|
|
11
|
+
import pino from "pino";
|
|
12
|
+
var logLevel = isDev ? "debug" : "info";
|
|
13
|
+
var logDir = path.join(os.homedir(), ".contextweaver", "logs");
|
|
14
|
+
var LOG_RETENTION_DAYS = 7;
|
|
15
|
+
function ensureLogDir(dir) {
|
|
16
|
+
if (!fs.existsSync(dir)) {
|
|
17
|
+
fs.mkdirSync(dir, { recursive: true });
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
function getLogFileName() {
|
|
21
|
+
const now = /* @__PURE__ */ new Date();
|
|
22
|
+
const dateStr = now.toISOString().split("T")[0];
|
|
23
|
+
return `app.${dateStr}.log`;
|
|
24
|
+
}
|
|
25
|
+
function formatTime() {
|
|
26
|
+
const now = /* @__PURE__ */ new Date();
|
|
27
|
+
const pad = (n) => n.toString().padStart(2, "0");
|
|
28
|
+
return `${now.getFullYear()}-${pad(now.getMonth() + 1)}-${pad(now.getDate())} ${pad(now.getHours())}:${pad(now.getMinutes())}:${pad(now.getSeconds())}`;
|
|
29
|
+
}
|
|
30
|
+
function getLevelLabel(level) {
|
|
31
|
+
const labels = {
|
|
32
|
+
10: "TRACE",
|
|
33
|
+
20: "DEBUG",
|
|
34
|
+
30: "INFO",
|
|
35
|
+
40: "WARN",
|
|
36
|
+
50: "ERROR",
|
|
37
|
+
60: "FATAL"
|
|
38
|
+
};
|
|
39
|
+
return labels[level] || "INFO";
|
|
40
|
+
}
|
|
41
|
+
function cleanupOldLogs(dir) {
|
|
42
|
+
try {
|
|
43
|
+
if (!fs.existsSync(dir)) return;
|
|
44
|
+
const files = fs.readdirSync(dir);
|
|
45
|
+
const now = Date.now();
|
|
46
|
+
const maxAge = LOG_RETENTION_DAYS * 24 * 60 * 60 * 1e3;
|
|
47
|
+
const logPattern = /^app\.(\d{4}-\d{2}-\d{2})\.log$/;
|
|
48
|
+
for (const file of files) {
|
|
49
|
+
const match = file.match(logPattern);
|
|
50
|
+
if (!match) continue;
|
|
51
|
+
const dateStr = match[1];
|
|
52
|
+
const fileDate = new Date(dateStr).getTime();
|
|
53
|
+
if (Number.isNaN(fileDate)) continue;
|
|
54
|
+
if (now - fileDate > maxAge) {
|
|
55
|
+
const filePath = path.join(dir, file);
|
|
56
|
+
try {
|
|
57
|
+
fs.unlinkSync(filePath);
|
|
58
|
+
console.error(`[Logger] \u6E05\u7406\u8FC7\u671F\u65E5\u5FD7: ${file}`);
|
|
59
|
+
} catch {
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
} catch {
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
function createFormattedStream(filePath) {
|
|
67
|
+
const writeStream = fs.createWriteStream(filePath, { flags: "a" });
|
|
68
|
+
return new Writable({
|
|
69
|
+
write(chunk, _encoding, callback) {
|
|
70
|
+
try {
|
|
71
|
+
const log = JSON.parse(chunk.toString());
|
|
72
|
+
const time = formatTime();
|
|
73
|
+
const level = getLevelLabel(log.level);
|
|
74
|
+
const msg = log.msg || "";
|
|
75
|
+
const { level: _l, time: _t, pid: _p, hostname: _h, name: _n, msg: _m, ...extra } = log;
|
|
76
|
+
let line = `${time} [${level}] ${msg}`;
|
|
77
|
+
if (Object.keys(extra).length > 0) {
|
|
78
|
+
const extraLines = JSON.stringify(extra, null, 2).split("\n").map((l, i) => i === 0 ? l : ` ${l}`).join("\n");
|
|
79
|
+
line += `
|
|
80
|
+
${extraLines}`;
|
|
81
|
+
}
|
|
82
|
+
writeStream.write(`${line}
|
|
83
|
+
`, callback);
|
|
84
|
+
} catch {
|
|
85
|
+
writeStream.write(chunk.toString(), callback);
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
});
|
|
89
|
+
}
|
|
90
|
+
function createConsoleStream() {
|
|
91
|
+
const colors = {
|
|
92
|
+
10: "\x1B[90m",
|
|
93
|
+
// TRACE - 灰色
|
|
94
|
+
20: "\x1B[36m",
|
|
95
|
+
// DEBUG - 青色
|
|
96
|
+
30: "\x1B[32m",
|
|
97
|
+
// INFO - 绿色
|
|
98
|
+
40: "\x1B[33m",
|
|
99
|
+
// WARN - 黄色
|
|
100
|
+
50: "\x1B[31m",
|
|
101
|
+
// ERROR - 红色
|
|
102
|
+
60: "\x1B[35m"
|
|
103
|
+
// FATAL - 品红
|
|
104
|
+
};
|
|
105
|
+
const reset = "\x1B[0m";
|
|
106
|
+
return new Writable({
|
|
107
|
+
write(chunk, _encoding, callback) {
|
|
108
|
+
try {
|
|
109
|
+
const log = JSON.parse(chunk.toString());
|
|
110
|
+
const time = formatTime();
|
|
111
|
+
const level = getLevelLabel(log.level);
|
|
112
|
+
const color = colors[log.level] || "";
|
|
113
|
+
const msg = log.msg || "";
|
|
114
|
+
const { level: _l, time: _t, pid: _p, hostname: _h, name: _n, msg: _m, ...extra } = log;
|
|
115
|
+
let line = `${color}${time} [${level}]${reset} ${msg}`;
|
|
116
|
+
if (Object.keys(extra).length > 0) {
|
|
117
|
+
const extraStr = JSON.stringify(extra);
|
|
118
|
+
line += ` ${color}${extraStr}${reset}`;
|
|
119
|
+
}
|
|
120
|
+
process.stdout.write(`${line}
|
|
121
|
+
`, callback);
|
|
122
|
+
} catch {
|
|
123
|
+
process.stdout.write(chunk.toString(), callback);
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
});
|
|
127
|
+
}
|
|
128
|
+
function createDevLogger() {
|
|
129
|
+
ensureLogDir(logDir);
|
|
130
|
+
cleanupOldLogs(logDir);
|
|
131
|
+
const logPath = path.join(logDir, getLogFileName());
|
|
132
|
+
const logStream = createFormattedStream(logPath);
|
|
133
|
+
const consoleStream = createConsoleStream();
|
|
134
|
+
return pino(
|
|
135
|
+
{
|
|
136
|
+
level: logLevel,
|
|
137
|
+
name: "contextweaver"
|
|
138
|
+
},
|
|
139
|
+
// MCP 模式下禁用控制台输出,避免污染 STDIO 协议流
|
|
140
|
+
isMcpMode ? logStream : pino.multistream([
|
|
141
|
+
{ stream: logStream, level: logLevel },
|
|
142
|
+
{ stream: consoleStream, level: logLevel }
|
|
143
|
+
])
|
|
144
|
+
);
|
|
145
|
+
}
|
|
146
|
+
function createProdLogger() {
|
|
147
|
+
ensureLogDir(logDir);
|
|
148
|
+
cleanupOldLogs(logDir);
|
|
149
|
+
const logPath = path.join(logDir, getLogFileName());
|
|
150
|
+
const logStream = createFormattedStream(logPath);
|
|
151
|
+
const consoleStream = createConsoleStream();
|
|
152
|
+
return pino(
|
|
153
|
+
{
|
|
154
|
+
level: logLevel,
|
|
155
|
+
name: "contextweaver"
|
|
156
|
+
},
|
|
157
|
+
// MCP 模式下禁用控制台输出,避免污染 STDIO 协议流
|
|
158
|
+
isMcpMode ? logStream : pino.multistream([
|
|
159
|
+
{ stream: logStream, level: logLevel },
|
|
160
|
+
{ stream: consoleStream, level: logLevel }
|
|
161
|
+
])
|
|
162
|
+
);
|
|
163
|
+
}
|
|
164
|
+
var logger = isDev ? createDevLogger() : createProdLogger();
|
|
165
|
+
function isDebugEnabled() {
|
|
166
|
+
return logger.isLevelEnabled("debug");
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
export {
|
|
170
|
+
logger,
|
|
171
|
+
isDebugEnabled
|
|
172
|
+
};
|