@hzttt/multimodal-rag 0.2.9 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/notifier.ts +28 -6
- package/src/types.ts +1 -0
- package/src/watcher.ts +30 -6
package/package.json
CHANGED
package/src/notifier.ts
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
import type { PluginRuntime } from "openclaw/plugin-sdk";
|
|
6
6
|
import type { MediaType, NotificationConfig, IndexEventCallbacks } from "./types.js";
|
|
7
7
|
|
|
8
|
-
type BatchFileStatus = "queued" | "indexed" | "failed";
|
|
8
|
+
type BatchFileStatus = "queued" | "indexed" | "skipped" | "failed";
|
|
9
9
|
type BatchFile = { status: BatchFileStatus; fileType?: MediaType; error?: string };
|
|
10
10
|
type MainSessionConfig = Parameters<PluginRuntime["system"]["resolveMainSessionKey"]>[0];
|
|
11
11
|
type RuntimeSystemCompat = {
|
|
@@ -114,6 +114,17 @@ export class IndexNotifier implements IndexEventCallbacks {
|
|
|
114
114
|
this.resetQuietTimer();
|
|
115
115
|
}
|
|
116
116
|
|
|
117
|
+
/**
|
|
118
|
+
* 文件跳过事件(例如已存在的重复内容)
|
|
119
|
+
*/
|
|
120
|
+
onFileSkipped(filePath: string, fileType: MediaType): void {
|
|
121
|
+
if (this.state === "idle" && !this.batch.has(filePath)) {
|
|
122
|
+
return;
|
|
123
|
+
}
|
|
124
|
+
this.batch.set(filePath, { status: "skipped", fileType });
|
|
125
|
+
this.resetQuietTimer();
|
|
126
|
+
}
|
|
127
|
+
|
|
117
128
|
/**
|
|
118
129
|
* 文件索引失败事件
|
|
119
130
|
*/
|
|
@@ -676,8 +687,7 @@ export class IndexNotifier implements IndexEventCallbacks {
|
|
|
676
687
|
* 构建"开始索引"消息
|
|
677
688
|
*/
|
|
678
689
|
private buildStartMessage(): string {
|
|
679
|
-
|
|
680
|
-
return `[Multimodal RAG] 新文件索引通知: 检测到 ${count} 个新的媒体文件正在被索引,请通知用户。`;
|
|
690
|
+
return "[Multimodal RAG] 新文件索引通知: 已开始处理本轮新增媒体文件,请通知用户。";
|
|
681
691
|
}
|
|
682
692
|
|
|
683
693
|
/**
|
|
@@ -685,9 +695,10 @@ export class IndexNotifier implements IndexEventCallbacks {
|
|
|
685
695
|
*/
|
|
686
696
|
private buildSummaryMessage(): string {
|
|
687
697
|
const files = [...this.batch.values()];
|
|
688
|
-
const total = files.length;
|
|
689
698
|
const succeeded = files.filter((f) => f.status === "indexed");
|
|
699
|
+
const skipped = files.filter((f) => f.status === "skipped");
|
|
690
700
|
const failed = files.filter((f) => f.status === "failed");
|
|
701
|
+
const total = succeeded.length + failed.length;
|
|
691
702
|
|
|
692
703
|
// 统计成功文件的类型
|
|
693
704
|
const images = succeeded.filter((f) => f.fileType === "image").length;
|
|
@@ -701,8 +712,15 @@ export class IndexNotifier implements IndexEventCallbacks {
|
|
|
701
712
|
const durationStr =
|
|
702
713
|
minutes > 0 ? `${minutes} 分 ${seconds} 秒` : `${seconds} 秒`;
|
|
703
714
|
|
|
704
|
-
//
|
|
705
|
-
|
|
715
|
+
// 构建消息(仅统计本轮真正处理的文件:成功 + 失败,不把 skipped 算进处理总数)
|
|
716
|
+
if (total === 0) {
|
|
717
|
+
if (skipped.length > 0) {
|
|
718
|
+
return `[Multimodal RAG] 索引完成通知: 本轮没有新增文件需要索引(跳过 ${skipped.length} 个已存在文件)。耗时 ${durationStr}。请通知用户。`;
|
|
719
|
+
}
|
|
720
|
+
return `[Multimodal RAG] 索引完成通知: 本轮没有可汇总的处理结果。耗时 ${durationStr}。请通知用户。`;
|
|
721
|
+
}
|
|
722
|
+
|
|
723
|
+
let message = `[Multimodal RAG] 索引完成通知: 本轮共处理 ${total} 个文件,`;
|
|
706
724
|
message += `成功 ${succeeded.length} 个`;
|
|
707
725
|
|
|
708
726
|
if (images > 0 || audios > 0) {
|
|
@@ -716,6 +734,10 @@ export class IndexNotifier implements IndexEventCallbacks {
|
|
|
716
734
|
message += `,失败 ${failed.length} 个`;
|
|
717
735
|
}
|
|
718
736
|
|
|
737
|
+
if (skipped.length > 0) {
|
|
738
|
+
message += `(另跳过 ${skipped.length} 个已存在文件)`;
|
|
739
|
+
}
|
|
740
|
+
|
|
719
741
|
message += `。耗时 ${durationStr}。请发送索引完成总结通知给用户。`;
|
|
720
742
|
|
|
721
743
|
return message;
|
package/src/types.ts
CHANGED
|
@@ -54,6 +54,7 @@ export type NotificationConfig = {
|
|
|
54
54
|
export type IndexEventCallbacks = {
|
|
55
55
|
onFileQueued: (filePath: string) => void;
|
|
56
56
|
onFileIndexed: (filePath: string, fileType: MediaType) => void;
|
|
57
|
+
onFileSkipped?: (filePath: string, fileType: MediaType, reason?: string) => void;
|
|
57
58
|
onFileFailed: (filePath: string, error: string) => void;
|
|
58
59
|
dispose?: () => void;
|
|
59
60
|
};
|
package/src/watcher.ts
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
import chokidar from "chokidar";
|
|
6
|
-
import { stat, readdir } from "node:fs/promises";
|
|
6
|
+
import { stat, readdir, realpath } from "node:fs/promises";
|
|
7
7
|
import { basename, extname, resolve, join } from "node:path";
|
|
8
8
|
import { createHash } from "node:crypto";
|
|
9
9
|
import { readFile } from "node:fs/promises";
|
|
@@ -253,9 +253,9 @@ export class MediaWatcher {
|
|
|
253
253
|
const existing = await this.storage.findByHash(fileHash);
|
|
254
254
|
if (existing) {
|
|
255
255
|
this.logger.info?.(`Skipping duplicate: ${fileName}`);
|
|
256
|
-
//
|
|
256
|
+
// 已存在内容:标记为 skipped,不计入“本次新索引成功数”
|
|
257
257
|
this.failedFiles.delete(filePath);
|
|
258
|
-
this.callbacks?.onFileIndexed(filePath, fileType);
|
|
258
|
+
this.callbacks?.onFileSkipped?.(filePath, fileType, "duplicate");
|
|
259
259
|
return true;
|
|
260
260
|
}
|
|
261
261
|
|
|
@@ -375,12 +375,27 @@ export class MediaWatcher {
|
|
|
375
375
|
const { entries: indexedFiles } = await this.storage.list({
|
|
376
376
|
limit: 10000 // 获取所有文件
|
|
377
377
|
});
|
|
378
|
-
const indexedPathsSet = new Set(indexedFiles.map(f => f.filePath));
|
|
378
|
+
const indexedPathsSet = new Set(indexedFiles.map((f) => f.filePath));
|
|
379
|
+
|
|
380
|
+
// 用 realpath 归一化,避免因为软链/路径别名导致“已索引文件被误判为缺失”
|
|
381
|
+
const normalizedIndexedPaths = await Promise.all(
|
|
382
|
+
indexedFiles.map(async (file) => await this.normalizeComparablePath(file.filePath)),
|
|
383
|
+
);
|
|
384
|
+
for (const normalized of normalizedIndexedPaths) {
|
|
385
|
+
indexedPathsSet.add(normalized);
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
const comparableAllFiles = await Promise.all(
|
|
389
|
+
allFiles.map(async (filePath) => ({
|
|
390
|
+
filePath,
|
|
391
|
+
comparablePath: await this.normalizeComparablePath(filePath),
|
|
392
|
+
})),
|
|
393
|
+
);
|
|
379
394
|
|
|
380
395
|
// 找出缺失的文件
|
|
381
396
|
let missingFiles = 0;
|
|
382
|
-
for (const filePath of allFiles) {
|
|
383
|
-
if (!indexedPathsSet.has(filePath)) {
|
|
397
|
+
for (const { filePath, comparablePath } of comparableAllFiles) {
|
|
398
|
+
if (!indexedPathsSet.has(filePath) && !indexedPathsSet.has(comparablePath)) {
|
|
384
399
|
missingFiles++;
|
|
385
400
|
this.enqueueFile(filePath);
|
|
386
401
|
}
|
|
@@ -395,6 +410,15 @@ export class MediaWatcher {
|
|
|
395
410
|
}
|
|
396
411
|
}
|
|
397
412
|
|
|
413
|
+
private async normalizeComparablePath(filePath: string): Promise<string> {
|
|
414
|
+
const resolved = resolve(filePath);
|
|
415
|
+
try {
|
|
416
|
+
return await realpath(resolved);
|
|
417
|
+
} catch {
|
|
418
|
+
return resolved;
|
|
419
|
+
}
|
|
420
|
+
}
|
|
421
|
+
|
|
398
422
|
/**
|
|
399
423
|
* 递归扫描目录获取所有支持的文件
|
|
400
424
|
*/
|