koishi-plugin-best-cave 2.7.11 → 2.7.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/AIManager.d.ts +16 -27
- package/lib/HashManager.d.ts +6 -0
- package/lib/Utils.d.ts +2 -2
- package/lib/index.d.ts +0 -4
- package/lib/index.js +200 -210
- package/package.json +1 -1
package/lib/AIManager.d.ts
CHANGED
|
@@ -48,55 +48,44 @@ export declare class AIManager {
|
|
|
48
48
|
* @description 对新提交的内容执行 AI 驱动的查重检查。
|
|
49
49
|
* @param {StoredElement[]} newElements - 新提交的内容元素数组。
|
|
50
50
|
* @param {{ fileName: string; buffer: Buffer }[]} [mediaBuffers] - 可选的媒体文件缓冲区数组。
|
|
51
|
-
* @returns {Promise<{ duplicate: boolean;
|
|
51
|
+
* @returns {Promise<{ duplicate: boolean; ids?: number[] }>} 一个 Promise,解析为一个对象,指示内容是否重复以及重复的回声洞 ID 数组(如果存在)。
|
|
52
52
|
*/
|
|
53
53
|
checkForDuplicates(newElements: StoredElement[], mediaBuffers?: {
|
|
54
54
|
fileName: string;
|
|
55
55
|
buffer: Buffer;
|
|
56
56
|
}[]): Promise<{
|
|
57
57
|
duplicate: boolean;
|
|
58
|
-
|
|
58
|
+
ids?: number[];
|
|
59
59
|
}>;
|
|
60
60
|
/**
|
|
61
|
-
* @description
|
|
62
|
-
* @param {CaveObject} cave - 要分析的回声洞对象。
|
|
63
|
-
* @param {{ fileName: string; buffer: Buffer }[]} [mediaBuffers] - 可选的媒体文件缓冲区数组,用于新提交内容的分析。
|
|
64
|
-
* @returns {Promise<void>} 分析和存储操作完成后解析的 Promise。
|
|
65
|
-
*/
|
|
66
|
-
analyzeAndStore(cave: CaveObject, mediaBuffers?: {
|
|
67
|
-
fileName: string;
|
|
68
|
-
buffer: Buffer;
|
|
69
|
-
}[]): Promise<void>;
|
|
70
|
-
/**
|
|
71
|
-
* @description 对一批回声洞执行分析并存储结果。
|
|
61
|
+
* @description 对单个或批量回声洞执行完整的分析和存储流程。
|
|
72
62
|
* @param {CaveObject[]} caves - 要分析的回声洞对象数组。
|
|
63
|
+
* @param {{ fileName: string; buffer: Buffer }[]} [mediaBuffers] - 可选的媒体文件缓冲区数组,仅在分析新内容时使用。
|
|
73
64
|
* @returns {Promise<number>} 一个 Promise,解析为成功分析和存储的条目数。
|
|
74
65
|
*/
|
|
75
|
-
|
|
66
|
+
analyzeAndStore(caves: CaveObject[], mediaBuffers?: {
|
|
67
|
+
fileName: string;
|
|
68
|
+
buffer: Buffer;
|
|
69
|
+
}[]): Promise<number>;
|
|
76
70
|
/**
|
|
77
|
-
* @description
|
|
78
|
-
* @param {
|
|
79
|
-
* @
|
|
71
|
+
* @description 调用 AI 判断两个回声洞内容是否重复或高度相似。
|
|
72
|
+
* @param {CaveObject} caveA - 第一个回声洞对象。
|
|
73
|
+
* @param {CaveObject} caveB - 第二个回声洞对象。
|
|
74
|
+
* @returns {Promise<boolean>} 如果内容相似则返回 true,否则返回 false。
|
|
80
75
|
*/
|
|
81
|
-
private
|
|
76
|
+
private isContentDuplicateAI;
|
|
82
77
|
/**
|
|
83
78
|
* @description 为一批回声洞准备内容,并向 AI 发送单个请求以获取所有分析结果。
|
|
84
79
|
* @param {CaveObject[]} caves - 要分析的回声洞对象数组。
|
|
85
80
|
* @param {Map<string, Buffer>} [mediaBufferMap] - 可选的媒体文件名到其缓冲区的映射。
|
|
86
|
-
* @returns {Promise<
|
|
81
|
+
* @returns {Promise<AnalysisResult[]>} 一个 Promise,解析为 AI 返回的分析结果数组。
|
|
87
82
|
*/
|
|
88
83
|
private getAnalyses;
|
|
89
84
|
/**
|
|
90
|
-
* @description
|
|
91
|
-
* @returns {Promise<void>} 当可以继续发送请求时解析的 Promise。
|
|
92
|
-
*/
|
|
93
|
-
private ensureRateLimit;
|
|
94
|
-
/**
|
|
95
|
-
* @description 封装了向 OpenAI 兼容的 API 发送请求的底层逻辑。
|
|
85
|
+
* @description 封装了向 OpenAI 兼容的 API 发送请求的底层逻辑,并稳健地解析 JSON 响应。
|
|
96
86
|
* @param {any[]} messages - 发送给 AI 的消息数组,遵循 OpenAI 格式。
|
|
97
87
|
* @param {string} systemPrompt - 系统提示词,用于指导 AI 的行为。
|
|
98
|
-
* @
|
|
99
|
-
* @returns {Promise<any>} 一个 Promise,解析为从 AI 接收到的、解析后的 JSON 对象。
|
|
88
|
+
* @returns {Promise<T>} 一个 Promise,解析为从 AI 接收到的、解析后的 JSON 对象。
|
|
100
89
|
* @throws {Error} 当 AI 返回空或无效内容时抛出错误。
|
|
101
90
|
*/
|
|
102
91
|
private requestAI;
|
package/lib/HashManager.d.ts
CHANGED
|
@@ -32,6 +32,12 @@ export declare class HashManager {
|
|
|
32
32
|
* @param cave - 主 `cave` 命令实例。
|
|
33
33
|
*/
|
|
34
34
|
registerCommands(cave: any): void;
|
|
35
|
+
/**
|
|
36
|
+
* @description 扫描并修复单个图片 Buffer,移除文件结束符之后的多余数据。
|
|
37
|
+
* @param imageBuffer - 原始的图片 Buffer。
|
|
38
|
+
* @returns 修复后的图片 Buffer。如果无需修复,则返回原始 Buffer。
|
|
39
|
+
*/
|
|
40
|
+
sanitizeImageBuffer(imageBuffer: Buffer): Buffer;
|
|
35
41
|
/**
|
|
36
42
|
* @description 执行一维离散余弦变换 (DCT-II) 的方法。
|
|
37
43
|
* @param input - 输入的数字数组。
|
package/lib/Utils.d.ts
CHANGED
|
@@ -57,7 +57,7 @@ export declare function processMessageElements(sourceElements: h[], newId: numbe
|
|
|
57
57
|
* @description 执行文本 (Simhash) 和图片 (pHash) 相似度查重。
|
|
58
58
|
* @returns 一个对象,指示是否发现重复项;如果未发现,则返回生成的哈希。
|
|
59
59
|
*/
|
|
60
|
-
export declare function performSimilarityChecks(ctx: Context, config: Config, hashManager: HashManager, finalElementsForDb: StoredElement[], downloadedMedia: {
|
|
60
|
+
export declare function performSimilarityChecks(ctx: Context, config: Config, hashManager: HashManager, logger: Logger, finalElementsForDb: StoredElement[], downloadedMedia: {
|
|
61
61
|
fileName: string;
|
|
62
62
|
buffer: Buffer;
|
|
63
63
|
}[]): Promise<{
|
|
@@ -81,7 +81,7 @@ export declare function performSimilarityChecks(ctx: Context, config: Config, ha
|
|
|
81
81
|
export declare function handleFileUploads(ctx: Context, config: Config, fileManager: FileManager, logger: Logger, cave: CaveObject, downloadedMedia: {
|
|
82
82
|
fileName: string;
|
|
83
83
|
buffer: Buffer;
|
|
84
|
-
}[], reusableIds: Set<number>,
|
|
84
|
+
}[], reusableIds: Set<number>, needsReview: boolean): Promise<'pending' | 'active'>;
|
|
85
85
|
/**
|
|
86
86
|
* @description 校验会话是否来自指定的管理群组。
|
|
87
87
|
* @param session 当前会话。
|
package/lib/index.d.ts
CHANGED
|
@@ -62,10 +62,6 @@ export interface Config {
|
|
|
62
62
|
aiApiKey?: string;
|
|
63
63
|
aiModel?: string;
|
|
64
64
|
aiRPM?: number;
|
|
65
|
-
AnalysePrompt?: string;
|
|
66
|
-
aiCheckPrompt?: string;
|
|
67
|
-
aiAnalyseSchema?: string;
|
|
68
|
-
aiCheckSchema?: string;
|
|
69
65
|
}
|
|
70
66
|
export declare const Config: Schema<Config>;
|
|
71
67
|
export declare function apply(ctx: Context, config: Config): void;
|
package/lib/index.js
CHANGED
|
@@ -450,42 +450,46 @@ async function processMessageElements(sourceElements, newId, session, creationTi
|
|
|
450
450
|
return { finalElementsForDb, mediaToSave };
|
|
451
451
|
}
|
|
452
452
|
__name(processMessageElements, "processMessageElements");
|
|
453
|
-
async function performSimilarityChecks(ctx, config, hashManager, finalElementsForDb, downloadedMedia) {
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
const
|
|
463
|
-
|
|
453
|
+
async function performSimilarityChecks(ctx, config, hashManager, logger2, finalElementsForDb, downloadedMedia) {
|
|
454
|
+
try {
|
|
455
|
+
const textHashesToStore = [];
|
|
456
|
+
const imageHashesToStore = [];
|
|
457
|
+
const combinedText = finalElementsForDb.filter((el) => el.type === "text" && typeof el.content === "string").map((el) => el.content).join(" ");
|
|
458
|
+
if (combinedText) {
|
|
459
|
+
const newSimhash = hashManager.generateTextSimhash(combinedText);
|
|
460
|
+
if (newSimhash) {
|
|
461
|
+
const existingTextHashes = await ctx.database.get("cave_hash", { type: "text" });
|
|
462
|
+
for (const existing of existingTextHashes) {
|
|
463
|
+
const similarity = hashManager.calculateSimilarity(newSimhash, existing.hash);
|
|
464
|
+
if (similarity >= config.textThreshold) return { duplicate: true, message: `文本与回声洞(${existing.cave})的相似度(${similarity.toFixed(2)}%)超过阈值` };
|
|
465
|
+
}
|
|
466
|
+
textHashesToStore.push({ hash: newSimhash, type: "text" });
|
|
464
467
|
}
|
|
465
|
-
textHashesToStore.push({ hash: newSimhash, type: "text" });
|
|
466
468
|
}
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
469
|
+
if (downloadedMedia.length > 0) {
|
|
470
|
+
const allExistingImageHashes = await ctx.database.get("cave_hash", { type: "image" });
|
|
471
|
+
for (const media of downloadedMedia) {
|
|
472
|
+
if ([".png", ".jpg", ".jpeg", ".webp"].includes(path2.extname(media.fileName).toLowerCase())) {
|
|
473
|
+
const imageHash = await hashManager.generatePHash(media.buffer);
|
|
474
|
+
for (const existing of allExistingImageHashes) {
|
|
475
|
+
const similarity = hashManager.calculateSimilarity(imageHash, existing.hash);
|
|
476
|
+
if (similarity >= config.imageThreshold) return { duplicate: true, message: `图片与回声洞(${existing.cave})的相似度(${similarity.toFixed(2)}%)超过阈值` };
|
|
477
|
+
}
|
|
478
|
+
imageHashesToStore.push({ hash: imageHash, type: "image" });
|
|
479
|
+
allExistingImageHashes.push({ cave: 0, hash: imageHash, type: "image" });
|
|
476
480
|
}
|
|
477
|
-
imageHashesToStore.push({ hash: imageHash, type: "image" });
|
|
478
|
-
allExistingImageHashes.push({ cave: 0, hash: imageHash, type: "image" });
|
|
479
481
|
}
|
|
480
482
|
}
|
|
483
|
+
return { duplicate: false, textHashesToStore, imageHashesToStore };
|
|
484
|
+
} catch (error) {
|
|
485
|
+
logger2.warn("相似度比较失败:", error);
|
|
486
|
+
return { duplicate: false, textHashesToStore: [], imageHashesToStore: [] };
|
|
481
487
|
}
|
|
482
|
-
return { duplicate: false, textHashesToStore, imageHashesToStore };
|
|
483
488
|
}
|
|
484
489
|
__name(performSimilarityChecks, "performSimilarityChecks");
|
|
485
|
-
async function handleFileUploads(ctx, config, fileManager, logger2, cave, downloadedMedia, reusableIds,
|
|
490
|
+
async function handleFileUploads(ctx, config, fileManager, logger2, cave, downloadedMedia, reusableIds, needsReview) {
|
|
486
491
|
try {
|
|
487
492
|
await Promise.all(downloadedMedia.map((item) => fileManager.saveFile(item.fileName, item.buffer)));
|
|
488
|
-
const needsReview = config.enablePend && session.channelId !== config.adminChannel?.split(":")[1];
|
|
489
493
|
const finalStatus = needsReview ? "pending" : "active";
|
|
490
494
|
await ctx.database.upsert("cave", [{ id: cave.id, status: finalStatus }]);
|
|
491
495
|
return finalStatus;
|
|
@@ -845,37 +849,12 @@ var HashManager = class {
|
|
|
845
849
|
if (!cavesToProcess.length) return "无可修复的回声洞";
|
|
846
850
|
let fixedFiles = 0;
|
|
847
851
|
let errorCount = 0;
|
|
848
|
-
const PNG_SIGNATURE = Buffer.from([137, 80, 78, 71, 13, 10, 26, 10]);
|
|
849
|
-
const JPEG_SIGNATURE = Buffer.from([255, 216]);
|
|
850
|
-
const GIF_SIGNATURE = Buffer.from("GIF");
|
|
851
852
|
for (const cave2 of cavesToProcess) {
|
|
852
853
|
const imageElements = cave2.elements.filter((el) => el.type === "image" && el.file);
|
|
853
854
|
for (const element of imageElements) {
|
|
854
855
|
try {
|
|
855
856
|
const originalBuffer = await this.fileManager.readFile(element.file);
|
|
856
|
-
|
|
857
|
-
if (originalBuffer.slice(0, 8).equals(PNG_SIGNATURE)) {
|
|
858
|
-
const IEND_CHUNK = Buffer.from("IEND");
|
|
859
|
-
const iendIndex = originalBuffer.lastIndexOf(IEND_CHUNK);
|
|
860
|
-
if (iendIndex !== -1) {
|
|
861
|
-
const endOfPngData = iendIndex + 8;
|
|
862
|
-
if (originalBuffer.length > endOfPngData) sanitizedBuffer = originalBuffer.slice(0, endOfPngData);
|
|
863
|
-
}
|
|
864
|
-
} else if (originalBuffer.slice(0, 2).equals(JPEG_SIGNATURE)) {
|
|
865
|
-
const EOI_MARKER = Buffer.from([255, 217]);
|
|
866
|
-
const eoiIndex = originalBuffer.lastIndexOf(EOI_MARKER);
|
|
867
|
-
if (eoiIndex !== -1) {
|
|
868
|
-
const endOfJpegData = eoiIndex + 2;
|
|
869
|
-
if (originalBuffer.length > endOfJpegData) sanitizedBuffer = originalBuffer.slice(0, endOfJpegData);
|
|
870
|
-
}
|
|
871
|
-
} else if (originalBuffer.slice(0, 3).equals(GIF_SIGNATURE)) {
|
|
872
|
-
const GIF_TERMINATOR = Buffer.from([59]);
|
|
873
|
-
const terminatorIndex = originalBuffer.lastIndexOf(GIF_TERMINATOR);
|
|
874
|
-
if (terminatorIndex !== -1) {
|
|
875
|
-
const endOfGifData = terminatorIndex + 1;
|
|
876
|
-
if (originalBuffer.length > endOfGifData) sanitizedBuffer = originalBuffer.slice(0, endOfGifData);
|
|
877
|
-
}
|
|
878
|
-
}
|
|
857
|
+
const sanitizedBuffer = this.sanitizeImageBuffer(originalBuffer);
|
|
879
858
|
if (!originalBuffer.equals(sanitizedBuffer)) {
|
|
880
859
|
await this.fileManager.saveFile(element.file, sanitizedBuffer);
|
|
881
860
|
fixedFiles++;
|
|
@@ -895,6 +874,40 @@ var HashManager = class {
|
|
|
895
874
|
}
|
|
896
875
|
});
|
|
897
876
|
}
|
|
877
|
+
/**
|
|
878
|
+
* @description 扫描并修复单个图片 Buffer,移除文件结束符之后的多余数据。
|
|
879
|
+
* @param imageBuffer - 原始的图片 Buffer。
|
|
880
|
+
* @returns 修复后的图片 Buffer。如果无需修复,则返回原始 Buffer。
|
|
881
|
+
*/
|
|
882
|
+
sanitizeImageBuffer(imageBuffer) {
|
|
883
|
+
const PNG_SIGNATURE = Buffer.from([137, 80, 78, 71, 13, 10, 26, 10]);
|
|
884
|
+
const JPEG_SIGNATURE = Buffer.from([255, 216]);
|
|
885
|
+
const GIF_SIGNATURE = Buffer.from("GIF");
|
|
886
|
+
let sanitizedBuffer = imageBuffer;
|
|
887
|
+
if (imageBuffer.slice(0, 8).equals(PNG_SIGNATURE)) {
|
|
888
|
+
const IEND_CHUNK = Buffer.from("IEND");
|
|
889
|
+
const iendIndex = imageBuffer.lastIndexOf(IEND_CHUNK);
|
|
890
|
+
if (iendIndex !== -1) {
|
|
891
|
+
const endOfPngData = iendIndex + 8;
|
|
892
|
+
if (imageBuffer.length > endOfPngData) sanitizedBuffer = imageBuffer.slice(0, endOfPngData);
|
|
893
|
+
}
|
|
894
|
+
} else if (imageBuffer.slice(0, 2).equals(JPEG_SIGNATURE)) {
|
|
895
|
+
const EOI_MARKER = Buffer.from([255, 217]);
|
|
896
|
+
const eoiIndex = imageBuffer.lastIndexOf(EOI_MARKER);
|
|
897
|
+
if (eoiIndex !== -1) {
|
|
898
|
+
const endOfJpegData = eoiIndex + 2;
|
|
899
|
+
if (imageBuffer.length > endOfJpegData) sanitizedBuffer = imageBuffer.slice(0, endOfJpegData);
|
|
900
|
+
}
|
|
901
|
+
} else if (imageBuffer.slice(0, 3).equals(GIF_SIGNATURE)) {
|
|
902
|
+
const GIF_TERMINATOR = Buffer.from([59]);
|
|
903
|
+
const terminatorIndex = imageBuffer.lastIndexOf(GIF_TERMINATOR);
|
|
904
|
+
if (terminatorIndex !== -1) {
|
|
905
|
+
const endOfGifData = terminatorIndex + 1;
|
|
906
|
+
if (imageBuffer.length > endOfGifData) sanitizedBuffer = imageBuffer.slice(0, endOfGifData);
|
|
907
|
+
}
|
|
908
|
+
}
|
|
909
|
+
return sanitizedBuffer;
|
|
910
|
+
}
|
|
898
911
|
/**
|
|
899
912
|
* @description 执行一维离散余弦变换 (DCT-II) 的方法。
|
|
900
913
|
* @param input - 输入的数字数组。
|
|
@@ -1045,7 +1058,7 @@ var AIManager = class {
|
|
|
1045
1058
|
for (let i = 0; i < cavesToAnalyze.length; i += batchSize) {
|
|
1046
1059
|
const batch = cavesToAnalyze.slice(i, i + batchSize);
|
|
1047
1060
|
this.logger.info(`[${i + 1}/${cavesToAnalyze.length}] 正在分析 ${batch.length} 条回声洞...`);
|
|
1048
|
-
const successCountInBatch = await this.
|
|
1061
|
+
const successCountInBatch = await this.analyzeAndStore(batch);
|
|
1049
1062
|
totalSuccessCount += successCountInBatch;
|
|
1050
1063
|
}
|
|
1051
1064
|
return `已分析 ${totalSuccessCount} 个回声洞`;
|
|
@@ -1054,98 +1067,130 @@ var AIManager = class {
|
|
|
1054
1067
|
return `操作失败: ${error.message}`;
|
|
1055
1068
|
}
|
|
1056
1069
|
});
|
|
1070
|
+
cave.subcommand(".compare", "比较重复性", { hidden: true }).usage("检查回声洞,找出可能重复的内容。").action(async ({ session }) => {
|
|
1071
|
+
if (requireAdmin(session, this.config)) return requireAdmin(session, this.config);
|
|
1072
|
+
await session.send("正在检查,请稍候...");
|
|
1073
|
+
try {
|
|
1074
|
+
const allMeta = await this.ctx.database.get("cave_meta", {});
|
|
1075
|
+
if (allMeta.length < 2) return "无可比较数据";
|
|
1076
|
+
const allCaves = new Map((await this.ctx.database.get("cave", { status: "active" })).map((c) => [c.id, c]));
|
|
1077
|
+
const foundPairs = /* @__PURE__ */ new Set();
|
|
1078
|
+
const checkedPairs = /* @__PURE__ */ new Set();
|
|
1079
|
+
for (let i = 0; i < allMeta.length; i++) {
|
|
1080
|
+
for (let j = i + 1; j < allMeta.length; j++) {
|
|
1081
|
+
const meta1 = allMeta[i];
|
|
1082
|
+
const meta2 = allMeta[j];
|
|
1083
|
+
const pairKey = [meta1.cave, meta2.cave].sort((a, b) => a - b).join("-");
|
|
1084
|
+
if (checkedPairs.has(pairKey)) continue;
|
|
1085
|
+
const keywords1 = new Set(meta1.keywords);
|
|
1086
|
+
const keywords2 = new Set(meta2.keywords);
|
|
1087
|
+
const intersection = new Set([...keywords1].filter((x) => keywords2.has(x)));
|
|
1088
|
+
const union = /* @__PURE__ */ new Set([...keywords1, ...keywords2]);
|
|
1089
|
+
const similarity = union.size > 0 ? intersection.size / union.size : 0;
|
|
1090
|
+
if (similarity * 100 >= 80) {
|
|
1091
|
+
const cave1 = allCaves.get(meta1.cave);
|
|
1092
|
+
const cave2 = allCaves.get(meta2.cave);
|
|
1093
|
+
if (cave1 && cave2 && await this.isContentDuplicateAI(cave1, cave2)) foundPairs.add(`${cave1.id} & ${cave2.id}`);
|
|
1094
|
+
checkedPairs.add(pairKey);
|
|
1095
|
+
}
|
|
1096
|
+
}
|
|
1097
|
+
}
|
|
1098
|
+
if (foundPairs.size === 0) return "未发现高重复性的内容";
|
|
1099
|
+
let report = `已发现 ${foundPairs.size} 组高重复性的内容:
|
|
1100
|
+
`;
|
|
1101
|
+
report += [...foundPairs].join("\n");
|
|
1102
|
+
return report.trim();
|
|
1103
|
+
} catch (error) {
|
|
1104
|
+
this.logger.error("检查重复性失败:", error);
|
|
1105
|
+
return `检查失败: ${error.message}`;
|
|
1106
|
+
}
|
|
1107
|
+
});
|
|
1057
1108
|
}
|
|
1058
1109
|
/**
|
|
1059
1110
|
* @description 对新提交的内容执行 AI 驱动的查重检查。
|
|
1060
1111
|
* @param {StoredElement[]} newElements - 新提交的内容元素数组。
|
|
1061
1112
|
* @param {{ fileName: string; buffer: Buffer }[]} [mediaBuffers] - 可选的媒体文件缓冲区数组。
|
|
1062
|
-
* @returns {Promise<{ duplicate: boolean;
|
|
1113
|
+
* @returns {Promise<{ duplicate: boolean; ids?: number[] }>} 一个 Promise,解析为一个对象,指示内容是否重复以及重复的回声洞 ID 数组(如果存在)。
|
|
1063
1114
|
*/
|
|
1064
1115
|
async checkForDuplicates(newElements, mediaBuffers) {
|
|
1065
1116
|
try {
|
|
1066
1117
|
const dummyCave = { id: 0, elements: newElements, channelId: "", userId: "", userName: "", status: "preload", time: /* @__PURE__ */ new Date() };
|
|
1067
|
-
const
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
const
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
};
|
|
1080
|
-
const
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
};
|
|
1118
|
+
const [newAnalysis] = await this.getAnalyses([dummyCave], mediaBuffers ? new Map(mediaBuffers.map((m) => [m.fileName, m.buffer])) : void 0);
|
|
1119
|
+
if (!newAnalysis?.keywords?.length) return { duplicate: false, ids: [] };
|
|
1120
|
+
const allMeta = await this.ctx.database.get("cave_meta", {}, { fields: ["cave", "keywords"] });
|
|
1121
|
+
const newKeywordsSet = new Set(newAnalysis.keywords);
|
|
1122
|
+
const similarCaveIds = allMeta.filter((meta) => {
|
|
1123
|
+
if (!meta.keywords?.length) return false;
|
|
1124
|
+
const existingKeywordsSet = new Set(meta.keywords);
|
|
1125
|
+
const intersection = new Set([...newKeywordsSet].filter((x) => existingKeywordsSet.has(x)));
|
|
1126
|
+
const union = /* @__PURE__ */ new Set([...newKeywordsSet, ...existingKeywordsSet]);
|
|
1127
|
+
const similarity = union.size > 0 ? intersection.size / union.size : 0;
|
|
1128
|
+
return similarity * 100 >= 80;
|
|
1129
|
+
}).map((meta) => meta.cave);
|
|
1130
|
+
if (similarCaveIds.length === 0) return { duplicate: false, ids: [] };
|
|
1131
|
+
const potentialDuplicates = await this.ctx.database.get("cave", { id: { $in: similarCaveIds } });
|
|
1132
|
+
const duplicateIds = [];
|
|
1133
|
+
for (const existingCave of potentialDuplicates) if (await this.isContentDuplicateAI(dummyCave, existingCave)) duplicateIds.push(existingCave.id);
|
|
1134
|
+
return { duplicate: duplicateIds.length > 0, ids: duplicateIds };
|
|
1085
1135
|
} catch (error) {
|
|
1086
1136
|
this.logger.error("查重回声洞出错:", error);
|
|
1087
|
-
return { duplicate: false };
|
|
1137
|
+
return { duplicate: false, ids: [] };
|
|
1088
1138
|
}
|
|
1089
1139
|
}
|
|
1090
1140
|
/**
|
|
1091
|
-
* @description
|
|
1092
|
-
* @param {CaveObject} cave - 要分析的回声洞对象。
|
|
1093
|
-
* @param {{ fileName: string; buffer: Buffer }[]} [mediaBuffers] - 可选的媒体文件缓冲区数组,用于新提交内容的分析。
|
|
1094
|
-
* @returns {Promise<void>} 分析和存储操作完成后解析的 Promise。
|
|
1095
|
-
*/
|
|
1096
|
-
async analyzeAndStore(cave, mediaBuffers) {
|
|
1097
|
-
const mediaMap = mediaBuffers ? new Map(mediaBuffers.map((m) => [m.fileName, m.buffer])) : void 0;
|
|
1098
|
-
const [result] = await this.getAnalyses([cave], mediaMap);
|
|
1099
|
-
if (result) {
|
|
1100
|
-
await this.ctx.database.upsert("cave_meta", [{
|
|
1101
|
-
cave: cave.id,
|
|
1102
|
-
keywords: result.keywords || [],
|
|
1103
|
-
description: result.description || "",
|
|
1104
|
-
rating: Math.max(0, Math.min(100, result.rating || 0))
|
|
1105
|
-
}]);
|
|
1106
|
-
}
|
|
1107
|
-
}
|
|
1108
|
-
/**
|
|
1109
|
-
* @description 对一批回声洞执行分析并存储结果。
|
|
1141
|
+
* @description 对单个或批量回声洞执行完整的分析和存储流程。
|
|
1110
1142
|
* @param {CaveObject[]} caves - 要分析的回声洞对象数组。
|
|
1143
|
+
* @param {{ fileName: string; buffer: Buffer }[]} [mediaBuffers] - 可选的媒体文件缓冲区数组,仅在分析新内容时使用。
|
|
1111
1144
|
* @returns {Promise<number>} 一个 Promise,解析为成功分析和存储的条目数。
|
|
1112
1145
|
*/
|
|
1113
|
-
async
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1146
|
+
async analyzeAndStore(caves, mediaBuffers) {
|
|
1147
|
+
try {
|
|
1148
|
+
const mediaMap = mediaBuffers ? new Map(mediaBuffers.map((m) => [m.fileName, m.buffer])) : void 0;
|
|
1149
|
+
const results = await this.getAnalyses(caves, mediaMap);
|
|
1150
|
+
if (!results?.length) return 0;
|
|
1151
|
+
const caveMetaObjects = results.map((res) => ({
|
|
1152
|
+
cave: res.id,
|
|
1153
|
+
keywords: res.keywords || [],
|
|
1154
|
+
description: res.description || "",
|
|
1155
|
+
rating: Math.max(0, Math.min(100, res.rating || 0))
|
|
1156
|
+
}));
|
|
1157
|
+
await this.ctx.database.upsert("cave_meta", caveMetaObjects);
|
|
1158
|
+
return caveMetaObjects.length;
|
|
1159
|
+
} catch (error) {
|
|
1160
|
+
const caveIds = caves.map((c) => c.id).join(", ");
|
|
1161
|
+
this.logger.error(`分析回声洞 (${caveIds}) 出错:`, error);
|
|
1162
|
+
return 0;
|
|
1163
|
+
}
|
|
1124
1164
|
}
|
|
1125
1165
|
/**
|
|
1126
|
-
* @description
|
|
1127
|
-
* @param {
|
|
1128
|
-
* @
|
|
1166
|
+
* @description 调用 AI 判断两个回声洞内容是否重复或高度相似。
|
|
1167
|
+
* @param {CaveObject} caveA - 第一个回声洞对象。
|
|
1168
|
+
* @param {CaveObject} caveB - 第二个回声洞对象。
|
|
1169
|
+
* @returns {Promise<boolean>} 如果内容相似则返回 true,否则返回 false。
|
|
1129
1170
|
*/
|
|
1130
|
-
async
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1171
|
+
async isContentDuplicateAI(caveA, caveB) {
|
|
1172
|
+
try {
|
|
1173
|
+
const formatContent = /* @__PURE__ */ __name((elements) => elements.filter((el) => el.type === "text" && el.content).map((el) => el.content).join(" "), "formatContent");
|
|
1174
|
+
const userMessage = {
|
|
1175
|
+
role: "user",
|
|
1176
|
+
content: JSON.stringify({
|
|
1177
|
+
content_a: { id: caveA.id, text: formatContent(caveA.elements) },
|
|
1178
|
+
content_b: { id: caveB.id, text: formatContent(caveB.elements) }
|
|
1179
|
+
})
|
|
1180
|
+
};
|
|
1181
|
+
const prompt = `你是一位内容查重专家。请判断 content_a 和 content_b 是否重复或高度相似。你的回复必须且只能是一个包裹在 \`\`\`json ... \`\`\` 代码块中的 JSON 对象,该对象仅包含一个键 "duplicate" (布尔值)。`;
|
|
1182
|
+
const response = await this.requestAI([userMessage], prompt);
|
|
1183
|
+
return response.duplicate || false;
|
|
1184
|
+
} catch (error) {
|
|
1185
|
+
this.logger.error(`比较回声洞(${caveA.id})与(${caveB.id})失败:`, error);
|
|
1186
|
+
return false;
|
|
1187
|
+
}
|
|
1143
1188
|
}
|
|
1144
1189
|
/**
|
|
1145
1190
|
* @description 为一批回声洞准备内容,并向 AI 发送单个请求以获取所有分析结果。
|
|
1146
1191
|
* @param {CaveObject[]} caves - 要分析的回声洞对象数组。
|
|
1147
1192
|
* @param {Map<string, Buffer>} [mediaBufferMap] - 可选的媒体文件名到其缓冲区的映射。
|
|
1148
|
-
* @returns {Promise<
|
|
1193
|
+
* @returns {Promise<AnalysisResult[]>} 一个 Promise,解析为 AI 返回的分析结果数组。
|
|
1149
1194
|
*/
|
|
1150
1195
|
async getAnalyses(caves, mediaBufferMap) {
|
|
1151
1196
|
const batchPayload = await Promise.all(caves.map(async (cave) => {
|
|
@@ -1167,14 +1212,18 @@ var AIManager = class {
|
|
|
1167
1212
|
const nonEmptyPayload = batchPayload.filter((p) => p.text.trim() || p.images.length > 0);
|
|
1168
1213
|
if (nonEmptyPayload.length === 0) return [];
|
|
1169
1214
|
const userMessage = { role: "user", content: JSON.stringify(nonEmptyPayload) };
|
|
1170
|
-
const
|
|
1215
|
+
const analysePrompt = `你是一位内容分析专家。请使用中文,分析我以JSON格式提供的一组内容,为每一项内容总结关键词、概括内容并评分。你的回复必须且只能是一个包裹在 \`\`\`json ... \`\`\` 代码块中的有效 JSON 对象。该JSON对象应有一个 "analyses" 键,其值为一个数组。数组中的每个对象都必须包含 "id" (整数), "keywords" (字符串数组), "description" (字符串), 和 "rating" (0-100的整数)。`;
|
|
1216
|
+
const response = await this.requestAI([userMessage], analysePrompt);
|
|
1171
1217
|
return response.analyses || [];
|
|
1172
1218
|
}
|
|
1173
1219
|
/**
|
|
1174
|
-
* @description
|
|
1175
|
-
* @
|
|
1220
|
+
* @description 封装了向 OpenAI 兼容的 API 发送请求的底层逻辑,并稳健地解析 JSON 响应。
|
|
1221
|
+
* @param {any[]} messages - 发送给 AI 的消息数组,遵循 OpenAI 格式。
|
|
1222
|
+
* @param {string} systemPrompt - 系统提示词,用于指导 AI 的行为。
|
|
1223
|
+
* @returns {Promise<T>} 一个 Promise,解析为从 AI 接收到的、解析后的 JSON 对象。
|
|
1224
|
+
* @throws {Error} 当 AI 返回空或无效内容时抛出错误。
|
|
1176
1225
|
*/
|
|
1177
|
-
async
|
|
1226
|
+
async requestAI(messages, systemPrompt) {
|
|
1178
1227
|
const now = Date.now();
|
|
1179
1228
|
if (now > this.rateLimitResetTime) {
|
|
1180
1229
|
this.rateLimitResetTime = now + 6e4;
|
|
@@ -1186,28 +1235,9 @@ var AIManager = class {
|
|
|
1186
1235
|
this.rateLimitResetTime = Date.now() + 6e4;
|
|
1187
1236
|
this.requestCount = 0;
|
|
1188
1237
|
}
|
|
1189
|
-
}
|
|
1190
|
-
/**
|
|
1191
|
-
* @description 封装了向 OpenAI 兼容的 API 发送请求的底层逻辑。
|
|
1192
|
-
* @param {any[]} messages - 发送给 AI 的消息数组,遵循 OpenAI 格式。
|
|
1193
|
-
* @param {string} systemPrompt - 系统提示词,用于指导 AI 的行为。
|
|
1194
|
-
* @param {string} schemaString - 定义期望响应格式的 JSON Schema 字符串。
|
|
1195
|
-
* @returns {Promise<any>} 一个 Promise,解析为从 AI 接收到的、解析后的 JSON 对象。
|
|
1196
|
-
* @throws {Error} 当 AI 返回空或无效内容时抛出错误。
|
|
1197
|
-
*/
|
|
1198
|
-
async requestAI(messages, systemPrompt, schemaString) {
|
|
1199
|
-
await this.ensureRateLimit();
|
|
1200
1238
|
const payload = {
|
|
1201
1239
|
model: this.config.aiModel,
|
|
1202
|
-
messages: [{ role: "system", content: systemPrompt }, ...messages]
|
|
1203
|
-
response_format: {
|
|
1204
|
-
type: "json_schema",
|
|
1205
|
-
json_schema: {
|
|
1206
|
-
name: "extract_data",
|
|
1207
|
-
description: "根据提供的内容提取或分析信息。",
|
|
1208
|
-
schema: JSON.parse(schemaString)
|
|
1209
|
-
}
|
|
1210
|
-
}
|
|
1240
|
+
messages: [{ role: "system", content: systemPrompt }, ...messages]
|
|
1211
1241
|
};
|
|
1212
1242
|
const fullUrl = `${this.config.aiEndpoint.replace(/\/$/, "")}/chat/completions`;
|
|
1213
1243
|
const headers = {
|
|
@@ -1217,8 +1247,22 @@ var AIManager = class {
|
|
|
1217
1247
|
this.requestCount++;
|
|
1218
1248
|
const response = await this.http.post(fullUrl, payload, { headers, timeout: 9e4 });
|
|
1219
1249
|
const content = response.choices?.[0]?.message?.content;
|
|
1220
|
-
if (typeof content
|
|
1221
|
-
|
|
1250
|
+
if (typeof content !== "string" || !content.trim()) throw new Error("响应无效");
|
|
1251
|
+
try {
|
|
1252
|
+
const jsonRegex = /```json\s*([\s\S]*?)\s*```/;
|
|
1253
|
+
const match = content.match(jsonRegex);
|
|
1254
|
+
let jsonString = "";
|
|
1255
|
+
if (match && match[1]) {
|
|
1256
|
+
jsonString = match[1];
|
|
1257
|
+
} else {
|
|
1258
|
+
jsonString = content;
|
|
1259
|
+
}
|
|
1260
|
+
return JSON.parse(jsonString);
|
|
1261
|
+
} catch (error) {
|
|
1262
|
+
this.logger.error("解析 JSON 失败:", error);
|
|
1263
|
+
this.logger.error("原始响应:", content);
|
|
1264
|
+
throw new Error("解析失败");
|
|
1265
|
+
}
|
|
1222
1266
|
}
|
|
1223
1267
|
};
|
|
1224
1268
|
|
|
@@ -1257,62 +1301,7 @@ var Config = import_koishi3.Schema.intersect([
|
|
|
1257
1301
|
aiEndpoint: import_koishi3.Schema.string().description("端点 (Endpoint)").role("link").default("https://generativelanguage.googleapis.com/v1beta/openai"),
|
|
1258
1302
|
aiApiKey: import_koishi3.Schema.string().description("密钥 (Key)").role("secret"),
|
|
1259
1303
|
aiModel: import_koishi3.Schema.string().description("模型 (Model)").default("gemini-2.5-flash"),
|
|
1260
|
-
aiRPM: import_koishi3.Schema.number().description("每分钟请求数 (RPM)").default(60)
|
|
1261
|
-
AnalysePrompt: import_koishi3.Schema.string().role("textarea").default(`你是一位内容分析专家。请分析我以JSON格式提供的一组内容(每项包含ID、文本和图片),为每一项内容总结关键词、概括内容并评分。你需要返回一个包含所有分析结果的JSON对象。`).description("分析 Prompt"),
|
|
1262
|
-
aiAnalyseSchema: import_koishi3.Schema.string().role("textarea").default(
|
|
1263
|
-
`{
|
|
1264
|
-
"type": "object",
|
|
1265
|
-
"properties": {
|
|
1266
|
-
"analyses": {
|
|
1267
|
-
"type": "array",
|
|
1268
|
-
"description": "分析结果的数组",
|
|
1269
|
-
"items": {
|
|
1270
|
-
"type": "object",
|
|
1271
|
-
"properties": {
|
|
1272
|
-
"id": {
|
|
1273
|
-
"type": "integer",
|
|
1274
|
-
"description": "内容的唯一ID"
|
|
1275
|
-
},
|
|
1276
|
-
"keywords": {
|
|
1277
|
-
"type": "array",
|
|
1278
|
-
"items": { "type": "string" },
|
|
1279
|
-
"description": "使用尽可能多的关键词准确形容内容"
|
|
1280
|
-
},
|
|
1281
|
-
"description": {
|
|
1282
|
-
"type": "string",
|
|
1283
|
-
"description": "概括或描述这部分内容"
|
|
1284
|
-
},
|
|
1285
|
-
"rating": {
|
|
1286
|
-
"type": "integer",
|
|
1287
|
-
"description": "对内容的综合质量进行评分",
|
|
1288
|
-
"minimum": 0,
|
|
1289
|
-
"maximum": 100
|
|
1290
|
-
}
|
|
1291
|
-
},
|
|
1292
|
-
"required": ["id", "keywords", "description", "rating"]
|
|
1293
|
-
}
|
|
1294
|
-
}
|
|
1295
|
-
},
|
|
1296
|
-
"required": ["analyses"]
|
|
1297
|
-
}`
|
|
1298
|
-
).description("分析 JSON Schema"),
|
|
1299
|
-
aiCheckPrompt: import_koishi3.Schema.string().role("textarea").default(`你是一位内容查重专家。请判断我提供的"新内容"是否与"已有内容"重复或高度相似。`).description("查重 Prompt"),
|
|
1300
|
-
aiCheckSchema: import_koishi3.Schema.string().role("textarea").default(
|
|
1301
|
-
`{
|
|
1302
|
-
"type": "object",
|
|
1303
|
-
"properties": {
|
|
1304
|
-
"duplicate": {
|
|
1305
|
-
"type": "boolean",
|
|
1306
|
-
"description": "新内容是否与已有内容重复"
|
|
1307
|
-
},
|
|
1308
|
-
"id": {
|
|
1309
|
-
"type": "integer",
|
|
1310
|
-
"description": "如果重复,此为第一个重复的已有内容的ID"
|
|
1311
|
-
}
|
|
1312
|
-
},
|
|
1313
|
-
"required": ["duplicate"]
|
|
1314
|
-
}`
|
|
1315
|
-
).description("查重 JSON Schema")
|
|
1304
|
+
aiRPM: import_koishi3.Schema.number().description("每分钟请求数 (RPM)").default(60)
|
|
1316
1305
|
}).description("模型配置"),
|
|
1317
1306
|
import_koishi3.Schema.object({
|
|
1318
1307
|
localPath: import_koishi3.Schema.string().description("文件映射路径"),
|
|
@@ -1403,17 +1392,18 @@ function apply(ctx, config) {
|
|
|
1403
1392
|
let textHashesToStore = [];
|
|
1404
1393
|
let imageHashesToStore = [];
|
|
1405
1394
|
if (hashManager) {
|
|
1406
|
-
const
|
|
1395
|
+
for (const media of downloadedMedia) media.buffer = hashManager.sanitizeImageBuffer(media.buffer);
|
|
1396
|
+
const checkResult = await performSimilarityChecks(ctx, config, hashManager, logger, finalElementsForDb, downloadedMedia);
|
|
1407
1397
|
if (checkResult.duplicate) return checkResult.message;
|
|
1408
1398
|
textHashesToStore = checkResult.textHashesToStore;
|
|
1409
1399
|
imageHashesToStore = checkResult.imageHashesToStore;
|
|
1410
1400
|
}
|
|
1411
1401
|
if (aiManager) {
|
|
1412
1402
|
const duplicateResult = await aiManager.checkForDuplicates(finalElementsForDb, downloadedMedia);
|
|
1413
|
-
if (duplicateResult && duplicateResult.
|
|
1403
|
+
if (duplicateResult?.duplicate && duplicateResult.ids?.length > 0) return `内容与回声洞(${duplicateResult.ids.join("|")})重复`;
|
|
1414
1404
|
}
|
|
1415
1405
|
const userName = (config.enableName ? await profileManager.getNickname(session.userId) : null) || session.username;
|
|
1416
|
-
const needsReview = config.enablePend && session.
|
|
1406
|
+
const needsReview = config.enablePend && session.cid !== config.adminChannel;
|
|
1417
1407
|
let finalStatus = hasMedia ? "preload" : needsReview ? "pending" : "active";
|
|
1418
1408
|
const newCave = await ctx.database.create("cave", {
|
|
1419
1409
|
id: newId,
|
|
@@ -1424,10 +1414,10 @@ function apply(ctx, config) {
|
|
|
1424
1414
|
status: finalStatus,
|
|
1425
1415
|
time: creationTime
|
|
1426
1416
|
});
|
|
1427
|
-
if (hasMedia) finalStatus = await handleFileUploads(ctx, config, fileManager, logger, newCave, downloadedMedia, reusableIds,
|
|
1417
|
+
if (hasMedia) finalStatus = await handleFileUploads(ctx, config, fileManager, logger, newCave, downloadedMedia, reusableIds, needsReview);
|
|
1428
1418
|
if (finalStatus !== "preload") {
|
|
1429
1419
|
newCave.status = finalStatus;
|
|
1430
|
-
if (aiManager) await aiManager.analyzeAndStore(newCave, downloadedMedia);
|
|
1420
|
+
if (aiManager) await aiManager.analyzeAndStore([newCave], downloadedMedia);
|
|
1431
1421
|
if (hashManager) {
|
|
1432
1422
|
const allHashesToInsert = [...textHashesToStore, ...imageHashesToStore].map((h4) => ({ ...h4, cave: newCave.id }));
|
|
1433
1423
|
if (allHashesToInsert.length > 0) await ctx.database.upsert("cave_hash", allHashesToInsert);
|
|
@@ -1458,7 +1448,7 @@ function apply(ctx, config) {
|
|
|
1458
1448
|
const [targetCave] = await ctx.database.get("cave", { id, status: "active" });
|
|
1459
1449
|
if (!targetCave) return `回声洞(${id})不存在`;
|
|
1460
1450
|
const isAuthor = targetCave.userId === session.userId;
|
|
1461
|
-
const isAdmin = session.
|
|
1451
|
+
const isAdmin = session.cid === config.adminChannel;
|
|
1462
1452
|
if (!isAuthor && !isAdmin) return "你没有权限删除这条回声洞";
|
|
1463
1453
|
await ctx.database.upsert("cave", [{ id, status: "delete" }]);
|
|
1464
1454
|
const caveMessages = await buildCaveMessage(targetCave, config, fileManager, logger, session.platform, "已删除");
|
|
@@ -1471,8 +1461,8 @@ function apply(ctx, config) {
|
|
|
1471
1461
|
});
|
|
1472
1462
|
cave.subcommand(".list", "查询投稿统计").option("user", "-u <user:user> 指定用户").option("all", "-a 查看排行").action(async ({ session, options }) => {
|
|
1473
1463
|
if (options.all) {
|
|
1474
|
-
const
|
|
1475
|
-
if (
|
|
1464
|
+
const adminError = requireAdmin(session, config);
|
|
1465
|
+
if (adminError) return adminError;
|
|
1476
1466
|
try {
|
|
1477
1467
|
const aggregatedStats = await ctx.database.select("cave", { status: "active" }).groupBy(["userId", "userName"], { count: /* @__PURE__ */ __name((row) => import_koishi3.$.count(row.id), "count") }).execute();
|
|
1478
1468
|
if (!aggregatedStats.length) return "目前没有回声洞投稿";
|