@dan-uni/dan-any-plugin-detaolu 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.babelrc.json +12 -0
- package/LICENSE +674 -0
- package/README.md +25 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.js +732 -0
- package/dist/index.js.LICENSE.txt +6 -0
- package/dist/index.test.d.ts +1 -0
- package/dist/index.umd.min.js +31217 -0
- package/dist/index.umd.min.js.LICENSE.txt +28 -0
- package/dist/pakku.js/index.d.ts +47 -0
- package/dist/pakku.js/similarity_stub.d.ts +9 -0
- package/dist/pakku.js/types.d.ts +70 -0
- package/dist/static/wasm/54a7637a81e5f86e.module.wasm +0 -0
- package/package.json +39 -0
- package/rslib.config.ts +27 -0
- package/src/index.test.ts +39 -0
- package/src/index.ts +70 -0
- package/src/pakku.js/index.ts +543 -0
- package/src/pakku.js/similarity-gen.js +338 -0
- package/src/pakku.js/similarity-gen.wasm +0 -0
- package/src/pakku.js/similarity_stub.ts +95 -0
- package/src/pakku.js/types.ts +196 -0
- package/tsconfig.json +106 -0
|
@@ -0,0 +1,543 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @author: xmcp(代码主要逻辑来源)
|
|
3
|
+
* @see: https://github.com/xmcp/pakku.js
|
|
4
|
+
* @license: GPL-3.0
|
|
5
|
+
* 本文件内代码来源见上,经部分修改,并整合config注释
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import fs from 'fs-extra'
|
|
9
|
+
import type { DanmuChunk, DanmuClusterOutput, DanmuObject, int } from './types'
|
|
10
|
+
|
|
11
|
+
import {
|
|
12
|
+
begin_chunk,
|
|
13
|
+
begin_index_lock,
|
|
14
|
+
detect_similarity,
|
|
15
|
+
init as sim_init,
|
|
16
|
+
} from './similarity_stub'
|
|
17
|
+
import { Queue, Stats } from './types'
|
|
18
|
+
|
|
19
|
+
/**
 * Default settings for the danmaku merging pass (adapted from pakku.js).
 *
 * Display-related options that this port does not use are kept at the bottom
 * as commented-out reference.
 */
export const DEFAULT_CONFIG = {
  // ---- Merging ----

  /**
   * Time threshold: merge danmaku that appear within n seconds of each other.
   * Very long thresholds (roughly above 60 seconds?) may make processing slow.
   */
  THRESHOLD: 30,

  /**
   * Edit-distance threshold:
   * uses edit distance to catch danmaku that are not identical but close
   * (for example typos), such as "<code>你指尖跃动的电光</code>" and
   * "<code>你<b>之间</b>跃动的电光</code>".
   * @example disabled (0), light (≤3), medium (≤5), strong (≤8)
   */
  MAX_DIST: 5,

  /**
   * Term-frequency vector threshold:
   * uses the angle between 2-gram frequency vectors to catch danmaku that are
   * not identical but similar, such as "<code>yeah!~</code>" and
   * "<code>yeah!~yeah!~yeah!~yeah!~</code>".
   * @example disabled (1000), light (60%), medium (45%), strong (30%)
   */
  MAX_COSINE: 45,

  /**
   * Homophone detection:
   * converts common Chinese characters to pinyin before comparing, which
   * catches pairs such as "<code>布拉迪巴特福来</code>" and
   * "<code>布拉迪·八德福莱</code>".
   */
  TRIM_PINYIN: true,

  // ---- When comparing text ----

  /** Ignore trailing punctuation. */
  TRIM_ENDING: true,
  /** Ignore redundant spaces. */
  TRIM_SPACE: true,
  /** Ignore full-width / half-width differences. */
  TRIM_WIDTH: true,

  // ---- Exceptions ----

  /**
   * Forced merging: `[pattern, replacement]` pairs. Danmaku matching a pattern
   * are rewritten with the replacement before being compared.
   */
  FORCELIST: [
    ['^23{2,}$', '23333'],
    ['^6{3,}$', '66666'],
  ],
  /**
   * Forced ignoring: danmaku matching these rules are never merged, even when
   * their content is identical.
   */
  WHITELIST: [] as [string, string][],
  /**
   * Forced deletion: danmaku matching these rules are dropped outright.
   * The blacklist matcher in this file reads the first element of each entry
   * as an "is regular expression" flag and the second as the pattern.
   */
  BLACKLIST: [] as [string, string][],
  /**
   * Merge danmaku of different types (when turned off, bottom danmaku are not
   * merged together with scrolling danmaku).
   */
  CROSS_MODE: true,

  // ---- Whether specific kinds of danmaku are processed at all ----

  /** Advanced danmaku. */
  PROC_TYPE7: true,
  /** Bottom danmaku. */
  PROC_TYPE4: true,
  /** Subtitle danmaku (those living in danmaku pool 1). */
  PROC_POOL1: false,

  // // ---- Display settings (unused in this port) ----
  // DANMU_MARK: 'prefix' as 'prefix' | 'suffix' | 'off', // count marker (prefix / suffix / off)
  // MARK_THRESHOLD: 1, // only show the marker when the count is greater than n
  // DANMU_SUBSCRIPT: true, // render the count marker as subscript (₍₂₎/[x2])
  // // ENLARGE: true,
  // // SHRINK_THRESHOLD: 0,
  // /**
  //  * Automatic danmaku thinning:
  //  * when the instantaneous density exceeds the threshold, delete low-weight
  //  * danmaku proportionally, preferring danmaku that were not merged.
  //  * @example disabled (0), light (>120), medium (>75), strong (>50)
  //  */
  // DROP_THRESHOLD: 0,
  // /**
  //  * Prefer fixed positions after merging:
  //  * a scrolling danmaku merged with a top / bottom one is shown at the top / bottom.
  //  */
  // MODE_ELEVATION: true,
  // /**
  //  * Show the merged danmaku at the time of the n-th percentile member.
  //  * @example 0% (0), 20% (20), 50% (50)
  //  */
  // REPRESENTATIVE_PERCENT: 20,
}
|
|
87
|
+
|
|
88
|
+
/** Caller-supplied settings: the same keys as `DEFAULT_CONFIG`, each of them optional. */
export type Config = Partial<typeof DEFAULT_CONFIG>
|
|
89
|
+
|
|
90
|
+
/** Intermediate record that pairs one danmaku with the text used to compare it. */
interface DanmuIr {
  /** The danmaku this record was built from. */
  obj: DanmuObject
  /** Normalised text handed to the similarity algorithm. */
  str: string
  /** Position of `obj` in the array the record was built from. */
  idx: int
  /** Starts as `'ORIG'`; replaced by the similarity reason when the record joins an existing cluster. */
  sim_reason: string
}
|
|
96
|
+
|
|
97
|
+
/**
 * Characters stripped from the end of a danmaku when `TRIM_ENDING` is on.
 *
 * The full-width comma, question mark, exclamation mark and tilde are written
 * as `\u` escapes: typed literally they are easy to lose to width-normalising
 * tools, which leaves only a duplicated ASCII character that adds nothing to
 * the set.
 */
const ENDING_CHARS = new Set(
  '.。,\uFF0C/?\uFF1F!\uFF01…~\uFF5E@^、+=-_♂♀ ',
)
|
|
98
|
+
// const TRIM_EXTRA_SPACE_RE = /[ \u3000]+/g
|
|
99
|
+
// const TRIM_CJK_SPACE_RE =
|
|
100
|
+
// /([\u3000-\u9FFF\uFF00-\uFFEF]) (?=[\u3000-\u9FFF\uFF00-\uFFEF])/g
|
|
101
|
+
/**
 * Full-width → half-width lookup used when `TRIM_WIDTH` is on.
 *
 * Contains the ideographic space (U+3000 → U+0020) plus the full-width forms
 * U+FF01–U+FF5E, each of which sits exactly 0xFEE0 above its ASCII counterpart
 * U+0021–U+007E. That is the same 95 characters (space, digits, punctuation,
 * a–z, A–Z) a hand-written table would list; generating the entries from code
 * points keeps the keys unambiguous and avoids hand-escaping the quote and
 * backslash entries.
 */
const WIDTH_TABLE: Map<string, string> = new Map<string, string>([
  ['\u3000', ' '],
  ...Array.from({ length: 0x7e - 0x21 + 1 }, (_, i): [string, string] => {
    const ascii = 0x21 + i
    return [String.fromCharCode(ascii + 0xfee0), String.fromCharCode(ascii)]
  }),
])
|
|
200
|
+
|
|
201
|
+
// Patterns for the TRIM_SPACE step, hoisted so they are built once rather than
// on every call.
const DETAOLU_SPACE_RUN_RE = /[ \u3000]+/g
const DETAOLU_CJK_GAP_RE =
  /([\u3000-\u9FFF\uFF00-\uFFEF]) (?=[\u3000-\u9FFF\uFF00-\uFFEF])/g

/**
 * Normalise a danmaku's text before it is compared with others.
 *
 * Steps, in order:
 *  1. `TRIM_ENDING` – drop trailing characters found in `ENDING_CHARS`
 *     (skipped when the whole text consists of such characters).
 *  2. `TRIM_WIDTH`  – map every character through `WIDTH_TABLE`.
 *  3. `TRIM_SPACE`  – collapse runs of spaces / U+3000 into one space, then
 *     remove a single space that sits between two characters of the
 *     U+3000–U+9FFF / U+FF00–U+FFEF ranges.
 *  4. `FORCELIST`   – apply the first rule whose pattern matches and stop.
 *
 * Options missing from `config` fall back to `DEFAULT_CONFIG`, so a partial
 * config behaves like the defaults with a few overrides instead of silently
 * switching the trimming steps off.
 *
 * @param inp    Display text of the danmaku.
 * @param config Settings; may be partial.
 * @returns `[matched, text]` – whether a FORCELIST rule was applied, and the
 *          normalised text.
 * @throws SyntaxError if a FORCELIST pattern is not a valid regular expression.
 */
const detaolu = (inp: string, config: Config): [boolean, string] => {
  const TRIM_ENDING = config?.TRIM_ENDING ?? DEFAULT_CONFIG.TRIM_ENDING
  const TRIM_SPACE = config?.TRIM_SPACE ?? DEFAULT_CONFIG.TRIM_SPACE
  const TRIM_WIDTH = config?.TRIM_WIDTH ?? DEFAULT_CONFIG.TRIM_WIDTH
  const FORCELIST = (config?.FORCELIST ?? DEFAULT_CONFIG.FORCELIST).map(
    ([pattern, repl]): [RegExp, string] => [new RegExp(pattern, 'gi'), repl],
  )

  let len = inp.length

  if (TRIM_ENDING) {
    // charAt(-1) is '' and '' is not an ending char, so this stops at len === 0.
    while (ENDING_CHARS.has(inp.charAt(len - 1))) len--
    // Every character was an ending char: keep the text as it is.
    if (len === 0) len = inp.length
  }

  let text = ''
  if (TRIM_WIDTH) {
    for (let i = 0; i < len; i++) {
      const c = inp.charAt(i)
      text += WIDTH_TABLE.get(c) ?? c
    }
  } else {
    text = inp.slice(0, len)
  }

  if (TRIM_SPACE) {
    // String.prototype.replace with a global pattern replaces every match, the
    // same as replaceAll, without requiring the ES2021 library.
    text = text
      .replace(DETAOLU_SPACE_RUN_RE, ' ')
      .replace(DETAOLU_CJK_GAP_RE, '$1')
  }

  for (const [re, repl] of FORCELIST) {
    if (re.test(text)) {
      // replace() restarts a global pattern from index 0, so the lastIndex
      // left behind by test() does not matter.
      return [true, text.replace(re, repl)]
    }
  }

  return [false, text]
}
|
|
254
|
+
|
|
255
|
+
/**
 * Whitelist check.
 *
 * Compiles the first element of every WHITELIST entry (taken from `config`, or
 * from `DEFAULT_CONFIG` when absent) as a case-insensitive regular expression
 * and reports whether any of them matches `text`.
 *
 * @returns `true` when at least one rule matches; `false` otherwise, including
 *          when the list is empty.
 */
const whitelisted = (text: string, config: Config) => {
  const rules = config?.WHITELIST ?? DEFAULT_CONFIG.WHITELIST
  // Compile every rule up front, before any of them is tested.
  const compiled = rules.map((rule) => new RegExp(rule[0], 'i'))

  for (const re of compiled) {
    if (re.test(text)) return true
  }
  return false
}
|
|
265
|
+
|
|
266
|
+
/**
 * Blacklist check.
 *
 * Each BLACKLIST entry is `[flag, pattern]`: a truthy flag makes `pattern` a
 * (case-sensitive) regular expression, a falsy flag makes it a plain substring
 * that is compared case-insensitively.
 *
 * NOTE(review): the flag element is typed `string`, so every non-empty string
 * (even `'false'`) selects regular-expression mode — confirm that this is the
 * intended encoding.
 *
 * @returns `null` when nothing matches; otherwise a label for the first
 *          matching rule: `' ' + pattern` for a substring rule, or
 *          `' /' + source + '/'` for a regular-expression rule.
 */
const blacklisted = (text: string, config: Config) => {
  const rules = (config?.BLACKLIST ?? DEFAULT_CONFIG.BLACKLIST).map((entry) =>
    entry[0] ? new RegExp(entry[1]) : entry[1].toLowerCase(),
  )
  if (rules.length === 0) return null

  const lowered = text.toLowerCase()
  const hit = rules.find((rule) =>
    typeof rule === 'string' ? lowered.includes(rule) : rule.test(text),
  )

  // Compare against undefined rather than truthiness: an empty-string rule is
  // a legitimate hit.
  if (hit === undefined) return null
  return typeof hit === 'string' ? ` ${hit}` : ` /${hit.source}/`
}
|
|
290
|
+
|
|
291
|
+
/**
 * Pull the display text out of a danmaku whose content is a JSON array.
 *
 * The text is taken from element 4 of the array. The input is returned
 * unchanged when it is not valid JSON, is not an array, or when element 4 is
 * not a string, so callers can always rely on getting a string back.
 *
 * @param text Raw danmaku content.
 * @returns Element 4 of the parsed array, or `text` itself as a fallback.
 */
function extract_special_danmu(text: string): string {
  let parsed: unknown
  try {
    parsed = JSON.parse(text)
  } catch {
    // Not JSON: treat the content as ordinary text.
    return text
  }

  const inner: unknown = Array.isArray(parsed) ? parsed[4] : undefined
  return typeof inner === 'string' ? inner : text
}
|
|
297
|
+
|
|
298
|
+
/**
 * Remove every carriage return, line feed and tab from `text`, then trim
 * leading and trailing whitespace.
 */
function trim_dispstr(text: string): string {
  const single_line = text.replace(/[\r\n\t]/g, '')
  return single_line.trim()
}
|
|
304
|
+
|
|
305
|
+
/**
 * Pick the string of median length.
 *
 * The candidates are ordered by length and the element at index
 * `floor(n / 2)` is returned, i.e. the upper median when `n` is even. The
 * input array is left untouched (sorting happens on a copy).
 *
 * @param strs Candidate strings; expected to be non-empty. For an empty array
 *             the result is `undefined`.
 * @returns The median-length candidate.
 */
function select_median_length(strs: string[]): string {
  if (strs.length === 1) return strs[0]

  // Array.prototype.sort works in place, so sort a copy instead of reordering
  // the caller's array.
  const by_length = [...strs].sort((a, b) => a.length - b.length)
  return by_length[Math.floor(by_length.length / 2)]
}
|
|
315
|
+
|
|
316
|
+
/**
 * Initialise the similarity module.
 *
 * @param wasm_mod Optional WASM binary. When omitted, `similarity-gen.wasm` is
 *                 read from the location of this module.
 *
 * NOTE(review): nothing is cached here, so every call without an argument
 * reads the file again and calls the initialiser again — confirm whether
 * that is intended.
 */
async function load_wasm(wasm_mod?: ArrayBuffer) {
  const wasm_bytes =
    wasm_mod ??
    (await fs.readFile(new URL('./similarity-gen.wasm', import.meta.url)))
  await sim_init(wasm_bytes)
}
|
|
322
|
+
|
|
323
|
+
/**
 * Cluster the near-duplicate danmaku of one chunk.
 *
 * Every danmaku in `chunk.objs` ends up in exactly one cluster of the result,
 * except blacklisted ones, which are dropped and only counted in the stats.
 * Danmaku that are skipped (disabled type, whitelist hit) form single-member
 * clusters carrying a description of why they were left alone.
 *
 * Options missing from `config` are filled in from `DEFAULT_CONFIG` before
 * anything reads them, so passing e.g. `{ THRESHOLD: 10 }` keeps the default
 * value of every other option.
 *
 * Upstream can also compare the tail of a chunk against the following chunk;
 * that part is not enabled in this port, so only `chunk` is looked at.
 *
 * NOTE(review): the sliding window below compares timestamps in iteration
 * order, which presumably requires `chunk.objs` to be sorted by `time_ms` —
 * confirm against the caller.
 *
 * @param chunk  The danmaku to process.
 * @param config Settings; may be partial.
 * @returns The clusters together with processing statistics.
 */
async function merge(
  chunk: DanmuChunk<DanmuObject>,
  config: Config = DEFAULT_CONFIG,
): Promise<DanmuClusterOutput> {
  await load_wasm()

  // Resolve the effective settings: defaults first, then every option the
  // caller actually set (keys that are explicitly undefined are ignored).
  const overrides = Object.fromEntries(
    Object.entries(config ?? {}).filter(([, value]) => value !== undefined),
  ) as Config
  const cfg: Config = { ...DEFAULT_CONFIG, ...overrides }

  begin_chunk(cfg)

  const ret: DanmuClusterOutput = {
    clusters: [],
    stats: new Stats(),
  }

  /** Emit a one-member cluster for a danmaku that is deliberately left alone. */
  function apply_single_cluster(idx: int, obj: DanmuObject, desc: string) {
    ret.clusters.push({
      peers_ptr: [[idx, 'IGN']],
      desc: [desc],
      chosen_str: obj.content,
      // danuni
      danuni_count: 1,
      danuni_dans: [obj],
    })
  }

  /** Emit the cluster formed by `irs` and choose its representative text. */
  function apply_cluster(irs: DanmuIr[]) {
    if (irs.length === 1) {
      ret.clusters.push({
        peers_ptr: irs.map((ir) => [ir.idx, ir.sim_reason]),
        desc: [],
        // A lone danmaku keeps its original content, not the normalised text.
        chosen_str: irs[0].obj.content,
        // danuni
        danuni_count: irs.length,
        danuni_dans: irs.map((ir) => ir.obj),
      })
      return
    }

    // Count how often each normalised text occurs and remember the texts that
    // share the highest count.
    const text_cnts = new Map<string, number>()
    let most_texts: string[] = []
    let most_cnt = 0

    for (const ir of irs) {
      const cnt = (text_cnts.get(ir.str) ?? 0) + 1
      text_cnts.set(ir.str, cnt)

      if (cnt > most_cnt) {
        most_texts = [ir.str]
        most_cnt = cnt
      } else if (cnt === most_cnt) {
        most_texts.push(ir.str)
      }
    }

    // Several texts can tie for the highest count; take the median-length one.
    const most_text = select_median_length(most_texts)

    ret.clusters.push({
      peers_ptr: irs.map((ir) => [ir.idx, ir.sim_reason]),
      desc: most_cnt > 1 ? [`采用了出现 ${most_cnt} 次的文本`] : [],
      chosen_str: most_text,
      // danuni
      // NOTE(review): this is the occurrence count of the chosen text, not the
      // number of cluster members (irs.length) — confirm which one consumers
      // of danuni_count expect.
      danuni_count: most_cnt,
      danuni_dans: irs.map((ir) => ir.obj),
    })
  }

  /**
   * Convert raw danmaku into comparison records, filtering out the ones that
   * must not take part in merging.
   *
   * @param objs Danmaku to convert.
   * @param s    Stats to update. When `null`, nothing is counted and no
   *             single-member clusters are emitted for skipped danmaku.
   */
  function obj_to_ir(objs: DanmuObject[], s: Stats | null): DanmuIr[] {
    // Shared handling for danmaku whose type is excluded from processing.
    const skip_type = (idx: int, obj: DanmuObject, desc: string) => {
      if (s) {
        s.ignored_type++
        apply_single_cluster(idx, obj, desc)
      }
      return null
    }

    return objs
      .map((obj, idx) => {
        if (!cfg.PROC_POOL1 && obj.pool === 1) {
          return skip_type(idx, obj, '已忽略字幕弹幕,可以在选项中修改')
        }
        // Upstream tests mode 7 here; this port tests mode 4 — presumably the
        // same danmaku kind under this project's mode numbering (TODO confirm).
        if (!cfg.PROC_TYPE7 && obj.mode === 4) {
          return skip_type(idx, obj, '已忽略特殊弹幕,可以在选项中修改')
        }
        // Upstream tests mode 4 here; this port tests mode 1 (see note above).
        if (!cfg.PROC_TYPE4 && obj.mode === 1) {
          return skip_type(idx, obj, '已忽略底部弹幕,可以在选项中修改')
        }

        // Mode-4 content that starts with '[' is unpacked to its display text.
        const disp_str = trim_dispstr(
          obj.mode === 4 && obj.content[0] === '['
            ? extract_special_danmu(obj.content)
            : obj.content,
        )

        // The blacklist is not applied to mode-4 danmaku.
        if (obj.mode !== 4) {
          const matched = blacklisted(disp_str, cfg)
          if (matched) {
            if (s) {
              s.deleted_blacklist++
              s.deleted_blacklist_each[matched] =
                (s.deleted_blacklist_each[matched] || 0) + 1
            }
            // Blacklisted danmaku get no cluster at all.
            return null
          }
        }

        if (whitelisted(disp_str, cfg)) {
          if (s) {
            s.ignored_whitelist++
            apply_single_cluster(idx, obj, '命中白名单')
          }
          return null
        }

        const [matched_taolu, detaolued] = detaolu(disp_str, cfg)

        if (matched_taolu && s) {
          s.num_taolu_matched++
        }

        return {
          obj,
          str: detaolued,
          idx,
          sim_reason: 'ORIG',
        }
      })
      .filter((ir) => ir !== null) as DanmuIr[]
  }

  const danmus = obj_to_ir(chunk.objs, ret.stats)

  // Clusters that are still open, oldest first.
  const nearby_danmus: Queue<DanmuIr[]> = new Queue()

  const THRESHOLD_MS = (cfg.THRESHOLD ?? DEFAULT_CONFIG.THRESHOLD) * 1000

  for (const dm of danmus) {
    // Close every cluster whose first member is more than THRESHOLD_MS older
    // than the current danmaku.
    while (true) {
      const peeked = nearby_danmus.peek()
      if (
        peeked === null ||
        dm.obj.time_ms - peeked[0].obj.time_ms <= THRESHOLD_MS
      )
        break
      apply_cluster(peeked)
      nearby_danmus.pop()
    }

    const sim = detect_similarity(
      dm.str,
      dm.obj.mode,
      nearby_danmus.index_l,
      ret.stats,
    )
    if (sim !== null) {
      // idx_diff is used as an offset back from the right end of the queue to
      // locate the cluster the match belongs to.
      const candidate =
        nearby_danmus.storage[nearby_danmus.index_r - sim.idx_diff]
      dm.sim_reason = sim.reason
      candidate.push(dm)
    } else {
      nearby_danmus.push([dm])
    }
  }

  // Upstream processes the last few clusters together with the next chunk
  // after this call; that step is disabled here, but the call itself is kept.
  begin_index_lock()

  // Finally apply the clusters that are still open.
  for (const candidate of nearby_danmus) {
    apply_cluster(candidate)
  }

  return ret
}
|
|
542
|
+
|
|
543
|
+
export default merge
|