@dan-uni/dan-any-plugin-detaolu 1.4.8 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -14
- package/dist/index.d.mts +66 -0
- package/dist/index.d.mts.map +1 -0
- package/dist/index.mjs +2 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +46 -23
- package/.babelrc.json +0 -12
- package/dist/index.d.ts +0 -4
- package/dist/index.js +0 -2775
- package/dist/index.js.LICENSE.txt +0 -9
- package/dist/index.test.d.ts +0 -1
- package/dist/index.umd.min.js +0 -42802
- package/dist/index.umd.min.js.LICENSE.txt +0 -46
- package/dist/pakku.js/index.d.ts +0 -70
- package/dist/pakku.js/similarity_stub.d.ts +0 -9
- package/dist/pakku.js/types.d.ts +0 -76
- package/rslib.config.ts +0 -27
- package/src/index.test.ts +0 -39
- package/src/index.ts +0 -72
- package/src/pakku.js/index.ts +0 -615
- package/src/pakku.js/similarity-gen.js +0 -343
- package/src/pakku.js/similarity-gen.wasm +0 -0
- package/src/pakku.js/similarity_stub.ts +0 -113
- package/src/pakku.js/types.ts +0 -203
- package/tsconfig.json +0 -106
- package/tsconfig.tsbuildinfo +0 -1
- /package/dist/{static/wasm/21072e5de5.module.wasm → 73f7e7f2dd1a4caa.wasm} +0 -0
package/src/pakku.js/index.ts
DELETED
|
@@ -1,615 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @author: xmcp(代码主要逻辑来源)
|
|
3
|
-
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/core/combine_worker.ts
|
|
4
|
-
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/background/config.ts
|
|
5
|
-
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/page/options.html
|
|
6
|
-
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/page/options.ts
|
|
7
|
-
* @license: GPL-3.0
|
|
8
|
-
* 本文件内代码来源见上,经部分修改,并整合config注释
|
|
9
|
-
*/
|
|
10
|
-
|
|
11
|
-
import fs from 'fs-extra'
|
|
12
|
-
import type { DanmuChunk, DanmuClusterOutput, DanmuObject, int } from './types'
|
|
13
|
-
|
|
14
|
-
import {
|
|
15
|
-
begin_chunk,
|
|
16
|
-
begin_index_lock,
|
|
17
|
-
detect_similarity,
|
|
18
|
-
init as sim_init,
|
|
19
|
-
} from './similarity_stub'
|
|
20
|
-
import { Queue, Stats } from './types'
|
|
21
|
-
|
|
22
|
-
/**
 * Default settings for danmaku merging. `merge` spreads the caller's
 * `Config` over this object, so every key can be overridden individually.
 */
export const DEFAULT_CONFIG = {
  // Danmaku merging
  /**
   * Time threshold: merge duplicate danmaku whose time difference is within
   * n seconds.
   * Very long thresholds (roughly above 60 seconds?) may make the program slow.
   */
  THRESHOLD: 30,
  /**
   * Edit-distance merge threshold:
   * uses edit distance to detect danmaku that are not identical but close
   * (for example, containing typos).
   * Effectively catches pairs such as "你指尖跃动的电光" and "你之间跃动的电光".
   * @example disabled (0), light (≤3), medium (≤5), strong (≤8)
   */
  MAX_DIST: 5,
  /**
   * Term-frequency vector merge threshold:
   * uses the angle between 2-gram frequency vectors to detect danmaku that
   * are not identical but similar.
   * Effectively catches pairs such as "yeah!~" and "yeah!~yeah!~yeah!~yeah!~".
   * @example disabled (1000), light (60%), medium (45%), strong (30%)
   */
  MAX_COSINE: 45,
  /**
   * Homophone detection:
   * converts common Chinese characters to pinyin before comparing.
   * Effectively catches pairs such as "布拉迪巴特福来" and "布拉迪·八德福莱".
   */
  TRIM_PINYIN: true,
  // When comparing text:
  TRIM_ENDING: true, // ignore trailing punctuation
  TRIM_SPACE: true, // ignore redundant spaces
  TRIM_WIDTH: true, // ignore full-width / half-width differences

  // Exceptions
  /**
   * Content replacement: danmaku matching these rules have their content
   * replaced before the merge decision is made.
   * Each entry is `[pattern, replacement]`; `detaolu_meta` compiles the
   * pattern with the `giu` flags.
   */
  FORCELIST: [
    ['^23{2,}$', '23333'],
    ['^6{3,}$', '66666'],
  ],
  /**
   * When a content replacement rule matches: keep trying the following rules.
   */
  FORCELIST_CONTINUE_ON_MATCH: true,
  /**
   * When a content replacement rule matches: use the replaced text even if
   * no merge was triggered.
   */
  FORCELIST_APPLY_SINGULAR: false,
  /**
   * Force ignore: danmaku matching these rules are never merged; takes
   * priority over the content replacement rules.
   * `whitelisted_meta` reads only the first element of each entry, as a
   * regular expression source.
   */
  WHITELIST: [] as [string, string][],
  /**
   * Force delete: danmaku matching these rules are deleted outright
   * (upstream note: not implemented).
   * NOTE(review): `blacklisted_meta` in this file does read this list (a
   * truthy first element makes the second element a RegExp source, otherwise
   * it is a case-insensitive substring) — confirm which statement is current.
   */
  BLACKLIST: [] as [string, string][],
  /**
   * Merge danmaku of different types (when disabled, bottom danmaku are not
   * merged together with scrolling danmaku).
   */
  CROSS_MODE: true,
  // Per-kind switches: when false, `merge` leaves danmaku of that kind alone.
  PROC_TYPE7: true, // advanced (special) danmaku
  PROC_TYPE4: true, // bottom danmaku
  PROC_POOL1: false, // subtitle danmaku (stored in danmaku pool 1)

  // // Display settings
  // DANMU_MARK: 'prefix' as 'prefix' | 'suffix' | 'off', // count marker position (prefix / suffix / off)
  // MARK_THRESHOLD: 1, // only show the marker when the count is greater than n
  // DANMU_SUBSCRIPT: true, // render the count marker as a subscript (₍₂₎/[x2])
  // // ENLARGE: true,
  // // SHRINK_THRESHOLD: 0,
  // /**
  //  * Automatic danmaku selection:
  //  * when the instantaneous danmaku density exceeds the threshold, drop
  //  * low-weight danmaku proportionally, unmerged danmaku first
  //  * @example disabled (0), light (>120), medium (>75), strong (>50)
  //  */
  // DROP_THRESHOLD: 0,
  // /**
  //  * Prefer showing merged danmaku as fixed danmaku:
  //  * a scrolling danmaku merged with a top / bottom danmaku is shown at the
  //  * top / bottom
  //  */
  // MODE_ELEVATION: true,
  // /**
  //  * Merged danmaku are shown at the time of the n-th percentile danmaku
  //  * @example 0% (0), 20% (20), 50% (50)
  //  */
  // REPRESENTATIVE_PERCENT: 20,
}

/** Caller-supplied overrides: any subset of `DEFAULT_CONFIG`. */
export type Config = Partial<typeof DEFAULT_CONFIG>
/** Fully populated configuration (defaults with overrides applied). */
type ResolvedConfig = typeof DEFAULT_CONFIG
|
|
113
|
-
|
|
114
|
-
/**
 * Intermediate representation of one danmaku while `merge` builds clusters.
 */
interface DanmuIr {
  /** The danmaku this entry was built from. */
  obj: DanmuObject
  /** Normalised text handed to the similarity algorithm. */
  str: string // for similarity algorithm
  /** Position of the danmaku in its chunk, as encoded by `make_ptr_idx`. */
  ptr_idx: int
  /**
   * Why the danmaku belongs to its cluster: starts as 'ORIG' and is
   * overwritten with the similarity reason when it joins an existing cluster.
   */
  sim_reason: string
}
|
|
120
|
-
|
|
121
|
-
/**
 * Trailing characters that are stripped before comparison when
 * `TRIM_ENDING` is enabled.
 *
 * The previous literal listed `,`, `?`, `!` and `~` twice each; the second
 * copy of each is presumed to be the full-width variant folded to ASCII by
 * a width-normalising step. They are restored here as explicit escapes so
 * the set cannot be silently folded again:
 * U+FF0C (,), U+FF1F (?), U+FF01 (!), U+FF5E (~).
 */
const ENDING_CHARS = new Set(
  '.。,\uFF0C/?\uFF1F!\uFF01…~\uFF5E@^、+=-_♂♀ ',
)
|
|
122
|
-
// const TRIM_EXTRA_SPACE_RE = /[ \u3000]+/g
|
|
123
|
-
// const TRIM_CJK_SPACE_RE =
|
|
124
|
-
// /([\u3000-\u9FFF\uFF00-\uFFEF]) (?=[\u3000-\u9FFF\uFF00-\uFFEF])/g
|
|
125
|
-
/**
 * Maps full-width characters to their half-width (ASCII) counterparts; used
 * by the text normaliser when `TRIM_WIDTH` is enabled.
 *
 * The table covers the ideographic space (U+3000 -> ' ') and the 94
 * printable ASCII characters, whose full-width forms occupy U+FF01..U+FF5E
 * at a fixed offset from U+0021..U+007E. It is generated from code points
 * rather than spelled out: the hand-written literal had its full-width keys
 * folded to ASCII, which turned it into an identity map (making `TRIM_WIDTH`
 * a no-op) and produced invalid keys such as `'''` and `'\'`.
 */
const WIDTH_TABLE: Map<string, string> = new Map<string, string>([
  ['\u3000', ' '],
  ...Array.from(
    { length: 0x7e - 0x21 + 1 },
    (_, offset): [string, string] => [
      String.fromCharCode(0xff01 + offset),
      String.fromCharCode(0x21 + offset),
    ],
  ),
])
|
|
224
|
-
|
|
225
|
-
/**
 * Builds the "de-routine" text normaliser that is applied to every danmaku
 * before similarity comparison.
 *
 * The returned function, in order:
 * 1. drops trailing `ENDING_CHARS` (`TRIM_ENDING`), unless that would empty
 *    the text;
 * 2. maps characters through `WIDTH_TABLE` (`TRIM_WIDTH`);
 * 3. collapses runs of ASCII / ideographic spaces and removes a single space
 *    between two characters of the U+3000-U+9FFF or U+FF00-U+FFEF ranges
 *    (`TRIM_SPACE`);
 * 4. applies the `FORCELIST` replacement rules.
 *
 * @param config resolved configuration; reads `TRIM_ENDING`, `TRIM_SPACE`,
 *   `TRIM_WIDTH`, `FORCELIST` and `FORCELIST_CONTINUE_ON_MATCH`
 * @returns a function mapping the raw text to `[matched, normalised]`, where
 *   `matched` tells whether at least one `FORCELIST` rule matched
 * @throws SyntaxError (from the `RegExp` constructor) when a `FORCELIST`
 *   pattern is not a valid regular expression
 */
function detaolu_meta(
  config: ResolvedConfig,
): (text: string) => [boolean, string] {
  const { TRIM_ENDING, TRIM_SPACE, TRIM_WIDTH } = config
  const FORCELIST = config.FORCELIST.map(
    ([pattern, repl]): [RegExp, string] => [new RegExp(pattern, 'giu'), repl],
  )
  const FORCELIST_BREAK_ON_MATCH = !config.FORCELIST_CONTINUE_ON_MATCH

  // Built once per normaliser instead of once per danmaku. `replace` with a
  // global regex starts from index 0 every time, so sharing them is safe.
  const EXTRA_SPACE_RE = /[ \u3000]+/g
  const CJK_SPACE_RE =
    /([\u3000-\u9FFF\uFF00-\uFFEF]) (?=[\u3000-\u9FFF\uFF00-\uFFEF])/g

  return (inp: string) => {
    let len = inp.length

    if (TRIM_ENDING) {
      // charAt(-1) is '', which is not an ending char, so the loop stops at 0.
      while (ENDING_CHARS.has(inp.charAt(len - 1))) len--
      // Every character was an ending char: keep the text as it is.
      if (len === 0) len = inp.length
    }

    let text = ''
    if (TRIM_WIDTH) {
      for (let i = 0; i < len; i++) {
        const c = inp.charAt(i)
        text += WIDTH_TABLE.get(c) ?? c
      }
    } else {
      text = inp.slice(0, len)
    }

    if (TRIM_SPACE) {
      // A global `replace` is equivalent to `replaceAll` here and does not
      // require an ES2021 runtime.
      text = text.replace(EXTRA_SPACE_RE, ' ').replace(CJK_SPACE_RE, '$1')
    }

    let taolu_matched = false
    for (const [re, repl] of FORCELIST) {
      // `test` on a global regex resumes from `lastIndex`; reset it so the
      // result never depends on what the regex was last run against.
      re.lastIndex = 0
      if (re.test(text)) {
        text = text.replace(re, repl)
        taolu_matched = true
        if (FORCELIST_BREAK_ON_MATCH) break
      }
    }

    return [taolu_matched, text]
  }
}
|
|
285
|
-
|
|
286
|
-
/**
 * Builds the whitelist predicate.
 *
 * @param config resolved configuration; only `WHITELIST` is read, and of
 *   each entry only the first element, which is compiled as a regular
 *   expression with the `iu` flags
 * @returns a function telling whether a text matches any whitelist rule;
 *   always `false` when the whitelist is empty
 * @throws SyntaxError (from the `RegExp` constructor) when a pattern is not
 *   a valid regular expression
 */
function whitelisted_meta(config: ResolvedConfig): (text: string) => boolean {
  const WHITELIST = config.WHITELIST.map(
    ([pattern]) => new RegExp(pattern, 'iu'),
  )

  // Nothing to match against: skip the per-text scan entirely.
  if (WHITELIST.length === 0) return () => false

  return (text: string) => WHITELIST.some((re) => re.test(text))
}
|
|
296
|
-
|
|
297
|
-
/**
 * Builds the blacklist matcher.
 *
 * Each `BLACKLIST` entry is read as `[is_regex, pattern]`: when the first
 * element is truthy (a non-empty string) the pattern is compiled as a
 * case-sensitive regular expression, otherwise it is a substring that is
 * matched case-insensitively.
 *
 * @param config resolved configuration; only `BLACKLIST` is read
 * @returns a function returning a label for the first rule that matches the
 *   text (` <substring>` or ` /<regex source>/`, both with a leading space),
 *   or `null` when no rule matches
 * @throws SyntaxError (from the `RegExp` constructor) when a regex pattern
 *   is not a valid regular expression
 */
function blacklisted_meta(
  config: ResolvedConfig,
): (text: string) => string | null {
  const BLACKLIST = config.BLACKLIST.map(([is_regex, pattern]) =>
    is_regex ? new RegExp(pattern) : pattern.toLowerCase(),
  )

  if (BLACKLIST.length === 0) return () => null

  return (text: string) => {
    const lower = text.toLowerCase()
    for (const pattern of BLACKLIST) {
      if (typeof pattern === 'string') {
        if (lower.includes(pattern)) return ` ${pattern}`
      } else if (pattern.test(text)) {
        return ` /${pattern.source}/`
      }
    }
    return null
  }
}
|
|
325
|
-
|
|
326
|
-
/**
 * Extracts the display text of a special danmaku whose content is a JSON
 * array, taking the element at index 4.
 *
 * The input is returned unchanged when it is not valid JSON or when the
 * element at index 4 is missing or not a string. Without that check a short
 * or malformed array would yield `undefined` (or a number, object, ...)
 * typed as `string`, and the caller's string operations would throw.
 *
 * @param text raw danmaku content
 * @returns the extracted text, or `text` itself when nothing usable is found
 */
function extract_special_danmu(text: string): string {
  try {
    const parsed: unknown = JSON.parse(text)
    const candidate: unknown =
      parsed == null ? undefined : (parsed as Record<number, unknown>)[4]
    if (typeof candidate === 'string') return candidate
  } catch {
    // Not JSON: fall through and keep the raw content.
  }
  return text
}
|
|
332
|
-
|
|
333
|
-
/**
 * Removes every carriage return, line feed and tab from the text, then
 * trims leading and trailing whitespace.
 *
 * Uses `replace` with a global regex, which is equivalent to `replaceAll`
 * here and does not require an ES2021 runtime; the capture group of the
 * previous pattern was unused and has been dropped.
 *
 * @param text text to clean up
 * @returns the cleaned text
 */
function trim_dispstr(text: string): string {
  return text.replace(/[\r\n\t]/g, '').trim()
}
|
|
339
|
-
|
|
340
|
-
/**
 * Picks the string of median length from the candidates (the caller passes
 * the texts that occur most often in a cluster).
 *
 * The candidates are ordered by length on a copy, so the input array is not
 * mutated, and the element at index `floor(n / 2)` is returned. Copy-then-
 * sort replaces `toSorted`, which is only available on ES2023 runtimes.
 *
 * @param strs candidate strings
 * @returns the median-length candidate, or `''` when `strs` is empty
 */
function select_median_length(strs: string[]): string {
  if (strs.length <= 1) return strs[0] ?? ''

  const by_length = [...strs].sort((a, b) => a.length - b.length)
  return by_length[Math.floor(by_length.length / 2)] ?? ''
}
|
|
350
|
-
|
|
351
|
-
/**
 * Copies exactly the bytes visible through `array` into a new `ArrayBuffer`.
 *
 * Only the view's window (`byteOffset` .. `byteOffset + byteLength`) is
 * copied, never the whole backing store. The bytes are written into a
 * freshly allocated `ArrayBuffer` instead of slicing `array.buffer`, because
 * slicing a `SharedArrayBuffer`-backed view returns a `SharedArrayBuffer`
 * that the previous `as ArrayBuffer` cast merely mislabelled.
 *
 * @param array source bytes
 * @returns a standalone `ArrayBuffer` holding a copy of the bytes
 */
function u8array_to_arraybuffer(array: Uint8Array): ArrayBuffer {
  const copy = new ArrayBuffer(array.byteLength)
  new Uint8Array(copy).set(array)
  return copy
}
|
|
357
|
-
|
|
358
|
-
/**
 * Initialises the similarity module.
 *
 * @param wasm_mod bytes of the WebAssembly module; when omitted,
 *   `similarity-gen.wasm` is read from the location of this module
 * @returns a promise that resolves once `sim_init` has completed; it rejects
 *   when the file cannot be read or `sim_init` rejects
 */
async function load_wasm(wasm_mod?: ArrayBuffer): Promise<void> {
  if (wasm_mod) {
    await sim_init(wasm_mod)
    return
  }

  // Pass the file: URL itself to readFile. `URL.pathname` is percent-encoded
  // (a space becomes "%20") and starts with a slash before Windows drive
  // letters ("/C:/..."), so it is not a usable file-system path.
  const wasm_url = new URL('similarity-gen.wasm', import.meta.url)
  const wasm_u8 = await fs.readFile(wasm_url)
  await sim_init(u8array_to_arraybuffer(wasm_u8))
}
|
|
368
|
-
|
|
369
|
-
/**
 * Encodes a danmaku index together with the chunk it belongs to.
 *
 * Indices of the current chunk are returned unchanged; indices of the next
 * chunk are mapped to negative numbers (`-1 - idx`, so 0 becomes -1), which
 * keeps the two ranges disjoint for non-negative `idx`.
 *
 * @param idx index of the danmaku inside its chunk
 * @param is_next_chunk whether the danmaku belongs to the next chunk
 * @returns the encoded index
 */
function make_ptr_idx(idx: int, is_next_chunk: boolean): int {
  return is_next_chunk ? -1 - idx : idx
}
|
|
372
|
-
|
|
373
|
-
/**
 * Groups the danmaku of one chunk into clusters of near-duplicates.
 *
 * Steps:
 * 1. resolve the configuration, load the similarity module and start a new
 *    chunk in it;
 * 2. convert every danmaku to a `DanmuIr`; danmaku excluded by the `PROC_*`
 *    switches or the whitelist become one-element clusters right away, and
 *    blacklisted danmaku are moved to `deleted_chunk`;
 * 3. walk the remaining danmaku with a sliding time window of `THRESHOLD`
 *    seconds, attaching each one to the cluster reported by
 *    `detect_similarity` or opening a new cluster;
 * 4. emit the clusters still queued at the end.
 *
 * NOTE(review): the window only compares against the oldest queued cluster,
 * so `chunk.objs` is presumably sorted by ascending `time_ms` — confirm
 * against the caller.
 *
 * @param chunk danmaku to process; only `chunk.objs` is read
 * @param config overrides applied on top of `DEFAULT_CONFIG`
 * @returns the clusters, the collected statistics and the deleted danmaku
 */
async function merge(
  chunk: DanmuChunk<DanmuObject>,
  // next_chunk: DanmuChunk<DanmuObject>,
  config: Config = DEFAULT_CONFIG,
): Promise<DanmuClusterOutput> {
  const local_config: ResolvedConfig = { ...DEFAULT_CONFIG, ...config }

  await load_wasm()

  begin_chunk(local_config)

  const ret: DanmuClusterOutput = {
    clusters: [],
    stats: new Stats(),
    deleted_chunk: [],
  }

  /** Emits a one-element cluster for a danmaku that is excluded from merging. */
  function apply_single_cluster(idx: int, obj: DanmuObject, desc: string) {
    ret.clusters.push({
      peers_ptr: [[idx, 'IGN']],
      desc: [desc],
      chosen_str: obj.content,
      // danuni
      danuni_count: 1,
      // danuni_senders: [obj.danuni_sender],
      danuni_dans: [obj],
    })
  }

  /** Emits the cluster formed by `irs`, choosing its representative text. */
  function apply_cluster(irs: DanmuIr[]) {
    if (irs.length === 1) {
      ret.clusters.push({
        peers_ptr: irs.map((ir) => [ir.ptr_idx, ir.sim_reason]),
        desc: [],
        chosen_str: irs[0].obj.content,
        // danuni
        danuni_count: irs.length,
        // danuni_senders: irs.map((ir) => ir.obj.danuni_sender),
        danuni_dans: irs.map((ir) => ir.obj),
      })
    } else {
      // Count how often each normalised text occurs and keep the texts that
      // share the highest count.
      const text_cnts = new Map<string, number>()
      let most_texts: string[] = []
      let most_cnt = 0

      for (const ir of irs) {
        const text = ir.str
        const cnt = 1 + (text_cnts.get(text) ?? 0)
        text_cnts.set(text, cnt)

        if (cnt > most_cnt) {
          most_texts = [text]
          most_cnt = cnt
        } else if (cnt === most_cnt) {
          most_texts.push(text)
        }
      }

      const most_text = select_median_length(most_texts)

      ret.clusters.push({
        peers_ptr: irs.map((ir) => [ir.ptr_idx, ir.sim_reason]),
        desc: most_cnt > 1 ? [`采用了出现 ${most_cnt} 次的文本`] : [],
        chosen_str: most_text,
        // danuni
        // NOTE(review): this is the occurrence count of the chosen text, not
        // the cluster size (`irs.length`) that the single-member branch
        // stores — confirm which one consumers expect.
        danuni_count: most_cnt,
        // danuni_senders: irs.map((ir) => ir.obj.danuni_sender),
        danuni_dans: irs.map((ir) => ir.obj),
      })
    }
  }

  const detaolu = detaolu_meta(local_config)
  const whitelisted = whitelisted_meta(local_config)
  const blacklisted = blacklisted_meta(local_config)

  /**
   * Converts danmaku to their intermediate representation, dropping the ones
   * that must not take part in merging. Statistics, one-element clusters and
   * deleted danmaku are only recorded when `s` is given.
   */
  function obj_to_ir(
    objs: DanmuObject[],
    s: Stats | null,
    is_next_chunk: boolean,
  ): DanmuIr[] {
    return objs
      .map((obj, idx): DanmuIr | null => {
        if (!local_config.PROC_POOL1 && obj.pool === 1) {
          if (s) {
            s.ignored_type++
            apply_single_cluster(idx, obj, '已忽略字幕弹幕,可以在选项中修改')
          }
          return null
        }
        // upstream: if (!config.PROC_TYPE7 && obj.mode === 7) {
        if (!local_config.PROC_TYPE7 && obj.mode === 4) {
          if (s) {
            s.ignored_type++
            apply_single_cluster(idx, obj, '已忽略特殊弹幕,可以在选项中修改')
          }
          return null
        }
        // upstream: if (!config.PROC_TYPE4 && obj.mode === 4) {
        if (!local_config.PROC_TYPE4 && obj.mode === 1) {
          if (s) {
            s.ignored_type++
            apply_single_cluster(idx, obj, '已忽略底部弹幕,可以在选项中修改')
          }
          return null
        }
        // if (obj.mode === 8) {
        //   if (s) {
        //     s.ignored_script++
        //     apply_single_cluster(idx, obj, '代码弹幕')
        //   }
        //   return null
        // }
        // if (obj.mode === 9) {
        //   if (s) {
        //     s.ignored_script++
        //     apply_single_cluster(idx, obj, 'BAS弹幕')
        //   }
        //   return null
        // }

        const disp_str = trim_dispstr(
          // upstream: obj.mode === 7 && obj.content[0] === '['
          obj.mode === 4 && obj.content[0] === '['
            ? extract_special_danmu(obj.content)
            : obj.content,
        )

        // upstream: if (obj.mode !== 8 && obj.mode !== 9) {
        if (obj.mode !== 4) {
          const matched = blacklisted(disp_str)
          if (matched) {
            if (s) {
              s.deleted_blacklist++
              s.deleted_blacklist_each[matched] =
                (s.deleted_blacklist_each[matched] ?? 0) + 1
              ret.deleted_chunk.push({
                ...obj,
                pakku: {
                  deleted_reason: `命中黑名单:${matched}`,
                },
              })
            }
            return null
          }
        }
        if (whitelisted(disp_str)) {
          if (s) {
            s.ignored_whitelist++
            apply_single_cluster(idx, obj, '命中白名单')
          }
          return null
        }

        const [matched_taolu, detaolued] = detaolu(disp_str)

        if (matched_taolu) {
          if (s) s.num_taolu_matched++
          if (local_config.FORCELIST_APPLY_SINGULAR) {
            // Work on a copy so the caller's danmaku object stays untouched.
            obj = {
              ...obj,
              content: detaolued,
            }
          }
        }

        return {
          obj,
          str: detaolued,
          ptr_idx: make_ptr_idx(idx, is_next_chunk),
          sim_reason: 'ORIG',
        }
      })
      .filter((ir): ir is DanmuIr => ir !== null)
  }

  const danmus = obj_to_ir(chunk.objs, ret.stats, false)
  // const next_chunk_danmus = obj_to_ir(next_chunk.objs, null, true)

  const nearby_danmus: Queue<DanmuIr[]> = new Queue()

  const THRESHOLD_MS = local_config.THRESHOLD * 1000

  for (const dm of danmus) {
    // Emit every queued cluster whose first member is more than the
    // threshold older than the current danmaku.
    while (true) {
      const peeked = nearby_danmus.peek()
      if (
        peeked === null ||
        dm.obj.time_ms - peeked[0].obj.time_ms <= THRESHOLD_MS
      )
        break
      apply_cluster(peeked)
      nearby_danmus.pop()
    }

    const sim = detect_similarity(
      dm.str,
      dm.obj.mode,
      nearby_danmus.index_l,
      ret.stats,
    )
    if (sim === null) {
      // No similar cluster in the window: open a new one.
      nearby_danmus.push([dm])
    } else {
      const candidate =
        nearby_danmus.storage[nearby_danmus.index_r - sim.idx_diff]
      dm.sim_reason = sim.reason
      candidate.push(dm)
    }
  }

  // now process last few clusters with the next chunk
  begin_index_lock()
  // outer: for (const dm of next_chunk_danmus) {
  //   while (true) {
  //     const peeked = nearby_danmus.peek()
  //     if (peeked === null) break outer
  //     if (dm.obj.time_ms - peeked[0].obj.time_ms <= THRESHOLD_MS) break
  //     apply_cluster(peeked)
  //     nearby_danmus.pop()
  //   }

  //   const sim = detect_similarity(
  //     dm.str,
  //     dm.obj.mode,
  //     nearby_danmus.index_l,
  //     ret.stats,
  //   )
  //   if (sim !== null) {
  //     const candidate =
  //       nearby_danmus.storage[nearby_danmus.index_r - sim.idx_diff]
  //     dm.sim_reason = sim.reason
  //     candidate.push(dm)
  //   }
  // }

  // finally apply remaining clusters
  for (const candidate of nearby_danmus) {
    apply_cluster(candidate)
  }

  return ret
}

export default merge
|