@dan-uni/dan-any-plugin-detaolu 0.9.5 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +22 -11
- package/dist/index.js.LICENSE.txt +3 -0
- package/dist/index.umd.min.js +1124 -1598
- package/dist/index.umd.min.js.LICENSE.txt +3 -0
- package/dist/pakku.js/index.d.ts +32 -9
- package/package.json +10 -10
- package/src/pakku.js/index.ts +53 -22
|
@@ -38,6 +38,9 @@ and limitations under the License.
|
|
|
38
38
|
/**
|
|
39
39
|
* @author: xmcp(代码主要逻辑来源)
|
|
40
40
|
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/core/combine_worker.ts
|
|
41
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/background/config.ts
|
|
42
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/page/options.html
|
|
43
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/page/options.ts
|
|
41
44
|
* @license: GPL-3.0
|
|
42
45
|
* 本文件内代码来源见上,经部分修改,并整合config注释
|
|
43
46
|
*/
|
package/dist/pakku.js/index.d.ts
CHANGED
|
@@ -1,42 +1,65 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @author: xmcp(代码主要逻辑来源)
|
|
3
3
|
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/core/combine_worker.ts
|
|
4
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/background/config.ts
|
|
5
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/page/options.html
|
|
6
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/page/options.ts
|
|
4
7
|
* @license: GPL-3.0
|
|
5
8
|
* 本文件内代码来源见上,经部分修改,并整合config注释
|
|
6
9
|
*/
|
|
7
10
|
import type { DanmuChunk, DanmuClusterOutput, DanmuObject } from './types';
|
|
8
11
|
export declare const DEFAULT_CONFIG: {
|
|
9
12
|
/**
|
|
10
|
-
*
|
|
13
|
+
* 时间阈值:合并时间差在n秒之内的重复弹幕
|
|
11
14
|
* 超长(大概 60 秒以上?)的阈值可能会导致程序运行缓慢
|
|
12
15
|
*/
|
|
13
16
|
THRESHOLD: number;
|
|
14
17
|
/**
|
|
15
18
|
* 编辑距离合并阈值:
|
|
16
|
-
*
|
|
17
|
-
* 能有效击杀 "
|
|
18
|
-
* @example 禁用(0)
|
|
19
|
+
* 根据编辑距离判断不完全一致但内容相近(例如有错别字)的弹幕
|
|
20
|
+
* 能有效击杀 "你指尖跃动的电光" 和 "你之间跃动的电光" 等
|
|
21
|
+
* @example 禁用(0), 轻微(≤3), 中等(≤5), 强力(≤8)
|
|
19
22
|
*/
|
|
20
23
|
MAX_DIST: number;
|
|
21
24
|
/**
|
|
22
25
|
* 词频向量合并阈值:
|
|
23
|
-
* 根据 2-Gram
|
|
24
|
-
* 能有效击杀 "
|
|
25
|
-
* @example 禁用(1000)
|
|
26
|
+
* 根据 2-Gram 频率向量的夹角判断不完全一致但内容类似的弹幕
|
|
27
|
+
* 能有效击杀 "yeah!~" 和 "yeah!~yeah!~yeah!~yeah!~" 等
|
|
28
|
+
* @example 禁用(1000), 轻微(60%), 中等(45%), 强力(30%)
|
|
26
29
|
*/
|
|
27
30
|
MAX_COSINE: number;
|
|
28
31
|
/**
|
|
29
32
|
* 识别谐音弹幕:
|
|
30
|
-
*
|
|
31
|
-
* 能有效击杀 "
|
|
33
|
+
* 将常用汉字转换为拼音再进行比较
|
|
34
|
+
* 能有效击杀 "布拉迪巴特福来" 和 "布拉迪·八德福莱" 等
|
|
32
35
|
*/
|
|
33
36
|
TRIM_PINYIN: boolean;
|
|
34
37
|
TRIM_ENDING: boolean;
|
|
35
38
|
TRIM_SPACE: boolean;
|
|
36
39
|
TRIM_WIDTH: boolean;
|
|
40
|
+
/**
|
|
41
|
+
* 内容替换:符合这些规则的弹幕,判断是否合并前会先对内容进行替换
|
|
42
|
+
*/
|
|
37
43
|
FORCELIST: string[][];
|
|
44
|
+
/**
|
|
45
|
+
* 内容替换规则命中时:继续尝试匹配后续规则
|
|
46
|
+
*/
|
|
47
|
+
FORCELIST_CONTINUE_ON_MATCH: boolean;
|
|
48
|
+
/**
|
|
49
|
+
* 内容替换规则命中时:即使未触发合并也使用替换后的文本
|
|
50
|
+
*/
|
|
51
|
+
FORCELIST_APPLY_SINGULAR: boolean;
|
|
52
|
+
/**
|
|
53
|
+
* 强制忽略:符合这些规则的弹幕不会被合并,优先级高于内容替换规则
|
|
54
|
+
*/
|
|
38
55
|
WHITELIST: [string, string][];
|
|
56
|
+
/**
|
|
57
|
+
* 强制删除:符合这些规则的弹幕会直接被删除(未实现)
|
|
58
|
+
*/
|
|
39
59
|
BLACKLIST: [string, string][];
|
|
60
|
+
/**
|
|
61
|
+
* 合并不同类型的弹幕(取消勾选后,底部弹幕不会跟滚动弹幕合并到一起)
|
|
62
|
+
*/
|
|
40
63
|
CROSS_MODE: boolean;
|
|
41
64
|
PROC_TYPE7: boolean;
|
|
42
65
|
PROC_TYPE4: boolean;
|
package/package.json
CHANGED
|
@@ -1,21 +1,21 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@dan-uni/dan-any-plugin-detaolu",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "1.0.1",
|
|
4
4
|
"description": "A filter, dedupe and anti-spam plugin of dan-any, a danmaku transformer lib, based on pakku.js.",
|
|
5
|
-
"
|
|
6
|
-
"bangumi",
|
|
7
|
-
"danmaku"
|
|
8
|
-
],
|
|
5
|
+
"author": "rinne",
|
|
9
6
|
"license": "GPL-3.0-or-later",
|
|
10
7
|
"homepage": "https://github.com/ani-uni/danuni/tree/master/packages/dan-any-plugin-detaolu#readme",
|
|
11
|
-
"bugs": {
|
|
12
|
-
"url": "https://github.com/ani-uni/danuni/issues"
|
|
13
|
-
},
|
|
14
8
|
"repository": {
|
|
15
9
|
"type": "git",
|
|
16
10
|
"url": "git+https://github.com/ani-uni/danuni.git"
|
|
17
11
|
},
|
|
18
|
-
"
|
|
12
|
+
"bugs": {
|
|
13
|
+
"url": "https://github.com/ani-uni/danuni/issues"
|
|
14
|
+
},
|
|
15
|
+
"keywords": [
|
|
16
|
+
"bangumi",
|
|
17
|
+
"danmaku"
|
|
18
|
+
],
|
|
19
19
|
"main": "dist/index.js",
|
|
20
20
|
"module": "src/index.ts",
|
|
21
21
|
"types": "dist/index.d.ts",
|
|
@@ -31,7 +31,7 @@
|
|
|
31
31
|
},
|
|
32
32
|
"dependencies": {
|
|
33
33
|
"@dan-uni/dan-any": "workspace:^",
|
|
34
|
-
"fs-extra": "^11.3.
|
|
34
|
+
"fs-extra": "^11.3.3"
|
|
35
35
|
},
|
|
36
36
|
"devDependencies": {
|
|
37
37
|
"@types/fs-extra": "^11.0.4"
|
package/src/pakku.js/index.ts
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @author: xmcp(代码主要逻辑来源)
|
|
3
3
|
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/core/combine_worker.ts
|
|
4
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/background/config.ts
|
|
5
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/page/options.html
|
|
6
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/page/options.ts
|
|
4
7
|
* @license: GPL-3.0
|
|
5
8
|
* 本文件内代码来源见上,经部分修改,并整合config注释
|
|
6
9
|
*/
|
|
@@ -19,28 +22,28 @@ import { Queue, Stats } from './types'
|
|
|
19
22
|
export const DEFAULT_CONFIG = {
|
|
20
23
|
// 弹幕合并
|
|
21
24
|
/**
|
|
22
|
-
*
|
|
25
|
+
* 时间阈值:合并时间差在n秒之内的重复弹幕
|
|
23
26
|
* 超长(大概 60 秒以上?)的阈值可能会导致程序运行缓慢
|
|
24
27
|
*/
|
|
25
28
|
THRESHOLD: 30,
|
|
26
29
|
/**
|
|
27
30
|
* 编辑距离合并阈值:
|
|
28
|
-
*
|
|
29
|
-
* 能有效击杀 "
|
|
30
|
-
* @example 禁用(0)
|
|
31
|
+
* 根据编辑距离判断不完全一致但内容相近(例如有错别字)的弹幕
|
|
32
|
+
* 能有效击杀 "你指尖跃动的电光" 和 "你之间跃动的电光" 等
|
|
33
|
+
* @example 禁用(0), 轻微(≤3), 中等(≤5), 强力(≤8)
|
|
31
34
|
*/
|
|
32
35
|
MAX_DIST: 5,
|
|
33
36
|
/**
|
|
34
37
|
* 词频向量合并阈值:
|
|
35
|
-
* 根据 2-Gram
|
|
36
|
-
* 能有效击杀 "
|
|
37
|
-
* @example 禁用(1000)
|
|
38
|
+
* 根据 2-Gram 频率向量的夹角判断不完全一致但内容类似的弹幕
|
|
39
|
+
* 能有效击杀 "yeah!~" 和 "yeah!~yeah!~yeah!~yeah!~" 等
|
|
40
|
+
* @example 禁用(1000), 轻微(60%), 中等(45%), 强力(30%)
|
|
38
41
|
*/
|
|
39
42
|
MAX_COSINE: 45,
|
|
40
43
|
/**
|
|
41
44
|
* 识别谐音弹幕:
|
|
42
|
-
*
|
|
43
|
-
* 能有效击杀 "
|
|
45
|
+
* 将常用汉字转换为拼音再进行比较
|
|
46
|
+
* 能有效击杀 "布拉迪巴特福来" 和 "布拉迪·八德福莱" 等
|
|
44
47
|
*/
|
|
45
48
|
TRIM_PINYIN: true,
|
|
46
49
|
// 比较文本时:
|
|
@@ -49,15 +52,35 @@ export const DEFAULT_CONFIG = {
|
|
|
49
52
|
TRIM_WIDTH: true, // 忽略全半角差异
|
|
50
53
|
|
|
51
54
|
// 例外设置
|
|
55
|
+
/**
|
|
56
|
+
* 内容替换:符合这些规则的弹幕,判断是否合并前会先对内容进行替换
|
|
57
|
+
*/
|
|
52
58
|
FORCELIST: [
|
|
53
59
|
['^23{2,}$', '23333'],
|
|
54
60
|
['^6{3,}$', '66666'],
|
|
55
|
-
],
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
61
|
+
],
|
|
62
|
+
/**
|
|
63
|
+
* 内容替换规则命中时:继续尝试匹配后续规则
|
|
64
|
+
*/
|
|
65
|
+
FORCELIST_CONTINUE_ON_MATCH: true,
|
|
66
|
+
/**
|
|
67
|
+
* 内容替换规则命中时:即使未触发合并也使用替换后的文本
|
|
68
|
+
*/
|
|
69
|
+
FORCELIST_APPLY_SINGULAR: false,
|
|
70
|
+
/**
|
|
71
|
+
* 强制忽略:符合这些规则的弹幕不会被合并,优先级高于内容替换规则
|
|
72
|
+
*/
|
|
73
|
+
WHITELIST: [] as [string, string][],
|
|
74
|
+
/**
|
|
75
|
+
* 强制删除:符合这些规则的弹幕会直接被删除(未实现)
|
|
76
|
+
*/
|
|
77
|
+
BLACKLIST: [] as [string, string][],
|
|
78
|
+
/**
|
|
79
|
+
* 合并不同类型的弹幕(取消勾选后,底部弹幕不会跟滚动弹幕合并到一起)
|
|
80
|
+
*/
|
|
81
|
+
CROSS_MODE: true,
|
|
59
82
|
// 放过特定类型的弹幕:
|
|
60
|
-
PROC_TYPE7: true, //
|
|
83
|
+
PROC_TYPE7: true, // 高级弹幕(特殊弹幕)
|
|
61
84
|
PROC_TYPE4: true, // 底部弹幕
|
|
62
85
|
PROC_POOL1: false, // 字幕弹幕(位于弹幕池1)
|
|
63
86
|
|
|
@@ -201,13 +224,14 @@ const WIDTH_TABLE = new Map(
|
|
|
201
224
|
/**
|
|
202
225
|
* 反套路
|
|
203
226
|
*/
|
|
204
|
-
const detaolu = (inp: string, config: Config) => {
|
|
227
|
+
const detaolu = (inp: string, config: Config): [boolean, string] => {
|
|
205
228
|
const TRIM_ENDING = config.TRIM_ENDING
|
|
206
229
|
const TRIM_SPACE = config.TRIM_SPACE
|
|
207
230
|
const TRIM_WIDTH = config.TRIM_WIDTH
|
|
208
231
|
const FORCELIST = (config?.FORCELIST ?? DEFAULT_CONFIG.FORCELIST).map(
|
|
209
|
-
([pattern, repl]) => [new RegExp(pattern, '
|
|
232
|
+
([pattern, repl]) => [new RegExp(pattern, 'giu'), repl] as [RegExp, string],
|
|
210
233
|
)
|
|
234
|
+
const FORCELIST_BREAK_ON_MATCH = !config.FORCELIST_CONTINUE_ON_MATCH
|
|
211
235
|
|
|
212
236
|
let len = inp.length
|
|
213
237
|
let text = ''
|
|
@@ -242,14 +266,16 @@ const detaolu = (inp: string, config: Config) => {
|
|
|
242
266
|
)
|
|
243
267
|
}
|
|
244
268
|
|
|
269
|
+
let taolu_matched = false
|
|
245
270
|
for (const taolu of FORCELIST) {
|
|
246
271
|
if (taolu[0].test(text)) {
|
|
247
272
|
text = text.replace(taolu[0], taolu[1])
|
|
248
|
-
|
|
273
|
+
taolu_matched = true
|
|
274
|
+
if (FORCELIST_BREAK_ON_MATCH) break
|
|
249
275
|
}
|
|
250
276
|
}
|
|
251
277
|
|
|
252
|
-
return [
|
|
278
|
+
return [taolu_matched, text]
|
|
253
279
|
}
|
|
254
280
|
|
|
255
281
|
/**
|
|
@@ -257,7 +283,7 @@ const detaolu = (inp: string, config: Config) => {
|
|
|
257
283
|
*/
|
|
258
284
|
const whitelisted = (text: string, config: Config) => {
|
|
259
285
|
const WHITELIST = (config?.WHITELIST ?? DEFAULT_CONFIG.WHITELIST).map(
|
|
260
|
-
(x) => new RegExp(x[0], '
|
|
286
|
+
(x) => new RegExp(x[0], 'iu'),
|
|
261
287
|
)
|
|
262
288
|
if (WHITELIST.length === 0) return false
|
|
263
289
|
else return WHITELIST.some((re) => re.test(text))
|
|
@@ -355,7 +381,7 @@ async function merge(
|
|
|
355
381
|
ret.clusters.push({
|
|
356
382
|
peers_ptr: irs.map((ir) => [ir.ptr_idx, ir.sim_reason]),
|
|
357
383
|
desc: [],
|
|
358
|
-
chosen_str: irs[0].obj.content,
|
|
384
|
+
chosen_str: irs[0].obj.content,
|
|
359
385
|
// danuni
|
|
360
386
|
danuni_count: irs.length,
|
|
361
387
|
// danuni_senders: irs.map((ir) => ir.obj.danuni_sender),
|
|
@@ -473,8 +499,13 @@ async function merge(
|
|
|
473
499
|
|
|
474
500
|
const [matched_taolu, detaolued] = detaolu(disp_str, config)
|
|
475
501
|
|
|
476
|
-
if (matched_taolu
|
|
477
|
-
s.num_taolu_matched++
|
|
502
|
+
if (matched_taolu) {
|
|
503
|
+
if (s) s.num_taolu_matched++
|
|
504
|
+
if (config.FORCELIST_APPLY_SINGULAR)
|
|
505
|
+
obj = {
|
|
506
|
+
...obj,
|
|
507
|
+
content: detaolued,
|
|
508
|
+
}
|
|
478
509
|
}
|
|
479
510
|
|
|
480
511
|
return {
|