@dan-uni/dan-any-plugin-detaolu 1.0.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +2029 -54
- package/dist/index.umd.min.js +6574 -867
- package/dist/index.umd.min.js.LICENSE.txt +1 -1
- package/dist/pakku.js/similarity_stub.d.ts +1 -1
- package/dist/static/wasm/{54a7637a.module.wasm → 21072e5de5.module.wasm} +0 -0
- package/package.json +2 -6
- package/rslib.config.ts +2 -2
- package/src/pakku.js/index.ts +88 -62
- package/src/pakku.js/similarity-gen.js +21 -2
- package/src/pakku.js/similarity-gen.wasm +0 -0
- package/src/pakku.js/similarity_stub.ts +9 -5
- package/tsconfig.json +2 -2
- package/tsconfig.tsbuildinfo +1 -0
- package/types/tsconfig.tsbuildinfo +0 -1
|
@@ -27,7 +27,7 @@ See the Apache Version 2.0 License for specific language governing permissions
|
|
|
27
27
|
and limitations under the License.
|
|
28
28
|
***************************************************************************** */
|
|
29
29
|
|
|
30
|
-
/*!
|
|
30
|
+
/*! LICENSE: index.min.js.LICENSE.txt */
|
|
31
31
|
|
|
32
32
|
/*! ieee754. BSD-3-Clause License. Feross Aboukhadijeh <https://feross.org/opensource> */
|
|
33
33
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { Config } from '.';
|
|
2
2
|
import type { int, Stats } from './types';
|
|
3
|
-
export declare function init(wasm_module: ArrayBuffer
|
|
3
|
+
export declare function init(wasm_module: ArrayBuffer): Promise<void>;
|
|
4
4
|
export declare function begin_chunk(config: Config): void;
|
|
5
5
|
export declare function begin_index_lock(): void;
|
|
6
6
|
export declare function detect_similarity(str: string, mode: number, index_l: int, S: Stats): null | {
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@dan-uni/dan-any-plugin-detaolu",
|
|
3
|
-
"version": "1.0
|
|
3
|
+
"version": "1.4.0",
|
|
4
4
|
"description": "A filter, dedupe and anti-spam plugin of dan-any, a danmaku transformer lib, based on pakku.js.",
|
|
5
5
|
"author": "rinne",
|
|
6
6
|
"license": "GPL-3.0-or-later",
|
|
@@ -30,10 +30,6 @@
|
|
|
30
30
|
"build:types": "tsc"
|
|
31
31
|
},
|
|
32
32
|
"dependencies": {
|
|
33
|
-
"@dan-uni/dan-any": "workspace:^"
|
|
34
|
-
"fs-extra": "^11.3.3"
|
|
35
|
-
},
|
|
36
|
-
"devDependencies": {
|
|
37
|
-
"@types/fs-extra": "^11.0.4"
|
|
33
|
+
"@dan-uni/dan-any": "workspace:^"
|
|
38
34
|
}
|
|
39
35
|
}
|
package/rslib.config.ts
CHANGED
|
@@ -8,7 +8,7 @@ export default defineConfig({
|
|
|
8
8
|
{
|
|
9
9
|
format: 'esm',
|
|
10
10
|
output: {
|
|
11
|
-
filename: { js: '
|
|
11
|
+
filename: { js: '[name].js' },
|
|
12
12
|
target: 'node',
|
|
13
13
|
},
|
|
14
14
|
dts: true,
|
|
@@ -16,7 +16,7 @@ export default defineConfig({
|
|
|
16
16
|
{
|
|
17
17
|
format: 'umd',
|
|
18
18
|
output: {
|
|
19
|
-
filename: { js: '
|
|
19
|
+
filename: { js: '[name].umd.min.js' },
|
|
20
20
|
target: 'web',
|
|
21
21
|
},
|
|
22
22
|
dts: true,
|
package/src/pakku.js/index.ts
CHANGED
|
@@ -109,6 +109,7 @@ export const DEFAULT_CONFIG = {
|
|
|
109
109
|
}
|
|
110
110
|
|
|
111
111
|
export type Config = Partial<typeof DEFAULT_CONFIG>
|
|
112
|
+
type ResolvedConfig = typeof DEFAULT_CONFIG
|
|
112
113
|
|
|
113
114
|
interface DanmuIr {
|
|
114
115
|
obj: DanmuObject
|
|
@@ -224,80 +225,88 @@ const WIDTH_TABLE = new Map(
|
|
|
224
225
|
/**
|
|
225
226
|
* 反套路
|
|
226
227
|
*/
|
|
227
|
-
|
|
228
|
+
function detaolu_meta(
|
|
229
|
+
config: ResolvedConfig,
|
|
230
|
+
): (text: string) => [boolean, string] {
|
|
228
231
|
const TRIM_ENDING = config.TRIM_ENDING
|
|
229
232
|
const TRIM_SPACE = config.TRIM_SPACE
|
|
230
233
|
const TRIM_WIDTH = config.TRIM_WIDTH
|
|
231
|
-
const FORCELIST =
|
|
234
|
+
const FORCELIST = config.FORCELIST.map(
|
|
232
235
|
([pattern, repl]) => [new RegExp(pattern, 'giu'), repl] as [RegExp, string],
|
|
233
236
|
)
|
|
234
237
|
const FORCELIST_BREAK_ON_MATCH = !config.FORCELIST_CONTINUE_ON_MATCH
|
|
235
238
|
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
239
|
+
return (inp: string) => {
|
|
240
|
+
let len = inp.length
|
|
241
|
+
let text = ''
|
|
242
|
+
|
|
243
|
+
if (TRIM_ENDING) {
|
|
244
|
+
while (ENDING_CHARS.has(inp.charAt(len - 1)))
|
|
245
|
+
// assert str.charAt(-1)===''
|
|
246
|
+
len--
|
|
247
|
+
if (len === 0)
|
|
248
|
+
// all chars are ending chars, do nothing
|
|
249
|
+
len = inp.length
|
|
250
|
+
}
|
|
247
251
|
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
+
if (TRIM_WIDTH) {
|
|
253
|
+
for (let i = 0; i < len; i++) {
|
|
254
|
+
const c = inp.charAt(i)
|
|
255
|
+
text += WIDTH_TABLE.get(c) || c
|
|
256
|
+
}
|
|
257
|
+
} else {
|
|
258
|
+
text = inp.slice(0, len)
|
|
252
259
|
}
|
|
253
|
-
} else {
|
|
254
|
-
text = inp.slice(0, len)
|
|
255
|
-
}
|
|
256
260
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
261
|
+
if (TRIM_SPACE) {
|
|
262
|
+
// text = text
|
|
263
|
+
// .replace(TRIM_EXTRA_SPACE_RE, ' ')
|
|
264
|
+
// .replace(TRIM_CJK_SPACE_RE, '$1')
|
|
265
|
+
text = text
|
|
266
|
+
.replaceAll(/[ \u3000]+/g, ' ')
|
|
267
|
+
.replaceAll(
|
|
268
|
+
/([\u3000-\u9FFF\uFF00-\uFFEF]) (?=[\u3000-\u9FFF\uFF00-\uFFEF])/g,
|
|
269
|
+
'$1',
|
|
270
|
+
)
|
|
271
|
+
}
|
|
268
272
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
273
|
+
let taolu_matched = false
|
|
274
|
+
for (const taolu of FORCELIST) {
|
|
275
|
+
if (taolu[0].test(text)) {
|
|
276
|
+
text = text.replace(taolu[0], taolu[1])
|
|
277
|
+
taolu_matched = true
|
|
278
|
+
if (FORCELIST_BREAK_ON_MATCH) break
|
|
279
|
+
}
|
|
275
280
|
}
|
|
276
|
-
}
|
|
277
281
|
|
|
278
|
-
|
|
282
|
+
return [taolu_matched, text]
|
|
283
|
+
}
|
|
279
284
|
}
|
|
280
285
|
|
|
281
286
|
/**
|
|
282
287
|
* 白名单处理
|
|
283
288
|
*/
|
|
284
|
-
|
|
285
|
-
const WHITELIST =
|
|
286
|
-
|
|
287
|
-
)
|
|
288
|
-
|
|
289
|
-
|
|
289
|
+
function whitelisted_meta(config: ResolvedConfig): (text: string) => boolean {
|
|
290
|
+
const WHITELIST = config.WHITELIST.map((x) => new RegExp(x[0], 'iu'))
|
|
291
|
+
|
|
292
|
+
if (WHITELIST.length === 0) return () => false
|
|
293
|
+
|
|
294
|
+
return (text: string) => WHITELIST.some((re) => re.test(text))
|
|
290
295
|
}
|
|
291
296
|
|
|
292
297
|
/**
|
|
293
298
|
* 黑名单处理
|
|
294
299
|
*/
|
|
295
|
-
|
|
296
|
-
|
|
300
|
+
function blacklisted_meta(
|
|
301
|
+
config: ResolvedConfig,
|
|
302
|
+
): (text: string) => string | null {
|
|
303
|
+
const BLACKLIST = config.BLACKLIST.map((x) =>
|
|
297
304
|
x[0] ? new RegExp(x[1]) : x[1].toLowerCase(),
|
|
298
305
|
)
|
|
299
|
-
|
|
300
|
-
|
|
306
|
+
|
|
307
|
+
if (BLACKLIST.length === 0) return () => null
|
|
308
|
+
|
|
309
|
+
return (text: string) => {
|
|
301
310
|
const lower = text.toLowerCase()
|
|
302
311
|
for (const pattern of BLACKLIST) {
|
|
303
312
|
const matched =
|
|
@@ -339,11 +348,22 @@ function select_median_length(strs: string[]): string {
|
|
|
339
348
|
return sorted[mid]
|
|
340
349
|
}
|
|
341
350
|
|
|
351
|
+
function u8array_to_arraybuffer(array: Uint8Array): ArrayBuffer {
|
|
352
|
+
return array.buffer.slice(
|
|
353
|
+
array.byteOffset,
|
|
354
|
+
array.byteOffset + array.byteLength,
|
|
355
|
+
) as ArrayBuffer
|
|
356
|
+
}
|
|
357
|
+
|
|
342
358
|
async function load_wasm(wasm_mod?: ArrayBuffer) {
|
|
343
|
-
|
|
344
|
-
wasm_mod
|
|
345
|
-
|
|
346
|
-
|
|
359
|
+
if (wasm_mod) {
|
|
360
|
+
await sim_init(wasm_mod)
|
|
361
|
+
return
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
const wasm_path = new URL('similarity-gen.wasm', import.meta.url).pathname
|
|
365
|
+
const wasm_u8 = await fs.readFile(wasm_path)
|
|
366
|
+
await sim_init(u8array_to_arraybuffer(wasm_u8))
|
|
347
367
|
}
|
|
348
368
|
|
|
349
369
|
function make_ptr_idx(idx: int, is_next_chunk: boolean): int {
|
|
@@ -355,9 +375,11 @@ async function merge(
|
|
|
355
375
|
// next_chunk: DanmuChunk<DanmuObject>,
|
|
356
376
|
config: Config = DEFAULT_CONFIG,
|
|
357
377
|
): Promise<DanmuClusterOutput> {
|
|
378
|
+
const local_config: ResolvedConfig = { ...DEFAULT_CONFIG, ...config }
|
|
379
|
+
|
|
358
380
|
await load_wasm()
|
|
359
381
|
|
|
360
|
-
begin_chunk(
|
|
382
|
+
begin_chunk(local_config)
|
|
361
383
|
|
|
362
384
|
const ret: DanmuClusterOutput = {
|
|
363
385
|
clusters: [],
|
|
@@ -419,6 +441,10 @@ async function merge(
|
|
|
419
441
|
}
|
|
420
442
|
}
|
|
421
443
|
|
|
444
|
+
const detaolu = detaolu_meta(local_config)
|
|
445
|
+
const whitelisted = whitelisted_meta(local_config)
|
|
446
|
+
const blacklisted = blacklisted_meta(local_config)
|
|
447
|
+
|
|
422
448
|
function obj_to_ir(
|
|
423
449
|
objs: DanmuObject[],
|
|
424
450
|
s: Stats | null,
|
|
@@ -426,7 +452,7 @@ async function merge(
|
|
|
426
452
|
): DanmuIr[] {
|
|
427
453
|
return objs
|
|
428
454
|
.map((obj, idx) => {
|
|
429
|
-
if (!
|
|
455
|
+
if (!local_config.PROC_POOL1 && obj.pool === 1) {
|
|
430
456
|
if (s) {
|
|
431
457
|
s.ignored_type++
|
|
432
458
|
apply_single_cluster(idx, obj, '已忽略字幕弹幕,可以在选项中修改')
|
|
@@ -434,7 +460,7 @@ async function merge(
|
|
|
434
460
|
return null
|
|
435
461
|
}
|
|
436
462
|
// if (!config.PROC_TYPE7 && obj.mode === 7) {
|
|
437
|
-
if (!
|
|
463
|
+
if (!local_config.PROC_TYPE7 && obj.mode === 4) {
|
|
438
464
|
if (s) {
|
|
439
465
|
s.ignored_type++
|
|
440
466
|
apply_single_cluster(idx, obj, '已忽略特殊弹幕,可以在选项中修改')
|
|
@@ -442,7 +468,7 @@ async function merge(
|
|
|
442
468
|
return null
|
|
443
469
|
}
|
|
444
470
|
// if (!config.PROC_TYPE4 && obj.mode === 4) {
|
|
445
|
-
if (!
|
|
471
|
+
if (!local_config.PROC_TYPE4 && obj.mode === 1) {
|
|
446
472
|
if (s) {
|
|
447
473
|
s.ignored_type++
|
|
448
474
|
apply_single_cluster(idx, obj, '已忽略底部弹幕,可以在选项中修改')
|
|
@@ -473,7 +499,7 @@ async function merge(
|
|
|
473
499
|
|
|
474
500
|
// if (obj.mode !== 8 && obj.mode !== 9) {
|
|
475
501
|
if (obj.mode !== 4) {
|
|
476
|
-
const matched = blacklisted(disp_str
|
|
502
|
+
const matched = blacklisted(disp_str)
|
|
477
503
|
if (matched) {
|
|
478
504
|
if (s) {
|
|
479
505
|
s.deleted_blacklist++
|
|
@@ -489,7 +515,7 @@ async function merge(
|
|
|
489
515
|
return null
|
|
490
516
|
}
|
|
491
517
|
}
|
|
492
|
-
if (whitelisted(disp_str
|
|
518
|
+
if (whitelisted(disp_str)) {
|
|
493
519
|
if (s) {
|
|
494
520
|
s.ignored_whitelist++
|
|
495
521
|
apply_single_cluster(idx, obj, '命中白名单')
|
|
@@ -497,11 +523,11 @@ async function merge(
|
|
|
497
523
|
return null
|
|
498
524
|
}
|
|
499
525
|
|
|
500
|
-
const [matched_taolu, detaolued] = detaolu(disp_str
|
|
526
|
+
const [matched_taolu, detaolued] = detaolu(disp_str)
|
|
501
527
|
|
|
502
528
|
if (matched_taolu) {
|
|
503
529
|
if (s) s.num_taolu_matched++
|
|
504
|
-
if (
|
|
530
|
+
if (local_config.FORCELIST_APPLY_SINGULAR)
|
|
505
531
|
obj = {
|
|
506
532
|
...obj,
|
|
507
533
|
content: detaolued,
|
|
@@ -523,7 +549,7 @@ async function merge(
|
|
|
523
549
|
|
|
524
550
|
const nearby_danmus: Queue<DanmuIr[]> = new Queue()
|
|
525
551
|
|
|
526
|
-
const THRESHOLD_MS =
|
|
552
|
+
const THRESHOLD_MS = local_config.THRESHOLD * 1000
|
|
527
553
|
|
|
528
554
|
for (const dm of danmus) {
|
|
529
555
|
while (true) {
|
|
@@ -1,5 +1,24 @@
|
|
|
1
|
-
/* eslint-disable
|
|
2
|
-
/* eslint-disable */
|
|
1
|
+
/* eslint-disable import/no-default-export */
|
|
2
|
+
/* eslint-disable unicorn/prefer-number-properties */
|
|
3
|
+
/* eslint-disable object-shorthand */
|
|
4
|
+
/* eslint-disable block-scoped-var */
|
|
5
|
+
/* eslint-disable unicorn/catch-error-name */
|
|
6
|
+
/* eslint-disable getter-return */
|
|
7
|
+
/* eslint-disable no-setter-return */
|
|
8
|
+
/* eslint-disable prefer-template */
|
|
9
|
+
/* eslint-disable unicorn/throw-new-error */
|
|
10
|
+
/* eslint-disable unicorn/new-for-builtins */
|
|
11
|
+
/* eslint-disable unicorn/prefer-code-point */
|
|
12
|
+
/* eslint-disable unicorn/no-negated-condition */
|
|
13
|
+
/* eslint-disable no-console */
|
|
14
|
+
/* eslint-disable unicorn/consistent-function-scoping */
|
|
15
|
+
/* eslint-disable no-unused-expressions */
|
|
16
|
+
/* eslint-disable vars-on-top */
|
|
17
|
+
/* eslint-disable one-var */
|
|
18
|
+
/* eslint-disable import/no-mutable-exports */
|
|
19
|
+
/* eslint-disable no-var */
|
|
20
|
+
/* eslint-disable prettier/prettier */
|
|
21
|
+
|
|
3
22
|
/** @nocollapse */ var Module = function (moduleArg = {}) {
|
|
4
23
|
var moduleRtn
|
|
5
24
|
|
|
Binary file
|
|
@@ -9,7 +9,7 @@ let ptr_buf: number
|
|
|
9
9
|
|
|
10
10
|
const MAX_STRING_LEN = 16005
|
|
11
11
|
|
|
12
|
-
export async function init(wasm_module: ArrayBuffer
|
|
12
|
+
export async function init(wasm_module: ArrayBuffer) {
|
|
13
13
|
module = await generated_promise({ wasm: wasm_module })
|
|
14
14
|
ptr_buf = module._malloc(MAX_STRING_LEN * 2 + 7)
|
|
15
15
|
if (ptr_buf % 2)
|
|
@@ -27,7 +27,7 @@ export function begin_chunk(config: Config) {
|
|
|
27
27
|
config.CROSS_MODE,
|
|
28
28
|
)
|
|
29
29
|
} catch (error) {
|
|
30
|
-
throw new Error(`wasm error (begin_chunk):\n${error}
|
|
30
|
+
throw new Error(`wasm error (begin_chunk):\n${error}`, { cause: error })
|
|
31
31
|
}
|
|
32
32
|
}
|
|
33
33
|
|
|
@@ -35,7 +35,9 @@ export function begin_index_lock() {
|
|
|
35
35
|
try {
|
|
36
36
|
module._begin_index_lock()
|
|
37
37
|
} catch (error) {
|
|
38
|
-
throw new Error(`wasm error (begin_index_lock):\n${error}
|
|
38
|
+
throw new Error(`wasm error (begin_index_lock):\n${error}`, {
|
|
39
|
+
cause: error,
|
|
40
|
+
})
|
|
39
41
|
}
|
|
40
42
|
}
|
|
41
43
|
|
|
@@ -55,14 +57,16 @@ export function detect_similarity(
|
|
|
55
57
|
try {
|
|
56
58
|
module.stringToUTF16(str, ptr_buf, MAX_STRING_LEN * 2)
|
|
57
59
|
} catch (error) {
|
|
58
|
-
throw new Error(`wasm error (write str buf): ${str}\n${error}
|
|
60
|
+
throw new Error(`wasm error (write str buf): ${str}\n${error}`, {
|
|
61
|
+
cause: error,
|
|
62
|
+
})
|
|
59
63
|
}
|
|
60
64
|
|
|
61
65
|
let ret: number
|
|
62
66
|
try {
|
|
63
67
|
ret = module._check_similar(mode, index_l)
|
|
64
68
|
} catch (error) {
|
|
65
|
-
throw new Error(`wasm error (similar): ${str}\n${error}
|
|
69
|
+
throw new Error(`wasm error (similar): ${str}\n${error}`, { cause: error })
|
|
66
70
|
}
|
|
67
71
|
|
|
68
72
|
if (ret === 0)
|
package/tsconfig.json
CHANGED
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
// "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
|
|
11
11
|
/* Language and Environment */
|
|
12
12
|
"target": "ES2023" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */,
|
|
13
|
+
"rootDir": "./src" /* Specify the root folder within your source files. */,
|
|
13
14
|
// "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
|
|
14
15
|
// "jsx": "preserve", /* Specify what JSX code is generated. */
|
|
15
16
|
// "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
|
|
@@ -23,8 +24,7 @@
|
|
|
23
24
|
// "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
|
|
24
25
|
/* Modules */
|
|
25
26
|
"module": "ESNext" /* Skip type checking all .d.ts files. */,
|
|
26
|
-
"moduleResolution": "
|
|
27
|
-
// "rootDir": "./", /* Specify the root folder within your source files. */
|
|
27
|
+
"moduleResolution": "bundler",
|
|
28
28
|
// "moduleResolution": "node10", /* Specify how TypeScript looks up a file from a given module specifier. */
|
|
29
29
|
// "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
|
|
30
30
|
// "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":"6.0.2"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":"5.9.3"}
|