@dan-uni/dan-any-plugin-detaolu 0.9.2 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +112 -88
- package/dist/index.js.LICENSE.txt +4 -1
- package/dist/index.umd.min.js +1642 -2122
- package/dist/index.umd.min.js.LICENSE.txt +4 -1
- package/dist/pakku.js/index.d.ts +33 -10
- package/package.json +10 -10
- package/src/index.ts +2 -2
- package/src/pakku.js/index.ts +76 -37
- package/src/pakku.js/similarity-gen.js +2 -15
- package/src/pakku.js/similarity_stub.ts +28 -14
- package/tsconfig.json +2 -2
- package/types/tsconfig.tsbuildinfo +1 -0
- /package/dist/static/wasm/{54a7637a81e5f86e.module.wasm → 54a7637a.module.wasm} +0 -0
package/dist/index.js
CHANGED
|
@@ -50,7 +50,7 @@ var Module = function(moduleArg = {}) {
|
|
|
50
50
|
return 'FS_createPath' === a || 'FS_createDataFile' === a || 'FS_createPreloadedFile' === a || 'FS_unlink' === a || 'addRunDependency' === a || 'FS_createLazyFile' === a || 'FS_createDevice' === a || 'removeRunDependency' === a;
|
|
51
51
|
}
|
|
52
52
|
function L(a, b) {
|
|
53
|
-
|
|
53
|
+
"u" < typeof globalThis || Object.getOwnPropertyDescriptor(globalThis, a) || Object.defineProperty(globalThis, a, {
|
|
54
54
|
configurable: !0,
|
|
55
55
|
get () {
|
|
56
56
|
b();
|
|
@@ -77,7 +77,7 @@ var Module = function(moduleArg = {}) {
|
|
|
77
77
|
var N = (a)=>{
|
|
78
78
|
N.g || (N.g = {});
|
|
79
79
|
N.g[a] || (N.g[a] = 1, t(a));
|
|
80
|
-
}, P =
|
|
80
|
+
}, P = "u" > typeof TextDecoder ? new TextDecoder() : void 0, R = [
|
|
81
81
|
null,
|
|
82
82
|
[],
|
|
83
83
|
[]
|
|
@@ -270,19 +270,26 @@ function detect_similarity(str, mode, index_l, S) {
|
|
|
270
270
|
const dist = ret >>> 19 & 2047;
|
|
271
271
|
const idx_diff = 524287 & ret;
|
|
272
272
|
let reason_str;
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
273
|
+
switch(reason){
|
|
274
|
+
case 0:
|
|
275
|
+
S.combined_identical++;
|
|
276
|
+
reason_str = '==';
|
|
277
|
+
break;
|
|
278
|
+
case 1:
|
|
279
|
+
S.combined_edit_distance++;
|
|
280
|
+
reason_str = `≤${dist}`;
|
|
281
|
+
break;
|
|
282
|
+
case 3:
|
|
283
|
+
S.combined_cosine_distance++;
|
|
284
|
+
reason_str = `${dist}%`;
|
|
285
|
+
break;
|
|
286
|
+
case 2:
|
|
287
|
+
S.combined_pinyin_distance++;
|
|
288
|
+
reason_str = `P≤${dist}`;
|
|
289
|
+
break;
|
|
290
|
+
default:
|
|
291
|
+
throw new Error(`similarity wasm returned unknown reason: ${ret}`);
|
|
292
|
+
}
|
|
286
293
|
return {
|
|
287
294
|
reason: reason_str,
|
|
288
295
|
idx_diff
|
|
@@ -344,7 +351,10 @@ class Queue {
|
|
|
344
351
|
}
|
|
345
352
|
/**
|
|
346
353
|
* @author: xmcp(代码主要逻辑来源)
|
|
347
|
-
* @see: https://github.com/xmcp/pakku.js
|
|
354
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/core/combine_worker.ts
|
|
355
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/background/config.ts
|
|
356
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/page/options.html
|
|
357
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/page/options.ts
|
|
348
358
|
* @license: GPL-3.0
|
|
349
359
|
* 本文件内代码来源见上,经部分修改,并整合config注释
|
|
350
360
|
*/ const DEFAULT_CONFIG = {
|
|
@@ -365,6 +375,8 @@ class Queue {
|
|
|
365
375
|
'66666'
|
|
366
376
|
]
|
|
367
377
|
],
|
|
378
|
+
FORCELIST_CONTINUE_ON_MATCH: true,
|
|
379
|
+
FORCELIST_APPLY_SINGULAR: false,
|
|
368
380
|
WHITELIST: [],
|
|
369
381
|
BLACKLIST: [],
|
|
370
382
|
CROSS_MODE: true,
|
|
@@ -372,51 +384,51 @@ class Queue {
|
|
|
372
384
|
PROC_TYPE4: true,
|
|
373
385
|
PROC_POOL1: false
|
|
374
386
|
};
|
|
375
|
-
const ENDING_CHARS = new Set(
|
|
387
|
+
const ENDING_CHARS = new Set('.。,,/??!!…~~@^、+=-_♂♀ ');
|
|
376
388
|
const WIDTH_TABLE = new Map(Object.entries({
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
'!':
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
';':
|
|
407
|
-
|
|
408
|
-
':':
|
|
409
|
-
|
|
410
|
-
',':
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
'?':
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
389
|
+
' ': ' ',
|
|
390
|
+
'1': '1',
|
|
391
|
+
'2': '2',
|
|
392
|
+
'3': '3',
|
|
393
|
+
'4': '4',
|
|
394
|
+
'5': '5',
|
|
395
|
+
'6': '6',
|
|
396
|
+
'7': '7',
|
|
397
|
+
'8': '8',
|
|
398
|
+
'9': '9',
|
|
399
|
+
'0': '0',
|
|
400
|
+
'!': '!',
|
|
401
|
+
'@': '@',
|
|
402
|
+
'#': '#',
|
|
403
|
+
'$': '$',
|
|
404
|
+
'%': '%',
|
|
405
|
+
'^': '^',
|
|
406
|
+
'&': '&',
|
|
407
|
+
'*': '*',
|
|
408
|
+
'(': '(',
|
|
409
|
+
')': ')',
|
|
410
|
+
'-': '-',
|
|
411
|
+
'=': '=',
|
|
412
|
+
'_': '_',
|
|
413
|
+
'+': '+',
|
|
414
|
+
'[': '[',
|
|
415
|
+
']': ']',
|
|
416
|
+
'{': '{',
|
|
417
|
+
'}': '}',
|
|
418
|
+
';': ';',
|
|
419
|
+
''': "'",
|
|
420
|
+
':': ':',
|
|
421
|
+
'"': '"',
|
|
422
|
+
',': ',',
|
|
423
|
+
'.': '.',
|
|
424
|
+
'/': '/',
|
|
425
|
+
'<': '<',
|
|
426
|
+
'>': '>',
|
|
427
|
+
'?': '?',
|
|
428
|
+
'\': '\\',
|
|
429
|
+
'|': '|',
|
|
430
|
+
'`': '`',
|
|
431
|
+
'~': '~',
|
|
420
432
|
q: 'q',
|
|
421
433
|
w: 'w',
|
|
422
434
|
e: 'e',
|
|
@@ -475,9 +487,10 @@ const detaolu = (inp, config)=>{
|
|
|
475
487
|
const TRIM_SPACE = config.TRIM_SPACE;
|
|
476
488
|
const TRIM_WIDTH = config.TRIM_WIDTH;
|
|
477
489
|
const FORCELIST = (config?.FORCELIST ?? DEFAULT_CONFIG.FORCELIST).map(([pattern, repl])=>[
|
|
478
|
-
new RegExp(pattern, '
|
|
490
|
+
new RegExp(pattern, 'giu'),
|
|
479
491
|
repl
|
|
480
492
|
]);
|
|
493
|
+
const FORCELIST_BREAK_ON_MATCH = !config.FORCELIST_CONTINUE_ON_MATCH;
|
|
481
494
|
let len = inp.length;
|
|
482
495
|
let text = '';
|
|
483
496
|
if (TRIM_ENDING) {
|
|
@@ -490,20 +503,19 @@ const detaolu = (inp, config)=>{
|
|
|
490
503
|
}
|
|
491
504
|
else text = inp.slice(0, len);
|
|
492
505
|
if (TRIM_SPACE) text = text.replaceAll(/[ \u3000]+/g, ' ').replaceAll(/([\u3000-\u9FFF\uFF00-\uFFEF]) (?=[\u3000-\u9FFF\uFF00-\uFFEF])/g, '$1');
|
|
506
|
+
let taolu_matched = false;
|
|
493
507
|
for (const taolu of FORCELIST)if (taolu[0].test(text)) {
|
|
494
508
|
text = text.replace(taolu[0], taolu[1]);
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
text
|
|
498
|
-
];
|
|
509
|
+
taolu_matched = true;
|
|
510
|
+
if (FORCELIST_BREAK_ON_MATCH) break;
|
|
499
511
|
}
|
|
500
512
|
return [
|
|
501
|
-
|
|
513
|
+
taolu_matched,
|
|
502
514
|
text
|
|
503
515
|
];
|
|
504
516
|
};
|
|
505
517
|
const whitelisted = (text, config)=>{
|
|
506
|
-
const WHITELIST = (config?.WHITELIST ?? DEFAULT_CONFIG.WHITELIST).map((x)=>new RegExp(x[0], '
|
|
518
|
+
const WHITELIST = (config?.WHITELIST ?? DEFAULT_CONFIG.WHITELIST).map((x)=>new RegExp(x[0], 'iu'));
|
|
507
519
|
if (0 === WHITELIST.length) return false;
|
|
508
520
|
return WHITELIST.some((re)=>re.test(text));
|
|
509
521
|
};
|
|
@@ -530,12 +542,15 @@ function trim_dispstr(text) {
|
|
|
530
542
|
}
|
|
531
543
|
function select_median_length(strs) {
|
|
532
544
|
if (1 === strs.length) return strs[0];
|
|
533
|
-
const sorted = strs.
|
|
545
|
+
const sorted = strs.toSorted((a, b)=>a.length - b.length);
|
|
534
546
|
const mid = Math.floor(sorted.length / 2);
|
|
535
547
|
return sorted[mid];
|
|
536
548
|
}
|
|
537
549
|
async function load_wasm(wasm_mod) {
|
|
538
|
-
await similarity_stub_init(wasm_mod ?? await fs_extra.readFile(new URL('
|
|
550
|
+
await similarity_stub_init(wasm_mod ?? await fs_extra.readFile(new URL('similarity-gen.wasm', import.meta.url)));
|
|
551
|
+
}
|
|
552
|
+
function make_ptr_idx(idx, is_next_chunk) {
|
|
553
|
+
return is_next_chunk ? -1 - idx : idx;
|
|
539
554
|
}
|
|
540
555
|
async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
541
556
|
await load_wasm();
|
|
@@ -566,7 +581,7 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
566
581
|
function apply_cluster(irs) {
|
|
567
582
|
if (1 === irs.length) ret.clusters.push({
|
|
568
583
|
peers_ptr: irs.map((ir)=>[
|
|
569
|
-
ir.
|
|
584
|
+
ir.ptr_idx,
|
|
570
585
|
ir.sim_reason
|
|
571
586
|
]),
|
|
572
587
|
desc: [],
|
|
@@ -576,7 +591,8 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
576
591
|
});
|
|
577
592
|
else {
|
|
578
593
|
const text_cnts = new Map();
|
|
579
|
-
let most_texts = []
|
|
594
|
+
let most_texts = [];
|
|
595
|
+
let most_cnt = 0;
|
|
580
596
|
for (const ir of irs){
|
|
581
597
|
const text = ir.str;
|
|
582
598
|
const cnt = 1 + (text_cnts.get(text) || 0);
|
|
@@ -591,11 +607,11 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
591
607
|
const most_text = select_median_length(most_texts);
|
|
592
608
|
ret.clusters.push({
|
|
593
609
|
peers_ptr: irs.map((ir)=>[
|
|
594
|
-
ir.
|
|
610
|
+
ir.ptr_idx,
|
|
595
611
|
ir.sim_reason
|
|
596
612
|
]),
|
|
597
613
|
desc: most_cnt > 1 ? [
|
|
598
|
-
|
|
614
|
+
`采用了出现 ${most_cnt} 次的文本`
|
|
599
615
|
] : [],
|
|
600
616
|
chosen_str: most_text,
|
|
601
617
|
danuni_count: most_cnt,
|
|
@@ -603,26 +619,26 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
603
619
|
});
|
|
604
620
|
}
|
|
605
621
|
}
|
|
606
|
-
function obj_to_ir(objs, s) {
|
|
622
|
+
function obj_to_ir(objs, s, is_next_chunk) {
|
|
607
623
|
return objs.map((obj, idx)=>{
|
|
608
624
|
if (!config.PROC_POOL1 && 1 === obj.pool) {
|
|
609
625
|
if (s) {
|
|
610
626
|
s.ignored_type++;
|
|
611
|
-
apply_single_cluster(idx, obj,
|
|
627
|
+
apply_single_cluster(idx, obj, '已忽略字幕弹幕,可以在选项中修改');
|
|
612
628
|
}
|
|
613
629
|
return null;
|
|
614
630
|
}
|
|
615
631
|
if (!config.PROC_TYPE7 && 4 === obj.mode) {
|
|
616
632
|
if (s) {
|
|
617
633
|
s.ignored_type++;
|
|
618
|
-
apply_single_cluster(idx, obj,
|
|
634
|
+
apply_single_cluster(idx, obj, '已忽略特殊弹幕,可以在选项中修改');
|
|
619
635
|
}
|
|
620
636
|
return null;
|
|
621
637
|
}
|
|
622
638
|
if (!config.PROC_TYPE4 && 1 === obj.mode) {
|
|
623
639
|
if (s) {
|
|
624
640
|
s.ignored_type++;
|
|
625
|
-
apply_single_cluster(idx, obj,
|
|
641
|
+
apply_single_cluster(idx, obj, '已忽略底部弹幕,可以在选项中修改');
|
|
626
642
|
}
|
|
627
643
|
return null;
|
|
628
644
|
}
|
|
@@ -636,7 +652,7 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
636
652
|
ret.deleted_chunk.push({
|
|
637
653
|
...obj,
|
|
638
654
|
pakku: {
|
|
639
|
-
deleted_reason:
|
|
655
|
+
deleted_reason: `命中黑名单:${matched}`
|
|
640
656
|
}
|
|
641
657
|
});
|
|
642
658
|
}
|
|
@@ -646,21 +662,27 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
646
662
|
if (whitelisted(disp_str, config)) {
|
|
647
663
|
if (s) {
|
|
648
664
|
s.ignored_whitelist++;
|
|
649
|
-
apply_single_cluster(idx, obj,
|
|
665
|
+
apply_single_cluster(idx, obj, '命中白名单');
|
|
650
666
|
}
|
|
651
667
|
return null;
|
|
652
668
|
}
|
|
653
669
|
const [matched_taolu, detaolued] = detaolu(disp_str, config);
|
|
654
|
-
if (matched_taolu
|
|
670
|
+
if (matched_taolu) {
|
|
671
|
+
if (s) s.num_taolu_matched++;
|
|
672
|
+
if (config.FORCELIST_APPLY_SINGULAR) obj = {
|
|
673
|
+
...obj,
|
|
674
|
+
content: detaolued
|
|
675
|
+
};
|
|
676
|
+
}
|
|
655
677
|
return {
|
|
656
678
|
obj,
|
|
657
679
|
str: detaolued,
|
|
658
|
-
idx,
|
|
680
|
+
ptr_idx: make_ptr_idx(idx, is_next_chunk),
|
|
659
681
|
sim_reason: 'ORIG'
|
|
660
682
|
};
|
|
661
683
|
}).filter((obj)=>null !== obj);
|
|
662
684
|
}
|
|
663
|
-
const danmus = obj_to_ir(chunk.objs, ret.stats);
|
|
685
|
+
const danmus = obj_to_ir(chunk.objs, ret.stats, false);
|
|
664
686
|
const nearby_danmus = new Queue();
|
|
665
687
|
const THRESHOLD_MS = (config?.THRESHOLD ?? DEFAULT_CONFIG.THRESHOLD) * 1000;
|
|
666
688
|
for (const dm of danmus){
|
|
@@ -671,13 +693,14 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
671
693
|
nearby_danmus.pop();
|
|
672
694
|
}
|
|
673
695
|
const sim = detect_similarity(dm.str, dm.obj.mode, nearby_danmus.index_l, ret.stats);
|
|
674
|
-
if (null
|
|
696
|
+
if (null === sim) nearby_danmus.push([
|
|
697
|
+
dm
|
|
698
|
+
]);
|
|
699
|
+
else {
|
|
675
700
|
const candidate = nearby_danmus.storage[nearby_danmus.index_r - sim.idx_diff];
|
|
676
701
|
dm.sim_reason = sim.reason;
|
|
677
702
|
candidate.push(dm);
|
|
678
|
-
}
|
|
679
|
-
dm
|
|
680
|
-
]);
|
|
703
|
+
}
|
|
681
704
|
}
|
|
682
705
|
begin_index_lock();
|
|
683
706
|
for (const candidate of nearby_danmus)apply_cluster(candidate);
|
|
@@ -697,7 +720,8 @@ async function src_detaolu(that, config) {
|
|
|
697
720
|
const selected = p.clusters.map((p)=>{
|
|
698
721
|
if (1 === p.danuni_dans.length) return p.danuni_dans[0].danuni_dan;
|
|
699
722
|
{
|
|
700
|
-
const dans = p.danuni_dans
|
|
723
|
+
const dans = p.danuni_dans;
|
|
724
|
+
const pool = new UniPool(dans.map((d)=>d.danuni_dan));
|
|
701
725
|
function isAllBottomMode(p) {
|
|
702
726
|
return p.dans.every((d)=>d.mode === UniDMTools.Modes.Bottom);
|
|
703
727
|
}
|
|
@@ -736,4 +760,4 @@ function detaolu_constructor(config) {
|
|
|
736
760
|
return (that)=>src_detaolu(that, config);
|
|
737
761
|
}
|
|
738
762
|
const src = detaolu_constructor;
|
|
739
|
-
export
|
|
763
|
+
export default src;
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @author: xmcp(代码主要逻辑来源)
|
|
3
|
-
* @see: https://github.com/xmcp/pakku.js
|
|
3
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/core/combine_worker.ts
|
|
4
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/background/config.ts
|
|
5
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/page/options.html
|
|
6
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/page/options.ts
|
|
4
7
|
* @license: GPL-3.0
|
|
5
8
|
* 本文件内代码来源见上,经部分修改,并整合config注释
|
|
6
9
|
*/
|