@dan-uni/dan-any-plugin-detaolu 0.9.2 → 0.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +90 -77
- package/dist/index.js.LICENSE.txt +1 -1
- package/dist/index.umd.min.js +1007 -1013
- package/dist/index.umd.min.js.LICENSE.txt +1 -1
- package/dist/pakku.js/index.d.ts +1 -1
- package/package.json +2 -2
- package/src/index.ts +2 -2
- package/src/pakku.js/index.ts +23 -15
- package/src/pakku.js/similarity-gen.js +2 -15
- package/src/pakku.js/similarity_stub.ts +28 -14
- package/tsconfig.json +2 -2
- package/types/tsconfig.tsbuildinfo +1 -0
- /package/dist/static/wasm/{54a7637a81e5f86e.module.wasm → 54a7637a.module.wasm} +0 -0
package/dist/index.js
CHANGED
|
@@ -270,19 +270,26 @@ function detect_similarity(str, mode, index_l, S) {
|
|
|
270
270
|
const dist = ret >>> 19 & 2047;
|
|
271
271
|
const idx_diff = 524287 & ret;
|
|
272
272
|
let reason_str;
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
273
|
+
switch(reason){
|
|
274
|
+
case 0:
|
|
275
|
+
S.combined_identical++;
|
|
276
|
+
reason_str = '==';
|
|
277
|
+
break;
|
|
278
|
+
case 1:
|
|
279
|
+
S.combined_edit_distance++;
|
|
280
|
+
reason_str = `≤${dist}`;
|
|
281
|
+
break;
|
|
282
|
+
case 3:
|
|
283
|
+
S.combined_cosine_distance++;
|
|
284
|
+
reason_str = `${dist}%`;
|
|
285
|
+
break;
|
|
286
|
+
case 2:
|
|
287
|
+
S.combined_pinyin_distance++;
|
|
288
|
+
reason_str = `P≤${dist}`;
|
|
289
|
+
break;
|
|
290
|
+
default:
|
|
291
|
+
throw new Error(`similarity wasm returned unknown reason: ${ret}`);
|
|
292
|
+
}
|
|
286
293
|
return {
|
|
287
294
|
reason: reason_str,
|
|
288
295
|
idx_diff
|
|
@@ -344,7 +351,7 @@ class Queue {
|
|
|
344
351
|
}
|
|
345
352
|
/**
|
|
346
353
|
* @author: xmcp(代码主要逻辑来源)
|
|
347
|
-
* @see: https://github.com/xmcp/pakku.js
|
|
354
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/core/combine_worker.ts
|
|
348
355
|
* @license: GPL-3.0
|
|
349
356
|
* 本文件内代码来源见上,经部分修改,并整合config注释
|
|
350
357
|
*/ const DEFAULT_CONFIG = {
|
|
@@ -372,51 +379,51 @@ class Queue {
|
|
|
372
379
|
PROC_TYPE4: true,
|
|
373
380
|
PROC_POOL1: false
|
|
374
381
|
};
|
|
375
|
-
const ENDING_CHARS = new Set(
|
|
382
|
+
const ENDING_CHARS = new Set('.。,，/?？!！…~～@^、+=-_♂♀ ');
|
|
376
383
|
const WIDTH_TABLE = new Map(Object.entries({
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
'!':
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
';':
|
|
407
|
-
|
|
408
|
-
':':
|
|
409
|
-
|
|
410
|
-
',':
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
'?':
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
384
|
+
'　': ' ',
|
|
385
|
+
'１': '1',
|
|
386
|
+
'２': '2',
|
|
387
|
+
'３': '3',
|
|
388
|
+
'４': '4',
|
|
389
|
+
'５': '5',
|
|
390
|
+
'６': '6',
|
|
391
|
+
'７': '7',
|
|
392
|
+
'８': '8',
|
|
393
|
+
'９': '9',
|
|
394
|
+
'０': '0',
|
|
395
|
+
'！': '!',
|
|
396
|
+
'＠': '@',
|
|
397
|
+
'＃': '#',
|
|
398
|
+
'＄': '$',
|
|
399
|
+
'％': '%',
|
|
400
|
+
'＾': '^',
|
|
401
|
+
'＆': '&',
|
|
402
|
+
'＊': '*',
|
|
403
|
+
'（': '(',
|
|
404
|
+
'）': ')',
|
|
405
|
+
'－': '-',
|
|
406
|
+
'＝': '=',
|
|
407
|
+
'＿': '_',
|
|
408
|
+
'＋': '+',
|
|
409
|
+
'［': '[',
|
|
410
|
+
'］': ']',
|
|
411
|
+
'｛': '{',
|
|
412
|
+
'｝': '}',
|
|
413
|
+
'；': ';',
|
|
414
|
+
'＇': "'",
|
|
415
|
+
'：': ':',
|
|
416
|
+
'＂': '"',
|
|
417
|
+
'，': ',',
|
|
418
|
+
'．': '.',
|
|
419
|
+
'／': '/',
|
|
420
|
+
'＜': '<',
|
|
421
|
+
'＞': '>',
|
|
422
|
+
'？': '?',
|
|
423
|
+
'＼': '\\',
|
|
424
|
+
'｜': '|',
|
|
425
|
+
'｀': '`',
|
|
426
|
+
'～': '~',
|
|
420
427
|
ｑ: 'q',
|
|
421
428
|
ｗ: 'w',
|
|
422
429
|
ｅ: 'e',
|
|
@@ -530,12 +537,15 @@ function trim_dispstr(text) {
|
|
|
530
537
|
}
|
|
531
538
|
function select_median_length(strs) {
|
|
532
539
|
if (1 === strs.length) return strs[0];
|
|
533
|
-
const sorted = strs.
|
|
540
|
+
const sorted = strs.toSorted((a, b)=>a.length - b.length);
|
|
534
541
|
const mid = Math.floor(sorted.length / 2);
|
|
535
542
|
return sorted[mid];
|
|
536
543
|
}
|
|
537
544
|
async function load_wasm(wasm_mod) {
|
|
538
|
-
await similarity_stub_init(wasm_mod ?? await fs_extra.readFile(new URL('
|
|
545
|
+
await similarity_stub_init(wasm_mod ?? await fs_extra.readFile(new URL('similarity-gen.wasm', import.meta.url)));
|
|
546
|
+
}
|
|
547
|
+
function make_ptr_idx(idx, is_next_chunk) {
|
|
548
|
+
return is_next_chunk ? -1 - idx : idx;
|
|
539
549
|
}
|
|
540
550
|
async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
541
551
|
await load_wasm();
|
|
@@ -566,7 +576,7 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
566
576
|
function apply_cluster(irs) {
|
|
567
577
|
if (1 === irs.length) ret.clusters.push({
|
|
568
578
|
peers_ptr: irs.map((ir)=>[
|
|
569
|
-
ir.
|
|
579
|
+
ir.ptr_idx,
|
|
570
580
|
ir.sim_reason
|
|
571
581
|
]),
|
|
572
582
|
desc: [],
|
|
@@ -576,7 +586,8 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
576
586
|
});
|
|
577
587
|
else {
|
|
578
588
|
const text_cnts = new Map();
|
|
579
|
-
let most_texts = []
|
|
589
|
+
let most_texts = [];
|
|
590
|
+
let most_cnt = 0;
|
|
580
591
|
for (const ir of irs){
|
|
581
592
|
const text = ir.str;
|
|
582
593
|
const cnt = 1 + (text_cnts.get(text) || 0);
|
|
@@ -591,11 +602,11 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
591
602
|
const most_text = select_median_length(most_texts);
|
|
592
603
|
ret.clusters.push({
|
|
593
604
|
peers_ptr: irs.map((ir)=>[
|
|
594
|
-
ir.
|
|
605
|
+
ir.ptr_idx,
|
|
595
606
|
ir.sim_reason
|
|
596
607
|
]),
|
|
597
608
|
desc: most_cnt > 1 ? [
|
|
598
|
-
|
|
609
|
+
`采用了出现 ${most_cnt} 次的文本`
|
|
599
610
|
] : [],
|
|
600
611
|
chosen_str: most_text,
|
|
601
612
|
danuni_count: most_cnt,
|
|
@@ -603,26 +614,26 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
603
614
|
});
|
|
604
615
|
}
|
|
605
616
|
}
|
|
606
|
-
function obj_to_ir(objs, s) {
|
|
617
|
+
function obj_to_ir(objs, s, is_next_chunk) {
|
|
607
618
|
return objs.map((obj, idx)=>{
|
|
608
619
|
if (!config.PROC_POOL1 && 1 === obj.pool) {
|
|
609
620
|
if (s) {
|
|
610
621
|
s.ignored_type++;
|
|
611
|
-
apply_single_cluster(idx, obj,
|
|
622
|
+
apply_single_cluster(idx, obj, '已忽略字幕弹幕,可以在选项中修改');
|
|
612
623
|
}
|
|
613
624
|
return null;
|
|
614
625
|
}
|
|
615
626
|
if (!config.PROC_TYPE7 && 4 === obj.mode) {
|
|
616
627
|
if (s) {
|
|
617
628
|
s.ignored_type++;
|
|
618
|
-
apply_single_cluster(idx, obj,
|
|
629
|
+
apply_single_cluster(idx, obj, '已忽略特殊弹幕,可以在选项中修改');
|
|
619
630
|
}
|
|
620
631
|
return null;
|
|
621
632
|
}
|
|
622
633
|
if (!config.PROC_TYPE4 && 1 === obj.mode) {
|
|
623
634
|
if (s) {
|
|
624
635
|
s.ignored_type++;
|
|
625
|
-
apply_single_cluster(idx, obj,
|
|
636
|
+
apply_single_cluster(idx, obj, '已忽略底部弹幕,可以在选项中修改');
|
|
626
637
|
}
|
|
627
638
|
return null;
|
|
628
639
|
}
|
|
@@ -636,7 +647,7 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
636
647
|
ret.deleted_chunk.push({
|
|
637
648
|
...obj,
|
|
638
649
|
pakku: {
|
|
639
|
-
deleted_reason:
|
|
650
|
+
deleted_reason: `命中黑名单:${matched}`
|
|
640
651
|
}
|
|
641
652
|
});
|
|
642
653
|
}
|
|
@@ -646,7 +657,7 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
646
657
|
if (whitelisted(disp_str, config)) {
|
|
647
658
|
if (s) {
|
|
648
659
|
s.ignored_whitelist++;
|
|
649
|
-
apply_single_cluster(idx, obj,
|
|
660
|
+
apply_single_cluster(idx, obj, '命中白名单');
|
|
650
661
|
}
|
|
651
662
|
return null;
|
|
652
663
|
}
|
|
@@ -655,12 +666,12 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
655
666
|
return {
|
|
656
667
|
obj,
|
|
657
668
|
str: detaolued,
|
|
658
|
-
idx,
|
|
669
|
+
ptr_idx: make_ptr_idx(idx, is_next_chunk),
|
|
659
670
|
sim_reason: 'ORIG'
|
|
660
671
|
};
|
|
661
672
|
}).filter((obj)=>null !== obj);
|
|
662
673
|
}
|
|
663
|
-
const danmus = obj_to_ir(chunk.objs, ret.stats);
|
|
674
|
+
const danmus = obj_to_ir(chunk.objs, ret.stats, false);
|
|
664
675
|
const nearby_danmus = new Queue();
|
|
665
676
|
const THRESHOLD_MS = (config?.THRESHOLD ?? DEFAULT_CONFIG.THRESHOLD) * 1000;
|
|
666
677
|
for (const dm of danmus){
|
|
@@ -671,13 +682,14 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
671
682
|
nearby_danmus.pop();
|
|
672
683
|
}
|
|
673
684
|
const sim = detect_similarity(dm.str, dm.obj.mode, nearby_danmus.index_l, ret.stats);
|
|
674
|
-
if (null
|
|
685
|
+
if (null === sim) nearby_danmus.push([
|
|
686
|
+
dm
|
|
687
|
+
]);
|
|
688
|
+
else {
|
|
675
689
|
const candidate = nearby_danmus.storage[nearby_danmus.index_r - sim.idx_diff];
|
|
676
690
|
dm.sim_reason = sim.reason;
|
|
677
691
|
candidate.push(dm);
|
|
678
|
-
}
|
|
679
|
-
dm
|
|
680
|
-
]);
|
|
692
|
+
}
|
|
681
693
|
}
|
|
682
694
|
begin_index_lock();
|
|
683
695
|
for (const candidate of nearby_danmus)apply_cluster(candidate);
|
|
@@ -697,7 +709,8 @@ async function src_detaolu(that, config) {
|
|
|
697
709
|
const selected = p.clusters.map((p)=>{
|
|
698
710
|
if (1 === p.danuni_dans.length) return p.danuni_dans[0].danuni_dan;
|
|
699
711
|
{
|
|
700
|
-
const dans = p.danuni_dans
|
|
712
|
+
const dans = p.danuni_dans;
|
|
713
|
+
const pool = new UniPool(dans.map((d)=>d.danuni_dan));
|
|
701
714
|
function isAllBottomMode(p) {
|
|
702
715
|
return p.dans.every((d)=>d.mode === UniDMTools.Modes.Bottom);
|
|
703
716
|
}
|