@dan-uni/dan-any-plugin-detaolu 0.7.3 → 0.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +97 -77
- package/dist/index.js.LICENSE.txt +1 -1
- package/dist/index.umd.min.js +25452 -25255
- package/dist/index.umd.min.js.LICENSE.txt +2 -2
- package/dist/pakku.js/index.d.ts +1 -1
- package/dist/pakku.js/types.d.ts +6 -0
- package/package.json +2 -2
- package/src/index.ts +2 -2
- package/src/pakku.js/index.ts +30 -15
- package/src/pakku.js/similarity-gen.js +2 -15
- package/src/pakku.js/similarity_stub.ts +28 -14
- package/src/pakku.js/types.ts +7 -0
- package/tsconfig.json +2 -2
- package/types/tsconfig.tsbuildinfo +1 -0
- /package/dist/static/wasm/{54a7637a81e5f86e.module.wasm → 54a7637a.module.wasm} +0 -0
package/dist/index.js
CHANGED
|
@@ -270,19 +270,26 @@ function detect_similarity(str, mode, index_l, S) {
|
|
|
270
270
|
const dist = ret >>> 19 & 2047;
|
|
271
271
|
const idx_diff = 524287 & ret;
|
|
272
272
|
let reason_str;
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
273
|
+
switch(reason){
|
|
274
|
+
case 0:
|
|
275
|
+
S.combined_identical++;
|
|
276
|
+
reason_str = '==';
|
|
277
|
+
break;
|
|
278
|
+
case 1:
|
|
279
|
+
S.combined_edit_distance++;
|
|
280
|
+
reason_str = `≤${dist}`;
|
|
281
|
+
break;
|
|
282
|
+
case 3:
|
|
283
|
+
S.combined_cosine_distance++;
|
|
284
|
+
reason_str = `${dist}%`;
|
|
285
|
+
break;
|
|
286
|
+
case 2:
|
|
287
|
+
S.combined_pinyin_distance++;
|
|
288
|
+
reason_str = `P≤${dist}`;
|
|
289
|
+
break;
|
|
290
|
+
default:
|
|
291
|
+
throw new Error(`similarity wasm returned unknown reason: ${ret}`);
|
|
292
|
+
}
|
|
286
293
|
return {
|
|
287
294
|
reason: reason_str,
|
|
288
295
|
idx_diff
|
|
@@ -344,7 +351,7 @@ class Queue {
|
|
|
344
351
|
}
|
|
345
352
|
/**
|
|
346
353
|
* @author: xmcp(代码主要逻辑来源)
|
|
347
|
-
* @see: https://github.com/xmcp/pakku.js
|
|
354
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/core/combine_worker.ts
|
|
348
355
|
* @license: GPL-3.0
|
|
349
356
|
* 本文件内代码来源见上,经部分修改,并整合config注释
|
|
350
357
|
*/ const DEFAULT_CONFIG = {
|
|
@@ -372,51 +379,51 @@ class Queue {
|
|
|
372
379
|
PROC_TYPE4: true,
|
|
373
380
|
PROC_POOL1: false
|
|
374
381
|
};
|
|
375
|
-
const ENDING_CHARS = new Set(
|
|
382
|
+
const ENDING_CHARS = new Set('.。,,/??!!…~~@^、+=-_♂♀ ');
|
|
376
383
|
const WIDTH_TABLE = new Map(Object.entries({
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
'!':
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
';':
|
|
407
|
-
|
|
408
|
-
':':
|
|
409
|
-
|
|
410
|
-
',':
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
'?':
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
384
|
+
' ': ' ',
|
|
385
|
+
'1': '1',
|
|
386
|
+
'2': '2',
|
|
387
|
+
'3': '3',
|
|
388
|
+
'4': '4',
|
|
389
|
+
'5': '5',
|
|
390
|
+
'6': '6',
|
|
391
|
+
'7': '7',
|
|
392
|
+
'8': '8',
|
|
393
|
+
'9': '9',
|
|
394
|
+
'0': '0',
|
|
395
|
+
'!': '!',
|
|
396
|
+
'@': '@',
|
|
397
|
+
'#': '#',
|
|
398
|
+
'$': '$',
|
|
399
|
+
'%': '%',
|
|
400
|
+
'^': '^',
|
|
401
|
+
'&': '&',
|
|
402
|
+
'*': '*',
|
|
403
|
+
'(': '(',
|
|
404
|
+
')': ')',
|
|
405
|
+
'-': '-',
|
|
406
|
+
'=': '=',
|
|
407
|
+
'_': '_',
|
|
408
|
+
'+': '+',
|
|
409
|
+
'[': '[',
|
|
410
|
+
']': ']',
|
|
411
|
+
'{': '{',
|
|
412
|
+
'}': '}',
|
|
413
|
+
';': ';',
|
|
414
|
+
''': "'",
|
|
415
|
+
':': ':',
|
|
416
|
+
'"': '"',
|
|
417
|
+
',': ',',
|
|
418
|
+
'.': '.',
|
|
419
|
+
'/': '/',
|
|
420
|
+
'<': '<',
|
|
421
|
+
'>': '>',
|
|
422
|
+
'?': '?',
|
|
423
|
+
'\': '\\',
|
|
424
|
+
'|': '|',
|
|
425
|
+
'`': '`',
|
|
426
|
+
'~': '~',
|
|
420
427
|
q: 'q',
|
|
421
428
|
w: 'w',
|
|
422
429
|
e: 'e',
|
|
@@ -530,19 +537,23 @@ function trim_dispstr(text) {
|
|
|
530
537
|
}
|
|
531
538
|
function select_median_length(strs) {
|
|
532
539
|
if (1 === strs.length) return strs[0];
|
|
533
|
-
const sorted = strs.
|
|
540
|
+
const sorted = strs.toSorted((a, b)=>a.length - b.length);
|
|
534
541
|
const mid = Math.floor(sorted.length / 2);
|
|
535
542
|
return sorted[mid];
|
|
536
543
|
}
|
|
537
544
|
async function load_wasm(wasm_mod) {
|
|
538
|
-
await similarity_stub_init(wasm_mod ?? await fs_extra.readFile(new URL('
|
|
545
|
+
await similarity_stub_init(wasm_mod ?? await fs_extra.readFile(new URL('similarity-gen.wasm', import.meta.url)));
|
|
546
|
+
}
|
|
547
|
+
function make_ptr_idx(idx, is_next_chunk) {
|
|
548
|
+
return is_next_chunk ? -1 - idx : idx;
|
|
539
549
|
}
|
|
540
550
|
async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
541
551
|
await load_wasm();
|
|
542
552
|
begin_chunk(config);
|
|
543
553
|
const ret = {
|
|
544
554
|
clusters: [],
|
|
545
|
-
stats: new Stats()
|
|
555
|
+
stats: new Stats(),
|
|
556
|
+
deleted_chunk: []
|
|
546
557
|
};
|
|
547
558
|
function apply_single_cluster(idx, obj, desc) {
|
|
548
559
|
ret.clusters.push({
|
|
@@ -565,7 +576,7 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
565
576
|
function apply_cluster(irs) {
|
|
566
577
|
if (1 === irs.length) ret.clusters.push({
|
|
567
578
|
peers_ptr: irs.map((ir)=>[
|
|
568
|
-
ir.
|
|
579
|
+
ir.ptr_idx,
|
|
569
580
|
ir.sim_reason
|
|
570
581
|
]),
|
|
571
582
|
desc: [],
|
|
@@ -575,7 +586,8 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
575
586
|
});
|
|
576
587
|
else {
|
|
577
588
|
const text_cnts = new Map();
|
|
578
|
-
let most_texts = []
|
|
589
|
+
let most_texts = [];
|
|
590
|
+
let most_cnt = 0;
|
|
579
591
|
for (const ir of irs){
|
|
580
592
|
const text = ir.str;
|
|
581
593
|
const cnt = 1 + (text_cnts.get(text) || 0);
|
|
@@ -590,11 +602,11 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
590
602
|
const most_text = select_median_length(most_texts);
|
|
591
603
|
ret.clusters.push({
|
|
592
604
|
peers_ptr: irs.map((ir)=>[
|
|
593
|
-
ir.
|
|
605
|
+
ir.ptr_idx,
|
|
594
606
|
ir.sim_reason
|
|
595
607
|
]),
|
|
596
608
|
desc: most_cnt > 1 ? [
|
|
597
|
-
|
|
609
|
+
`采用了出现 ${most_cnt} 次的文本`
|
|
598
610
|
] : [],
|
|
599
611
|
chosen_str: most_text,
|
|
600
612
|
danuni_count: most_cnt,
|
|
@@ -602,26 +614,26 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
602
614
|
});
|
|
603
615
|
}
|
|
604
616
|
}
|
|
605
|
-
function obj_to_ir(objs, s) {
|
|
617
|
+
function obj_to_ir(objs, s, is_next_chunk) {
|
|
606
618
|
return objs.map((obj, idx)=>{
|
|
607
619
|
if (!config.PROC_POOL1 && 1 === obj.pool) {
|
|
608
620
|
if (s) {
|
|
609
621
|
s.ignored_type++;
|
|
610
|
-
apply_single_cluster(idx, obj,
|
|
622
|
+
apply_single_cluster(idx, obj, '已忽略字幕弹幕,可以在选项中修改');
|
|
611
623
|
}
|
|
612
624
|
return null;
|
|
613
625
|
}
|
|
614
626
|
if (!config.PROC_TYPE7 && 4 === obj.mode) {
|
|
615
627
|
if (s) {
|
|
616
628
|
s.ignored_type++;
|
|
617
|
-
apply_single_cluster(idx, obj,
|
|
629
|
+
apply_single_cluster(idx, obj, '已忽略特殊弹幕,可以在选项中修改');
|
|
618
630
|
}
|
|
619
631
|
return null;
|
|
620
632
|
}
|
|
621
633
|
if (!config.PROC_TYPE4 && 1 === obj.mode) {
|
|
622
634
|
if (s) {
|
|
623
635
|
s.ignored_type++;
|
|
624
|
-
apply_single_cluster(idx, obj,
|
|
636
|
+
apply_single_cluster(idx, obj, '已忽略底部弹幕,可以在选项中修改');
|
|
625
637
|
}
|
|
626
638
|
return null;
|
|
627
639
|
}
|
|
@@ -632,6 +644,12 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
632
644
|
if (s) {
|
|
633
645
|
s.deleted_blacklist++;
|
|
634
646
|
s.deleted_blacklist_each[matched] = (s.deleted_blacklist_each[matched] || 0) + 1;
|
|
647
|
+
ret.deleted_chunk.push({
|
|
648
|
+
...obj,
|
|
649
|
+
pakku: {
|
|
650
|
+
deleted_reason: `命中黑名单:${matched}`
|
|
651
|
+
}
|
|
652
|
+
});
|
|
635
653
|
}
|
|
636
654
|
return null;
|
|
637
655
|
}
|
|
@@ -639,7 +657,7 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
639
657
|
if (whitelisted(disp_str, config)) {
|
|
640
658
|
if (s) {
|
|
641
659
|
s.ignored_whitelist++;
|
|
642
|
-
apply_single_cluster(idx, obj,
|
|
660
|
+
apply_single_cluster(idx, obj, '命中白名单');
|
|
643
661
|
}
|
|
644
662
|
return null;
|
|
645
663
|
}
|
|
@@ -648,12 +666,12 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
648
666
|
return {
|
|
649
667
|
obj,
|
|
650
668
|
str: detaolued,
|
|
651
|
-
idx,
|
|
669
|
+
ptr_idx: make_ptr_idx(idx, is_next_chunk),
|
|
652
670
|
sim_reason: 'ORIG'
|
|
653
671
|
};
|
|
654
672
|
}).filter((obj)=>null !== obj);
|
|
655
673
|
}
|
|
656
|
-
const danmus = obj_to_ir(chunk.objs, ret.stats);
|
|
674
|
+
const danmus = obj_to_ir(chunk.objs, ret.stats, false);
|
|
657
675
|
const nearby_danmus = new Queue();
|
|
658
676
|
const THRESHOLD_MS = (config?.THRESHOLD ?? DEFAULT_CONFIG.THRESHOLD) * 1000;
|
|
659
677
|
for (const dm of danmus){
|
|
@@ -664,13 +682,14 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
664
682
|
nearby_danmus.pop();
|
|
665
683
|
}
|
|
666
684
|
const sim = detect_similarity(dm.str, dm.obj.mode, nearby_danmus.index_l, ret.stats);
|
|
667
|
-
if (null
|
|
685
|
+
if (null === sim) nearby_danmus.push([
|
|
686
|
+
dm
|
|
687
|
+
]);
|
|
688
|
+
else {
|
|
668
689
|
const candidate = nearby_danmus.storage[nearby_danmus.index_r - sim.idx_diff];
|
|
669
690
|
dm.sim_reason = sim.reason;
|
|
670
691
|
candidate.push(dm);
|
|
671
|
-
}
|
|
672
|
-
dm
|
|
673
|
-
]);
|
|
692
|
+
}
|
|
674
693
|
}
|
|
675
694
|
begin_index_lock();
|
|
676
695
|
for (const candidate of nearby_danmus)apply_cluster(candidate);
|
|
@@ -690,7 +709,8 @@ async function src_detaolu(that, config) {
|
|
|
690
709
|
const selected = p.clusters.map((p)=>{
|
|
691
710
|
if (1 === p.danuni_dans.length) return p.danuni_dans[0].danuni_dan;
|
|
692
711
|
{
|
|
693
|
-
const dans = p.danuni_dans
|
|
712
|
+
const dans = p.danuni_dans;
|
|
713
|
+
const pool = new UniPool(dans.map((d)=>d.danuni_dan));
|
|
694
714
|
function isAllBottomMode(p) {
|
|
695
715
|
return p.dans.every((d)=>d.mode === UniDMTools.Modes.Bottom);
|
|
696
716
|
}
|