@dan-uni/dan-any-plugin-detaolu 0.7.3 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -270,19 +270,26 @@ function detect_similarity(str, mode, index_l, S) {
270
270
  const dist = ret >>> 19 & 2047;
271
271
  const idx_diff = 524287 & ret;
272
272
  let reason_str;
273
- if (0 === reason) {
274
- S.combined_identical++;
275
- reason_str = '==';
276
- } else if (1 === reason) {
277
- S.combined_edit_distance++;
278
- reason_str = `\u{2264}${dist}`;
279
- } else if (3 === reason) {
280
- S.combined_cosine_distance++;
281
- reason_str = `${dist}%`;
282
- } else if (2 === reason) {
283
- S.combined_pinyin_distance++;
284
- reason_str = `P\u{2264}${dist}`;
285
- } else throw new Error(`similarity wasm returned unknown reason: ${ret}`);
273
+ switch(reason){
274
+ case 0:
275
+ S.combined_identical++;
276
+ reason_str = '==';
277
+ break;
278
+ case 1:
279
+ S.combined_edit_distance++;
280
+ reason_str = `≤${dist}`;
281
+ break;
282
+ case 3:
283
+ S.combined_cosine_distance++;
284
+ reason_str = `${dist}%`;
285
+ break;
286
+ case 2:
287
+ S.combined_pinyin_distance++;
288
+ reason_str = `P≤${dist}`;
289
+ break;
290
+ default:
291
+ throw new Error(`similarity wasm returned unknown reason: ${ret}`);
292
+ }
286
293
  return {
287
294
  reason: reason_str,
288
295
  idx_diff
@@ -344,7 +351,7 @@ class Queue {
344
351
  }
345
352
  /**
346
353
  * @author: xmcp(代码主要逻辑来源)
347
- * @see: https://github.com/xmcp/pakku.js
354
+ * @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/core/combine_worker.ts
348
355
  * @license: GPL-3.0
349
356
  * 本文件内代码来源见上,经部分修改,并整合config注释
350
357
  */ const DEFAULT_CONFIG = {
@@ -372,51 +379,51 @@ class Queue {
372
379
  PROC_TYPE4: true,
373
380
  PROC_POOL1: false
374
381
  };
375
- const ENDING_CHARS = new Set(".\u3002,\uFF0C/?\uFF1F!\uFF01\u2026~\uFF5E@^\u3001+=-_\u2642\u2640 ");
382
+ const ENDING_CHARS = new Set('.。,，/?？!！…~～@^、+=-_♂♀ ');
376
383
  const WIDTH_TABLE = new Map(Object.entries({
377
- "\u3000": ' ',
378
- "\uFF11": '1',
379
- "\uFF12": '2',
380
- "\uFF13": '3',
381
- "\uFF14": '4',
382
- "\uFF15": '5',
383
- "\uFF16": '6',
384
- "\uFF17": '7',
385
- "\uFF18": '8',
386
- "\uFF19": '9',
387
- "\uFF10": '0',
388
- '!': "\uFF01",
389
- "\uFF20": '@',
390
- "\uFF03": '#',
391
- "\uFF04": '$',
392
- "\uFF05": '%',
393
- "\uFF3E": '^',
394
- "\uFF06": '&',
395
- "\uFF0A": '*',
396
- "\uFF08": '(',
397
- "\uFF09": ')',
398
- "\uFF0D": '-',
399
- "\uFF1D": '=',
400
- "\uFF3F": '_',
401
- "\uFF0B": '+',
402
- "\uFF3B": '[',
403
- "\uFF3D": ']',
404
- "\uFF5B": '{',
405
- "\uFF5D": '}',
406
- ';': "\uFF1B",
407
- "\uFF07": "'",
408
- ':': "\uFF1A",
409
- "\uFF02": '"',
410
- ',': "\uFF0C",
411
- "\uFF0E": '.',
412
- "\uFF0F": '/',
413
- "\uFF1C": '<',
414
- "\uFF1E": '>',
415
- '?': "\uFF1F",
416
- "\uFF3C": '\\',
417
- "\uFF5C": '|',
418
- "\uFF40": '`',
419
- "\uFF5E": '~',
384
+ '　': ' ',
385
+ '１': '1',
386
+ '２': '2',
387
+ '３': '3',
388
+ '４': '4',
389
+ '５': '5',
390
+ '６': '6',
391
+ '７': '7',
392
+ '８': '8',
393
+ '９': '9',
394
+ '０': '0',
395
+ '!': '！',
396
+ '＠': '@',
397
+ '＃': '#',
398
+ '＄': '$',
399
+ '％': '%',
400
+ '＾': '^',
401
+ '＆': '&',
402
+ '＊': '*',
403
+ '（': '(',
404
+ '）': ')',
405
+ '－': '-',
406
+ '＝': '=',
407
+ '＿': '_',
408
+ '＋': '+',
409
+ '［': '[',
410
+ '］': ']',
411
+ '｛': '{',
412
+ '｝': '}',
413
+ ';': '；',
414
+ '＇': "'",
415
+ ':': '：',
416
+ '＂': '"',
417
+ ',': '，',
418
+ '．': '.',
419
+ '／': '/',
420
+ '＜': '<',
421
+ '＞': '>',
422
+ '?': '？',
423
+ '＼': '\\',
424
+ '｜': '|',
425
+ '｀': '`',
426
+ '～': '~',
420
427
  q: 'q',
421
428
  w: 'w',
422
429
  e: 'e',
@@ -530,19 +537,23 @@ function trim_dispstr(text) {
530
537
  }
531
538
  function select_median_length(strs) {
532
539
  if (1 === strs.length) return strs[0];
533
- const sorted = strs.sort((a, b)=>a.length - b.length);
540
+ const sorted = strs.toSorted((a, b)=>a.length - b.length);
534
541
  const mid = Math.floor(sorted.length / 2);
535
542
  return sorted[mid];
536
543
  }
537
544
  async function load_wasm(wasm_mod) {
538
- await similarity_stub_init(wasm_mod ?? await fs_extra.readFile(new URL('./similarity-gen.wasm', import.meta.url)));
545
+ await similarity_stub_init(wasm_mod ?? await fs_extra.readFile(new URL('similarity-gen.wasm', import.meta.url)));
546
+ }
547
+ function make_ptr_idx(idx, is_next_chunk) {
548
+ return is_next_chunk ? -1 - idx : idx;
539
549
  }
540
550
  async function merge(chunk, config = DEFAULT_CONFIG) {
541
551
  await load_wasm();
542
552
  begin_chunk(config);
543
553
  const ret = {
544
554
  clusters: [],
545
- stats: new Stats()
555
+ stats: new Stats(),
556
+ deleted_chunk: []
546
557
  };
547
558
  function apply_single_cluster(idx, obj, desc) {
548
559
  ret.clusters.push({
@@ -565,7 +576,7 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
565
576
  function apply_cluster(irs) {
566
577
  if (1 === irs.length) ret.clusters.push({
567
578
  peers_ptr: irs.map((ir)=>[
568
- ir.idx,
579
+ ir.ptr_idx,
569
580
  ir.sim_reason
570
581
  ]),
571
582
  desc: [],
@@ -575,7 +586,8 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
575
586
  });
576
587
  else {
577
588
  const text_cnts = new Map();
578
- let most_texts = [], most_cnt = 0;
589
+ let most_texts = [];
590
+ let most_cnt = 0;
579
591
  for (const ir of irs){
580
592
  const text = ir.str;
581
593
  const cnt = 1 + (text_cnts.get(text) || 0);
@@ -590,11 +602,11 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
590
602
  const most_text = select_median_length(most_texts);
591
603
  ret.clusters.push({
592
604
  peers_ptr: irs.map((ir)=>[
593
- ir.idx,
605
+ ir.ptr_idx,
594
606
  ir.sim_reason
595
607
  ]),
596
608
  desc: most_cnt > 1 ? [
597
- `\u{91C7}\u{7528}\u{4E86}\u{51FA}\u{73B0} ${most_cnt} \u{6B21}\u{7684}\u{6587}\u{672C}`
609
+ `采用了出现 ${most_cnt} 次的文本`
598
610
  ] : [],
599
611
  chosen_str: most_text,
600
612
  danuni_count: most_cnt,
@@ -602,26 +614,26 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
602
614
  });
603
615
  }
604
616
  }
605
- function obj_to_ir(objs, s) {
617
+ function obj_to_ir(objs, s, is_next_chunk) {
606
618
  return objs.map((obj, idx)=>{
607
619
  if (!config.PROC_POOL1 && 1 === obj.pool) {
608
620
  if (s) {
609
621
  s.ignored_type++;
610
- apply_single_cluster(idx, obj, "\u5DF2\u5FFD\u7565\u5B57\u5E55\u5F39\u5E55\uFF0C\u53EF\u4EE5\u5728\u9009\u9879\u4E2D\u4FEE\u6539");
622
+ apply_single_cluster(idx, obj, '已忽略字幕弹幕，可以在选项中修改');
611
623
  }
612
624
  return null;
613
625
  }
614
626
  if (!config.PROC_TYPE7 && 4 === obj.mode) {
615
627
  if (s) {
616
628
  s.ignored_type++;
617
- apply_single_cluster(idx, obj, "\u5DF2\u5FFD\u7565\u7279\u6B8A\u5F39\u5E55\uFF0C\u53EF\u4EE5\u5728\u9009\u9879\u4E2D\u4FEE\u6539");
629
+ apply_single_cluster(idx, obj, '已忽略特殊弹幕，可以在选项中修改');
618
630
  }
619
631
  return null;
620
632
  }
621
633
  if (!config.PROC_TYPE4 && 1 === obj.mode) {
622
634
  if (s) {
623
635
  s.ignored_type++;
624
- apply_single_cluster(idx, obj, "\u5DF2\u5FFD\u7565\u5E95\u90E8\u5F39\u5E55\uFF0C\u53EF\u4EE5\u5728\u9009\u9879\u4E2D\u4FEE\u6539");
636
+ apply_single_cluster(idx, obj, '已忽略底部弹幕，可以在选项中修改');
625
637
  }
626
638
  return null;
627
639
  }
@@ -632,6 +644,12 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
632
644
  if (s) {
633
645
  s.deleted_blacklist++;
634
646
  s.deleted_blacklist_each[matched] = (s.deleted_blacklist_each[matched] || 0) + 1;
647
+ ret.deleted_chunk.push({
648
+ ...obj,
649
+ pakku: {
650
+ deleted_reason: `命中黑名单:${matched}`
651
+ }
652
+ });
635
653
  }
636
654
  return null;
637
655
  }
@@ -639,7 +657,7 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
639
657
  if (whitelisted(disp_str, config)) {
640
658
  if (s) {
641
659
  s.ignored_whitelist++;
642
- apply_single_cluster(idx, obj, "\u547D\u4E2D\u767D\u540D\u5355");
660
+ apply_single_cluster(idx, obj, '命中白名单');
643
661
  }
644
662
  return null;
645
663
  }
@@ -648,12 +666,12 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
648
666
  return {
649
667
  obj,
650
668
  str: detaolued,
651
- idx,
669
+ ptr_idx: make_ptr_idx(idx, is_next_chunk),
652
670
  sim_reason: 'ORIG'
653
671
  };
654
672
  }).filter((obj)=>null !== obj);
655
673
  }
656
- const danmus = obj_to_ir(chunk.objs, ret.stats);
674
+ const danmus = obj_to_ir(chunk.objs, ret.stats, false);
657
675
  const nearby_danmus = new Queue();
658
676
  const THRESHOLD_MS = (config?.THRESHOLD ?? DEFAULT_CONFIG.THRESHOLD) * 1000;
659
677
  for (const dm of danmus){
@@ -664,13 +682,14 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
664
682
  nearby_danmus.pop();
665
683
  }
666
684
  const sim = detect_similarity(dm.str, dm.obj.mode, nearby_danmus.index_l, ret.stats);
667
- if (null !== sim) {
685
+ if (null === sim) nearby_danmus.push([
686
+ dm
687
+ ]);
688
+ else {
668
689
  const candidate = nearby_danmus.storage[nearby_danmus.index_r - sim.idx_diff];
669
690
  dm.sim_reason = sim.reason;
670
691
  candidate.push(dm);
671
- } else nearby_danmus.push([
672
- dm
673
- ]);
692
+ }
674
693
  }
675
694
  begin_index_lock();
676
695
  for (const candidate of nearby_danmus)apply_cluster(candidate);
@@ -690,7 +709,8 @@ async function src_detaolu(that, config) {
690
709
  const selected = p.clusters.map((p)=>{
691
710
  if (1 === p.danuni_dans.length) return p.danuni_dans[0].danuni_dan;
692
711
  {
693
- const dans = p.danuni_dans, pool = new UniPool(dans.map((d)=>d.danuni_dan));
712
+ const dans = p.danuni_dans;
713
+ const pool = new UniPool(dans.map((d)=>d.danuni_dan));
694
714
  function isAllBottomMode(p) {
695
715
  return p.dans.every((d)=>d.mode === UniDMTools.Modes.Bottom);
696
716
  }
@@ -1,6 +1,6 @@
1
1
  /**
2
2
  * @author: xmcp(代码主要逻辑来源)
3
- * @see: https://github.com/xmcp/pakku.js
3
+ * @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/core/combine_worker.ts
4
4
  * @license: GPL-3.0
5
5
  * 本文件内代码来源见上,经部分修改,并整合config注释
6
6
  */