@dan-uni/dan-any-plugin-detaolu 0.9.2 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -270,19 +270,26 @@ function detect_similarity(str, mode, index_l, S) {
270
270
  const dist = ret >>> 19 & 2047;
271
271
  const idx_diff = 524287 & ret;
272
272
  let reason_str;
273
- if (0 === reason) {
274
- S.combined_identical++;
275
- reason_str = '==';
276
- } else if (1 === reason) {
277
- S.combined_edit_distance++;
278
- reason_str = `\u{2264}${dist}`;
279
- } else if (3 === reason) {
280
- S.combined_cosine_distance++;
281
- reason_str = `${dist}%`;
282
- } else if (2 === reason) {
283
- S.combined_pinyin_distance++;
284
- reason_str = `P\u{2264}${dist}`;
285
- } else throw new Error(`similarity wasm returned unknown reason: ${ret}`);
273
+ switch(reason){
274
+ case 0:
275
+ S.combined_identical++;
276
+ reason_str = '==';
277
+ break;
278
+ case 1:
279
+ S.combined_edit_distance++;
280
+ reason_str = `≤${dist}`;
281
+ break;
282
+ case 3:
283
+ S.combined_cosine_distance++;
284
+ reason_str = `${dist}%`;
285
+ break;
286
+ case 2:
287
+ S.combined_pinyin_distance++;
288
+ reason_str = `P≤${dist}`;
289
+ break;
290
+ default:
291
+ throw new Error(`similarity wasm returned unknown reason: ${ret}`);
292
+ }
286
293
  return {
287
294
  reason: reason_str,
288
295
  idx_diff
@@ -344,7 +351,7 @@ class Queue {
344
351
  }
345
352
  /**
346
353
  * @author: xmcp(代码主要逻辑来源)
347
- * @see: https://github.com/xmcp/pakku.js
354
+ * @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/core/combine_worker.ts
348
355
  * @license: GPL-3.0
349
356
  * 本文件内代码来源见上,经部分修改,并整合config注释
350
357
  */ const DEFAULT_CONFIG = {
@@ -372,51 +379,51 @@ class Queue {
372
379
  PROC_TYPE4: true,
373
380
  PROC_POOL1: false
374
381
  };
375
- const ENDING_CHARS = new Set(".\u3002,\uFF0C/?\uFF1F!\uFF01\u2026~\uFF5E@^\u3001+=-_\u2642\u2640 ");
382
+ const ENDING_CHARS = new Set('.。,,/??!!…~~@^、+=-_♂♀ ');
376
383
  const WIDTH_TABLE = new Map(Object.entries({
377
- "\u3000": ' ',
378
- "\uFF11": '1',
379
- "\uFF12": '2',
380
- "\uFF13": '3',
381
- "\uFF14": '4',
382
- "\uFF15": '5',
383
- "\uFF16": '6',
384
- "\uFF17": '7',
385
- "\uFF18": '8',
386
- "\uFF19": '9',
387
- "\uFF10": '0',
388
- '!': "\uFF01",
389
- "\uFF20": '@',
390
- "\uFF03": '#',
391
- "\uFF04": '$',
392
- "\uFF05": '%',
393
- "\uFF3E": '^',
394
- "\uFF06": '&',
395
- "\uFF0A": '*',
396
- "\uFF08": '(',
397
- "\uFF09": ')',
398
- "\uFF0D": '-',
399
- "\uFF1D": '=',
400
- "\uFF3F": '_',
401
- "\uFF0B": '+',
402
- "\uFF3B": '[',
403
- "\uFF3D": ']',
404
- "\uFF5B": '{',
405
- "\uFF5D": '}',
406
- ';': "\uFF1B",
407
- "\uFF07": "'",
408
- ':': "\uFF1A",
409
- "\uFF02": '"',
410
- ',': "\uFF0C",
411
- "\uFF0E": '.',
412
- "\uFF0F": '/',
413
- "\uFF1C": '<',
414
- "\uFF1E": '>',
415
- '?': "\uFF1F",
416
- "\uFF3C": '\\',
417
- "\uFF5C": '|',
418
- "\uFF40": '`',
419
- "\uFF5E": '~',
384
+ ' ': ' ',
385
+ '1': '1',
386
+ '2': '2',
387
+ '3': '3',
388
+ '4': '4',
389
+ '5': '5',
390
+ '6': '6',
391
+ '7': '7',
392
+ '8': '8',
393
+ '9': '9',
394
+ '0': '0',
395
+ '!': '!',
396
+ '@': '@',
397
+ '#': '#',
398
+ '$': '$',
399
+ '%': '%',
400
+ '^': '^',
401
+ '&': '&',
402
+ '*': '*',
403
+ '(': '(',
404
+ ')': ')',
405
+ '-': '-',
406
+ '=': '=',
407
+ '_': '_',
408
+ '+': '+',
409
+ '[': '[',
410
+ ']': ']',
411
+ '{': '{',
412
+ '}': '}',
413
+ ';': ';',
414
+ ''': "'",
415
+ ':': ':',
416
+ '"': '"',
417
+ ',': ',',
418
+ '.': '.',
419
+ '/': '/',
420
+ '<': '<',
421
+ '>': '>',
422
+ '?': '?',
423
+ '\': '\\',
424
+ '|': '|',
425
+ '`': '`',
426
+ '~': '~',
420
427
  q: 'q',
421
428
  w: 'w',
422
429
  e: 'e',
@@ -530,12 +537,15 @@ function trim_dispstr(text) {
530
537
  }
531
538
  function select_median_length(strs) {
532
539
  if (1 === strs.length) return strs[0];
533
- const sorted = strs.sort((a, b)=>a.length - b.length);
540
+ const sorted = strs.toSorted((a, b)=>a.length - b.length);
534
541
  const mid = Math.floor(sorted.length / 2);
535
542
  return sorted[mid];
536
543
  }
537
544
  async function load_wasm(wasm_mod) {
538
- await similarity_stub_init(wasm_mod ?? await fs_extra.readFile(new URL('./similarity-gen.wasm', import.meta.url)));
545
+ await similarity_stub_init(wasm_mod ?? await fs_extra.readFile(new URL('similarity-gen.wasm', import.meta.url)));
546
+ }
547
+ function make_ptr_idx(idx, is_next_chunk) {
548
+ return is_next_chunk ? -1 - idx : idx;
539
549
  }
540
550
  async function merge(chunk, config = DEFAULT_CONFIG) {
541
551
  await load_wasm();
@@ -566,7 +576,7 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
566
576
  function apply_cluster(irs) {
567
577
  if (1 === irs.length) ret.clusters.push({
568
578
  peers_ptr: irs.map((ir)=>[
569
- ir.idx,
579
+ ir.ptr_idx,
570
580
  ir.sim_reason
571
581
  ]),
572
582
  desc: [],
@@ -576,7 +586,8 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
576
586
  });
577
587
  else {
578
588
  const text_cnts = new Map();
579
- let most_texts = [], most_cnt = 0;
589
+ let most_texts = [];
590
+ let most_cnt = 0;
580
591
  for (const ir of irs){
581
592
  const text = ir.str;
582
593
  const cnt = 1 + (text_cnts.get(text) || 0);
@@ -591,11 +602,11 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
591
602
  const most_text = select_median_length(most_texts);
592
603
  ret.clusters.push({
593
604
  peers_ptr: irs.map((ir)=>[
594
- ir.idx,
605
+ ir.ptr_idx,
595
606
  ir.sim_reason
596
607
  ]),
597
608
  desc: most_cnt > 1 ? [
598
- `\u{91C7}\u{7528}\u{4E86}\u{51FA}\u{73B0} ${most_cnt} \u{6B21}\u{7684}\u{6587}\u{672C}`
609
+ `采用了出现 ${most_cnt} 次的文本`
599
610
  ] : [],
600
611
  chosen_str: most_text,
601
612
  danuni_count: most_cnt,
@@ -603,26 +614,26 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
603
614
  });
604
615
  }
605
616
  }
606
- function obj_to_ir(objs, s) {
617
+ function obj_to_ir(objs, s, is_next_chunk) {
607
618
  return objs.map((obj, idx)=>{
608
619
  if (!config.PROC_POOL1 && 1 === obj.pool) {
609
620
  if (s) {
610
621
  s.ignored_type++;
611
- apply_single_cluster(idx, obj, "\u5DF2\u5FFD\u7565\u5B57\u5E55\u5F39\u5E55\uFF0C\u53EF\u4EE5\u5728\u9009\u9879\u4E2D\u4FEE\u6539");
622
+ apply_single_cluster(idx, obj, '已忽略字幕弹幕,可以在选项中修改');
612
623
  }
613
624
  return null;
614
625
  }
615
626
  if (!config.PROC_TYPE7 && 4 === obj.mode) {
616
627
  if (s) {
617
628
  s.ignored_type++;
618
- apply_single_cluster(idx, obj, "\u5DF2\u5FFD\u7565\u7279\u6B8A\u5F39\u5E55\uFF0C\u53EF\u4EE5\u5728\u9009\u9879\u4E2D\u4FEE\u6539");
629
+ apply_single_cluster(idx, obj, '已忽略特殊弹幕,可以在选项中修改');
619
630
  }
620
631
  return null;
621
632
  }
622
633
  if (!config.PROC_TYPE4 && 1 === obj.mode) {
623
634
  if (s) {
624
635
  s.ignored_type++;
625
- apply_single_cluster(idx, obj, "\u5DF2\u5FFD\u7565\u5E95\u90E8\u5F39\u5E55\uFF0C\u53EF\u4EE5\u5728\u9009\u9879\u4E2D\u4FEE\u6539");
636
+ apply_single_cluster(idx, obj, '已忽略底部弹幕,可以在选项中修改');
626
637
  }
627
638
  return null;
628
639
  }
@@ -636,7 +647,7 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
636
647
  ret.deleted_chunk.push({
637
648
  ...obj,
638
649
  pakku: {
639
- deleted_reason: `\u{547D}\u{4E2D}\u{9ED1}\u{540D}\u{5355}\u{FF1A}${matched}`
650
+ deleted_reason: `命中黑名单:${matched}`
640
651
  }
641
652
  });
642
653
  }
@@ -646,7 +657,7 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
646
657
  if (whitelisted(disp_str, config)) {
647
658
  if (s) {
648
659
  s.ignored_whitelist++;
649
- apply_single_cluster(idx, obj, "\u547D\u4E2D\u767D\u540D\u5355");
660
+ apply_single_cluster(idx, obj, '命中白名单');
650
661
  }
651
662
  return null;
652
663
  }
@@ -655,12 +666,12 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
655
666
  return {
656
667
  obj,
657
668
  str: detaolued,
658
- idx,
669
+ ptr_idx: make_ptr_idx(idx, is_next_chunk),
659
670
  sim_reason: 'ORIG'
660
671
  };
661
672
  }).filter((obj)=>null !== obj);
662
673
  }
663
- const danmus = obj_to_ir(chunk.objs, ret.stats);
674
+ const danmus = obj_to_ir(chunk.objs, ret.stats, false);
664
675
  const nearby_danmus = new Queue();
665
676
  const THRESHOLD_MS = (config?.THRESHOLD ?? DEFAULT_CONFIG.THRESHOLD) * 1000;
666
677
  for (const dm of danmus){
@@ -671,13 +682,14 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
671
682
  nearby_danmus.pop();
672
683
  }
673
684
  const sim = detect_similarity(dm.str, dm.obj.mode, nearby_danmus.index_l, ret.stats);
674
- if (null !== sim) {
685
+ if (null === sim) nearby_danmus.push([
686
+ dm
687
+ ]);
688
+ else {
675
689
  const candidate = nearby_danmus.storage[nearby_danmus.index_r - sim.idx_diff];
676
690
  dm.sim_reason = sim.reason;
677
691
  candidate.push(dm);
678
- } else nearby_danmus.push([
679
- dm
680
- ]);
692
+ }
681
693
  }
682
694
  begin_index_lock();
683
695
  for (const candidate of nearby_danmus)apply_cluster(candidate);
@@ -697,7 +709,8 @@ async function src_detaolu(that, config) {
697
709
  const selected = p.clusters.map((p)=>{
698
710
  if (1 === p.danuni_dans.length) return p.danuni_dans[0].danuni_dan;
699
711
  {
700
- const dans = p.danuni_dans, pool = new UniPool(dans.map((d)=>d.danuni_dan));
712
+ const dans = p.danuni_dans;
713
+ const pool = new UniPool(dans.map((d)=>d.danuni_dan));
701
714
  function isAllBottomMode(p) {
702
715
  return p.dans.every((d)=>d.mode === UniDMTools.Modes.Bottom);
703
716
  }
@@ -1,6 +1,6 @@
1
1
  /**
2
2
  * @author: xmcp(代码主要逻辑来源)
3
- * @see: https://github.com/xmcp/pakku.js
3
+ * @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/core/combine_worker.ts
4
4
  * @license: GPL-3.0
5
5
  * 本文件内代码来源见上,经部分修改,并整合config注释
6
6
  */