@dan-uni/dan-any-plugin-detaolu 0.9.2 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -50,7 +50,7 @@ var Module = function(moduleArg = {}) {
50
50
  return 'FS_createPath' === a || 'FS_createDataFile' === a || 'FS_createPreloadedFile' === a || 'FS_unlink' === a || 'addRunDependency' === a || 'FS_createLazyFile' === a || 'FS_createDevice' === a || 'removeRunDependency' === a;
51
51
  }
52
52
  function L(a, b) {
53
- 'undefined' == typeof globalThis || Object.getOwnPropertyDescriptor(globalThis, a) || Object.defineProperty(globalThis, a, {
53
+ "u" < typeof globalThis || Object.getOwnPropertyDescriptor(globalThis, a) || Object.defineProperty(globalThis, a, {
54
54
  configurable: !0,
55
55
  get () {
56
56
  b();
@@ -77,7 +77,7 @@ var Module = function(moduleArg = {}) {
77
77
  var N = (a)=>{
78
78
  N.g || (N.g = {});
79
79
  N.g[a] || (N.g[a] = 1, t(a));
80
- }, P = 'undefined' != typeof TextDecoder ? new TextDecoder() : void 0, R = [
80
+ }, P = "u" > typeof TextDecoder ? new TextDecoder() : void 0, R = [
81
81
  null,
82
82
  [],
83
83
  []
@@ -270,19 +270,26 @@ function detect_similarity(str, mode, index_l, S) {
270
270
  const dist = ret >>> 19 & 2047;
271
271
  const idx_diff = 524287 & ret;
272
272
  let reason_str;
273
- if (0 === reason) {
274
- S.combined_identical++;
275
- reason_str = '==';
276
- } else if (1 === reason) {
277
- S.combined_edit_distance++;
278
- reason_str = `\u{2264}${dist}`;
279
- } else if (3 === reason) {
280
- S.combined_cosine_distance++;
281
- reason_str = `${dist}%`;
282
- } else if (2 === reason) {
283
- S.combined_pinyin_distance++;
284
- reason_str = `P\u{2264}${dist}`;
285
- } else throw new Error(`similarity wasm returned unknown reason: ${ret}`);
273
+ switch(reason){
274
+ case 0:
275
+ S.combined_identical++;
276
+ reason_str = '==';
277
+ break;
278
+ case 1:
279
+ S.combined_edit_distance++;
280
+ reason_str = `≤${dist}`;
281
+ break;
282
+ case 3:
283
+ S.combined_cosine_distance++;
284
+ reason_str = `${dist}%`;
285
+ break;
286
+ case 2:
287
+ S.combined_pinyin_distance++;
288
+ reason_str = `P≤${dist}`;
289
+ break;
290
+ default:
291
+ throw new Error(`similarity wasm returned unknown reason: ${ret}`);
292
+ }
286
293
  return {
287
294
  reason: reason_str,
288
295
  idx_diff
@@ -344,7 +351,10 @@ class Queue {
344
351
  }
345
352
  /**
346
353
  * @author: xmcp(代码主要逻辑来源)
347
- * @see: https://github.com/xmcp/pakku.js
354
+ * @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/core/combine_worker.ts
355
+ * @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/background/config.ts
356
+ * @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/page/options.html
357
+ * @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/page/options.ts
348
358
  * @license: GPL-3.0
349
359
  * 本文件内代码来源见上,经部分修改,并整合config注释
350
360
  */ const DEFAULT_CONFIG = {
@@ -365,6 +375,8 @@ class Queue {
365
375
  '66666'
366
376
  ]
367
377
  ],
378
+ FORCELIST_CONTINUE_ON_MATCH: true,
379
+ FORCELIST_APPLY_SINGULAR: false,
368
380
  WHITELIST: [],
369
381
  BLACKLIST: [],
370
382
  CROSS_MODE: true,
@@ -372,51 +384,51 @@ class Queue {
372
384
  PROC_TYPE4: true,
373
385
  PROC_POOL1: false
374
386
  };
375
- const ENDING_CHARS = new Set(".\u3002,\uFF0C/?\uFF1F!\uFF01\u2026~\uFF5E@^\u3001+=-_\u2642\u2640 ");
387
+ const ENDING_CHARS = new Set('.。,，/?？!！…~～@^、+=-_♂♀ ');
376
388
  const WIDTH_TABLE = new Map(Object.entries({
377
- "\u3000": ' ',
378
- "\uFF11": '1',
379
- "\uFF12": '2',
380
- "\uFF13": '3',
381
- "\uFF14": '4',
382
- "\uFF15": '5',
383
- "\uFF16": '6',
384
- "\uFF17": '7',
385
- "\uFF18": '8',
386
- "\uFF19": '9',
387
- "\uFF10": '0',
388
- '!': "\uFF01",
389
- "\uFF20": '@',
390
- "\uFF03": '#',
391
- "\uFF04": '$',
392
- "\uFF05": '%',
393
- "\uFF3E": '^',
394
- "\uFF06": '&',
395
- "\uFF0A": '*',
396
- "\uFF08": '(',
397
- "\uFF09": ')',
398
- "\uFF0D": '-',
399
- "\uFF1D": '=',
400
- "\uFF3F": '_',
401
- "\uFF0B": '+',
402
- "\uFF3B": '[',
403
- "\uFF3D": ']',
404
- "\uFF5B": '{',
405
- "\uFF5D": '}',
406
- ';': "\uFF1B",
407
- "\uFF07": "'",
408
- ':': "\uFF1A",
409
- "\uFF02": '"',
410
- ',': "\uFF0C",
411
- "\uFF0E": '.',
412
- "\uFF0F": '/',
413
- "\uFF1C": '<',
414
- "\uFF1E": '>',
415
- '?': "\uFF1F",
416
- "\uFF3C": '\\',
417
- "\uFF5C": '|',
418
- "\uFF40": '`',
419
- "\uFF5E": '~',
389
+ '　': ' ',
390
+ '１': '1',
391
+ '２': '2',
392
+ '３': '3',
393
+ '４': '4',
394
+ '５': '5',
395
+ '６': '6',
396
+ '７': '7',
397
+ '８': '8',
398
+ '９': '9',
399
+ '０': '0',
400
+ '!': '！',
401
+ '＠': '@',
402
+ '＃': '#',
403
+ '＄': '$',
404
+ '％': '%',
405
+ '＾': '^',
406
+ '＆': '&',
407
+ '＊': '*',
408
+ '（': '(',
409
+ '）': ')',
410
+ '－': '-',
411
+ '＝': '=',
412
+ '＿': '_',
413
+ '＋': '+',
414
+ '［': '[',
415
+ '］': ']',
416
+ '｛': '{',
417
+ '｝': '}',
418
+ ';': '；',
419
+ '＇': "'",
420
+ ':': '：',
421
+ '＂': '"',
422
+ ',': '，',
423
+ '．': '.',
424
+ '／': '/',
425
+ '＜': '<',
426
+ '＞': '>',
427
+ '?': '？',
428
+ '＼': '\\',
429
+ '｜': '|',
430
+ '｀': '`',
431
+ '～': '~',
420
432
  q: 'q',
421
433
  w: 'w',
422
434
  e: 'e',
@@ -475,9 +487,10 @@ const detaolu = (inp, config)=>{
475
487
  const TRIM_SPACE = config.TRIM_SPACE;
476
488
  const TRIM_WIDTH = config.TRIM_WIDTH;
477
489
  const FORCELIST = (config?.FORCELIST ?? DEFAULT_CONFIG.FORCELIST).map(([pattern, repl])=>[
478
- new RegExp(pattern, 'gi'),
490
+ new RegExp(pattern, 'giu'),
479
491
  repl
480
492
  ]);
493
+ const FORCELIST_BREAK_ON_MATCH = !config.FORCELIST_CONTINUE_ON_MATCH;
481
494
  let len = inp.length;
482
495
  let text = '';
483
496
  if (TRIM_ENDING) {
@@ -490,20 +503,19 @@ const detaolu = (inp, config)=>{
490
503
  }
491
504
  else text = inp.slice(0, len);
492
505
  if (TRIM_SPACE) text = text.replaceAll(/[ \u3000]+/g, ' ').replaceAll(/([\u3000-\u9FFF\uFF00-\uFFEF]) (?=[\u3000-\u9FFF\uFF00-\uFFEF])/g, '$1');
506
+ let taolu_matched = false;
493
507
  for (const taolu of FORCELIST)if (taolu[0].test(text)) {
494
508
  text = text.replace(taolu[0], taolu[1]);
495
- return [
496
- true,
497
- text
498
- ];
509
+ taolu_matched = true;
510
+ if (FORCELIST_BREAK_ON_MATCH) break;
499
511
  }
500
512
  return [
501
- false,
513
+ taolu_matched,
502
514
  text
503
515
  ];
504
516
  };
505
517
  const whitelisted = (text, config)=>{
506
- const WHITELIST = (config?.WHITELIST ?? DEFAULT_CONFIG.WHITELIST).map((x)=>new RegExp(x[0], 'i'));
518
+ const WHITELIST = (config?.WHITELIST ?? DEFAULT_CONFIG.WHITELIST).map((x)=>new RegExp(x[0], 'iu'));
507
519
  if (0 === WHITELIST.length) return false;
508
520
  return WHITELIST.some((re)=>re.test(text));
509
521
  };
@@ -530,12 +542,15 @@ function trim_dispstr(text) {
530
542
  }
531
543
  function select_median_length(strs) {
532
544
  if (1 === strs.length) return strs[0];
533
- const sorted = strs.sort((a, b)=>a.length - b.length);
545
+ const sorted = strs.toSorted((a, b)=>a.length - b.length);
534
546
  const mid = Math.floor(sorted.length / 2);
535
547
  return sorted[mid];
536
548
  }
537
549
  async function load_wasm(wasm_mod) {
538
- await similarity_stub_init(wasm_mod ?? await fs_extra.readFile(new URL('./similarity-gen.wasm', import.meta.url)));
550
+ await similarity_stub_init(wasm_mod ?? await fs_extra.readFile(new URL('similarity-gen.wasm', import.meta.url)));
551
+ }
552
+ function make_ptr_idx(idx, is_next_chunk) {
553
+ return is_next_chunk ? -1 - idx : idx;
539
554
  }
540
555
  async function merge(chunk, config = DEFAULT_CONFIG) {
541
556
  await load_wasm();
@@ -566,7 +581,7 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
566
581
  function apply_cluster(irs) {
567
582
  if (1 === irs.length) ret.clusters.push({
568
583
  peers_ptr: irs.map((ir)=>[
569
- ir.idx,
584
+ ir.ptr_idx,
570
585
  ir.sim_reason
571
586
  ]),
572
587
  desc: [],
@@ -576,7 +591,8 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
576
591
  });
577
592
  else {
578
593
  const text_cnts = new Map();
579
- let most_texts = [], most_cnt = 0;
594
+ let most_texts = [];
595
+ let most_cnt = 0;
580
596
  for (const ir of irs){
581
597
  const text = ir.str;
582
598
  const cnt = 1 + (text_cnts.get(text) || 0);
@@ -591,11 +607,11 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
591
607
  const most_text = select_median_length(most_texts);
592
608
  ret.clusters.push({
593
609
  peers_ptr: irs.map((ir)=>[
594
- ir.idx,
610
+ ir.ptr_idx,
595
611
  ir.sim_reason
596
612
  ]),
597
613
  desc: most_cnt > 1 ? [
598
- `\u{91C7}\u{7528}\u{4E86}\u{51FA}\u{73B0} ${most_cnt} \u{6B21}\u{7684}\u{6587}\u{672C}`
614
+ `采用了出现 ${most_cnt} 次的文本`
599
615
  ] : [],
600
616
  chosen_str: most_text,
601
617
  danuni_count: most_cnt,
@@ -603,26 +619,26 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
603
619
  });
604
620
  }
605
621
  }
606
- function obj_to_ir(objs, s) {
622
+ function obj_to_ir(objs, s, is_next_chunk) {
607
623
  return objs.map((obj, idx)=>{
608
624
  if (!config.PROC_POOL1 && 1 === obj.pool) {
609
625
  if (s) {
610
626
  s.ignored_type++;
611
- apply_single_cluster(idx, obj, "\u5DF2\u5FFD\u7565\u5B57\u5E55\u5F39\u5E55\uFF0C\u53EF\u4EE5\u5728\u9009\u9879\u4E2D\u4FEE\u6539");
627
+ apply_single_cluster(idx, obj, '已忽略字幕弹幕，可以在选项中修改');
612
628
  }
613
629
  return null;
614
630
  }
615
631
  if (!config.PROC_TYPE7 && 4 === obj.mode) {
616
632
  if (s) {
617
633
  s.ignored_type++;
618
- apply_single_cluster(idx, obj, "\u5DF2\u5FFD\u7565\u7279\u6B8A\u5F39\u5E55\uFF0C\u53EF\u4EE5\u5728\u9009\u9879\u4E2D\u4FEE\u6539");
634
+ apply_single_cluster(idx, obj, '已忽略特殊弹幕，可以在选项中修改');
619
635
  }
620
636
  return null;
621
637
  }
622
638
  if (!config.PROC_TYPE4 && 1 === obj.mode) {
623
639
  if (s) {
624
640
  s.ignored_type++;
625
- apply_single_cluster(idx, obj, "\u5DF2\u5FFD\u7565\u5E95\u90E8\u5F39\u5E55\uFF0C\u53EF\u4EE5\u5728\u9009\u9879\u4E2D\u4FEE\u6539");
641
+ apply_single_cluster(idx, obj, '已忽略底部弹幕，可以在选项中修改');
626
642
  }
627
643
  return null;
628
644
  }
@@ -636,7 +652,7 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
636
652
  ret.deleted_chunk.push({
637
653
  ...obj,
638
654
  pakku: {
639
- deleted_reason: `\u{547D}\u{4E2D}\u{9ED1}\u{540D}\u{5355}\u{FF1A}${matched}`
655
+ deleted_reason: `命中黑名单：${matched}`
640
656
  }
641
657
  });
642
658
  }
@@ -646,21 +662,27 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
646
662
  if (whitelisted(disp_str, config)) {
647
663
  if (s) {
648
664
  s.ignored_whitelist++;
649
- apply_single_cluster(idx, obj, "\u547D\u4E2D\u767D\u540D\u5355");
665
+ apply_single_cluster(idx, obj, '命中白名单');
650
666
  }
651
667
  return null;
652
668
  }
653
669
  const [matched_taolu, detaolued] = detaolu(disp_str, config);
654
- if (matched_taolu && s) s.num_taolu_matched++;
670
+ if (matched_taolu) {
671
+ if (s) s.num_taolu_matched++;
672
+ if (config.FORCELIST_APPLY_SINGULAR) obj = {
673
+ ...obj,
674
+ content: detaolued
675
+ };
676
+ }
655
677
  return {
656
678
  obj,
657
679
  str: detaolued,
658
- idx,
680
+ ptr_idx: make_ptr_idx(idx, is_next_chunk),
659
681
  sim_reason: 'ORIG'
660
682
  };
661
683
  }).filter((obj)=>null !== obj);
662
684
  }
663
- const danmus = obj_to_ir(chunk.objs, ret.stats);
685
+ const danmus = obj_to_ir(chunk.objs, ret.stats, false);
664
686
  const nearby_danmus = new Queue();
665
687
  const THRESHOLD_MS = (config?.THRESHOLD ?? DEFAULT_CONFIG.THRESHOLD) * 1000;
666
688
  for (const dm of danmus){
@@ -671,13 +693,14 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
671
693
  nearby_danmus.pop();
672
694
  }
673
695
  const sim = detect_similarity(dm.str, dm.obj.mode, nearby_danmus.index_l, ret.stats);
674
- if (null !== sim) {
696
+ if (null === sim) nearby_danmus.push([
697
+ dm
698
+ ]);
699
+ else {
675
700
  const candidate = nearby_danmus.storage[nearby_danmus.index_r - sim.idx_diff];
676
701
  dm.sim_reason = sim.reason;
677
702
  candidate.push(dm);
678
- } else nearby_danmus.push([
679
- dm
680
- ]);
703
+ }
681
704
  }
682
705
  begin_index_lock();
683
706
  for (const candidate of nearby_danmus)apply_cluster(candidate);
@@ -697,7 +720,8 @@ async function src_detaolu(that, config) {
697
720
  const selected = p.clusters.map((p)=>{
698
721
  if (1 === p.danuni_dans.length) return p.danuni_dans[0].danuni_dan;
699
722
  {
700
- const dans = p.danuni_dans, pool = new UniPool(dans.map((d)=>d.danuni_dan));
723
+ const dans = p.danuni_dans;
724
+ const pool = new UniPool(dans.map((d)=>d.danuni_dan));
701
725
  function isAllBottomMode(p) {
702
726
  return p.dans.every((d)=>d.mode === UniDMTools.Modes.Bottom);
703
727
  }
@@ -736,4 +760,4 @@ function detaolu_constructor(config) {
736
760
  return (that)=>src_detaolu(that, config);
737
761
  }
738
762
  const src = detaolu_constructor;
739
- export { src as default };
763
+ export default src;
@@ -1,6 +1,9 @@
1
1
  /**
2
2
  * @author: xmcp(代码主要逻辑来源)
3
- * @see: https://github.com/xmcp/pakku.js
3
+ * @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/core/combine_worker.ts
4
+ * @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/background/config.ts
5
+ * @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/page/options.html
6
+ * @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/page/options.ts
4
7
  * @license: GPL-3.0
5
8
  * 本文件内代码来源见上,经部分修改,并整合config注释
6
9
  */