@dan-uni/dan-any-plugin-detaolu 1.3.9 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +61 -46
- package/dist/index.umd.min.js +70 -50
- package/dist/pakku.js/similarity_stub.d.ts +1 -1
- package/dist/static/wasm/{54a7637a81.module.wasm → 21072e5de5.module.wasm} +0 -0
- package/package.json +1 -1
- package/src/pakku.js/index.ts +88 -62
- package/src/pakku.js/similarity-gen.js +21 -2
- package/src/pakku.js/similarity-gen.wasm +0 -0
- package/src/pakku.js/similarity_stub.ts +1 -1
package/dist/index.js
CHANGED
|
@@ -2442,55 +2442,57 @@ const WIDTH_TABLE = new Map(Object.entries({
|
|
|
2442
2442
|
N: 'N',
|
|
2443
2443
|
M: 'M'
|
|
2444
2444
|
}));
|
|
2445
|
-
|
|
2445
|
+
function detaolu_meta(config) {
|
|
2446
2446
|
const TRIM_ENDING = config.TRIM_ENDING;
|
|
2447
2447
|
const TRIM_SPACE = config.TRIM_SPACE;
|
|
2448
2448
|
const TRIM_WIDTH = config.TRIM_WIDTH;
|
|
2449
|
-
const FORCELIST =
|
|
2449
|
+
const FORCELIST = config.FORCELIST.map(([pattern, repl])=>[
|
|
2450
2450
|
new RegExp(pattern, 'giu'),
|
|
2451
2451
|
repl
|
|
2452
2452
|
]);
|
|
2453
2453
|
const FORCELIST_BREAK_ON_MATCH = !config.FORCELIST_CONTINUE_ON_MATCH;
|
|
2454
|
-
|
|
2455
|
-
|
|
2456
|
-
|
|
2457
|
-
|
|
2458
|
-
|
|
2459
|
-
|
|
2460
|
-
|
|
2461
|
-
|
|
2462
|
-
|
|
2463
|
-
|
|
2464
|
-
|
|
2465
|
-
|
|
2466
|
-
|
|
2467
|
-
|
|
2468
|
-
|
|
2469
|
-
|
|
2470
|
-
|
|
2471
|
-
|
|
2472
|
-
|
|
2473
|
-
|
|
2474
|
-
|
|
2475
|
-
|
|
2476
|
-
|
|
2477
|
-
|
|
2478
|
-
|
|
2479
|
-
|
|
2480
|
-
|
|
2481
|
-
|
|
2482
|
-
|
|
2483
|
-
|
|
2484
|
-
|
|
2485
|
-
|
|
2454
|
+
return (inp)=>{
|
|
2455
|
+
let len = inp.length;
|
|
2456
|
+
let text = '';
|
|
2457
|
+
if (TRIM_ENDING) {
|
|
2458
|
+
while(ENDING_CHARS.has(inp.charAt(len - 1)))len--;
|
|
2459
|
+
if (0 === len) len = inp.length;
|
|
2460
|
+
}
|
|
2461
|
+
if (TRIM_WIDTH) for(let i = 0; i < len; i++){
|
|
2462
|
+
const c = inp.charAt(i);
|
|
2463
|
+
text += WIDTH_TABLE.get(c) || c;
|
|
2464
|
+
}
|
|
2465
|
+
else text = inp.slice(0, len);
|
|
2466
|
+
if (TRIM_SPACE) text = text.replaceAll(/[ \u3000]+/g, ' ').replaceAll(/([\u3000-\u9FFF\uFF00-\uFFEF]) (?=[\u3000-\u9FFF\uFF00-\uFFEF])/g, '$1');
|
|
2467
|
+
let taolu_matched = false;
|
|
2468
|
+
for (const taolu of FORCELIST)if (taolu[0].test(text)) {
|
|
2469
|
+
text = text.replace(taolu[0], taolu[1]);
|
|
2470
|
+
taolu_matched = true;
|
|
2471
|
+
if (FORCELIST_BREAK_ON_MATCH) break;
|
|
2472
|
+
}
|
|
2473
|
+
return [
|
|
2474
|
+
taolu_matched,
|
|
2475
|
+
text
|
|
2476
|
+
];
|
|
2477
|
+
};
|
|
2478
|
+
}
|
|
2479
|
+
function whitelisted_meta(config) {
|
|
2480
|
+
const WHITELIST = config.WHITELIST.map((x)=>new RegExp(x[0], 'iu'));
|
|
2481
|
+
if (0 === WHITELIST.length) return ()=>false;
|
|
2482
|
+
return (text)=>WHITELIST.some((re)=>re.test(text));
|
|
2483
|
+
}
|
|
2484
|
+
function blacklisted_meta(config) {
|
|
2485
|
+
const BLACKLIST = config.BLACKLIST.map((x)=>x[0] ? new RegExp(x[1]) : x[1].toLowerCase());
|
|
2486
|
+
if (0 === BLACKLIST.length) return ()=>null;
|
|
2487
|
+
return (text)=>{
|
|
2486
2488
|
const lower = text.toLowerCase();
|
|
2487
2489
|
for (const pattern of BLACKLIST){
|
|
2488
2490
|
const matched = 'string' == typeof pattern ? lower.includes(pattern) : pattern.test(text);
|
|
2489
2491
|
if (matched) return 'string' == typeof pattern ? ` ${pattern}` : ` /${pattern.source}/`;
|
|
2490
2492
|
}
|
|
2491
2493
|
return null;
|
|
2492
|
-
}
|
|
2493
|
-
}
|
|
2494
|
+
};
|
|
2495
|
+
}
|
|
2494
2496
|
function extract_special_danmu(text) {
|
|
2495
2497
|
try {
|
|
2496
2498
|
text = JSON.parse(text)[4];
|
|
@@ -2506,15 +2508,25 @@ function select_median_length(strs) {
|
|
|
2506
2508
|
const mid = Math.floor(sorted.length / 2);
|
|
2507
2509
|
return sorted[mid];
|
|
2508
2510
|
}
|
|
2511
|
+
function u8array_to_arraybuffer(array) {
|
|
2512
|
+
return array.buffer.slice(array.byteOffset, array.byteOffset + array.byteLength);
|
|
2513
|
+
}
|
|
2509
2514
|
async function load_wasm(wasm_mod) {
|
|
2510
|
-
|
|
2515
|
+
if (wasm_mod) return void await similarity_stub_init(wasm_mod);
|
|
2516
|
+
const wasm_path = new URL('similarity-gen.wasm', import.meta.url).pathname;
|
|
2517
|
+
const wasm_u8 = await lib_default().readFile(wasm_path);
|
|
2518
|
+
await similarity_stub_init(u8array_to_arraybuffer(wasm_u8));
|
|
2511
2519
|
}
|
|
2512
2520
|
function make_ptr_idx(idx, is_next_chunk) {
|
|
2513
2521
|
return is_next_chunk ? -1 - idx : idx;
|
|
2514
2522
|
}
|
|
2515
2523
|
async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
2524
|
+
const local_config = {
|
|
2525
|
+
...DEFAULT_CONFIG,
|
|
2526
|
+
...config
|
|
2527
|
+
};
|
|
2516
2528
|
await load_wasm();
|
|
2517
|
-
begin_chunk(
|
|
2529
|
+
begin_chunk(local_config);
|
|
2518
2530
|
const ret = {
|
|
2519
2531
|
clusters: [],
|
|
2520
2532
|
stats: new Stats(),
|
|
@@ -2579,23 +2591,26 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
2579
2591
|
});
|
|
2580
2592
|
}
|
|
2581
2593
|
}
|
|
2594
|
+
const detaolu = detaolu_meta(local_config);
|
|
2595
|
+
const whitelisted = whitelisted_meta(local_config);
|
|
2596
|
+
const blacklisted = blacklisted_meta(local_config);
|
|
2582
2597
|
function obj_to_ir(objs, s, is_next_chunk) {
|
|
2583
2598
|
return objs.map((obj, idx)=>{
|
|
2584
|
-
if (!
|
|
2599
|
+
if (!local_config.PROC_POOL1 && 1 === obj.pool) {
|
|
2585
2600
|
if (s) {
|
|
2586
2601
|
s.ignored_type++;
|
|
2587
2602
|
apply_single_cluster(idx, obj, '已忽略字幕弹幕,可以在选项中修改');
|
|
2588
2603
|
}
|
|
2589
2604
|
return null;
|
|
2590
2605
|
}
|
|
2591
|
-
if (!
|
|
2606
|
+
if (!local_config.PROC_TYPE7 && 4 === obj.mode) {
|
|
2592
2607
|
if (s) {
|
|
2593
2608
|
s.ignored_type++;
|
|
2594
2609
|
apply_single_cluster(idx, obj, '已忽略特殊弹幕,可以在选项中修改');
|
|
2595
2610
|
}
|
|
2596
2611
|
return null;
|
|
2597
2612
|
}
|
|
2598
|
-
if (!
|
|
2613
|
+
if (!local_config.PROC_TYPE4 && 1 === obj.mode) {
|
|
2599
2614
|
if (s) {
|
|
2600
2615
|
s.ignored_type++;
|
|
2601
2616
|
apply_single_cluster(idx, obj, '已忽略底部弹幕,可以在选项中修改');
|
|
@@ -2604,7 +2619,7 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
2604
2619
|
}
|
|
2605
2620
|
const disp_str = trim_dispstr(4 === obj.mode && '[' === obj.content[0] ? extract_special_danmu(obj.content) : obj.content);
|
|
2606
2621
|
if (4 !== obj.mode) {
|
|
2607
|
-
const matched = blacklisted(disp_str
|
|
2622
|
+
const matched = blacklisted(disp_str);
|
|
2608
2623
|
if (matched) {
|
|
2609
2624
|
if (s) {
|
|
2610
2625
|
s.deleted_blacklist++;
|
|
@@ -2619,17 +2634,17 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
2619
2634
|
return null;
|
|
2620
2635
|
}
|
|
2621
2636
|
}
|
|
2622
|
-
if (whitelisted(disp_str
|
|
2637
|
+
if (whitelisted(disp_str)) {
|
|
2623
2638
|
if (s) {
|
|
2624
2639
|
s.ignored_whitelist++;
|
|
2625
2640
|
apply_single_cluster(idx, obj, '命中白名单');
|
|
2626
2641
|
}
|
|
2627
2642
|
return null;
|
|
2628
2643
|
}
|
|
2629
|
-
const [matched_taolu, detaolued] = detaolu(disp_str
|
|
2644
|
+
const [matched_taolu, detaolued] = detaolu(disp_str);
|
|
2630
2645
|
if (matched_taolu) {
|
|
2631
2646
|
if (s) s.num_taolu_matched++;
|
|
2632
|
-
if (
|
|
2647
|
+
if (local_config.FORCELIST_APPLY_SINGULAR) obj = {
|
|
2633
2648
|
...obj,
|
|
2634
2649
|
content: detaolued
|
|
2635
2650
|
};
|
|
@@ -2644,7 +2659,7 @@ async function merge(chunk, config = DEFAULT_CONFIG) {
|
|
|
2644
2659
|
}
|
|
2645
2660
|
const danmus = obj_to_ir(chunk.objs, ret.stats, false);
|
|
2646
2661
|
const nearby_danmus = new Queue();
|
|
2647
|
-
const THRESHOLD_MS =
|
|
2662
|
+
const THRESHOLD_MS = 1000 * local_config.THRESHOLD;
|
|
2648
2663
|
for (const dm of danmus){
|
|
2649
2664
|
while(true){
|
|
2650
2665
|
const peeked = nearby_danmus.peek();
|
package/dist/index.umd.min.js
CHANGED
|
@@ -12008,7 +12008,7 @@
|
|
|
12008
12008
|
},
|
|
12009
12009
|
"./src/pakku.js/similarity-gen.wasm" (module1, __unused_rspack_exports, __webpack_require__) {
|
|
12010
12010
|
"use strict";
|
|
12011
|
-
module1.exports = __webpack_require__.p + "static/wasm/
|
|
12011
|
+
module1.exports = __webpack_require__.p + "static/wasm/21072e5de5.module.wasm";
|
|
12012
12012
|
},
|
|
12013
12013
|
"?efad" () {},
|
|
12014
12014
|
"?956c" () {},
|
|
@@ -38576,7 +38576,7 @@ and limitations under the License.
|
|
|
38576
38576
|
PlatformVideoSource: ()=>platform_PlatformVideoSource,
|
|
38577
38577
|
PlatformVideoSources: ()=>PlatformVideoSources
|
|
38578
38578
|
});
|
|
38579
|
-
var package_namespaceObject = JSON.parse('{"UU":"@dan-uni/dan-any","rE":"1.
|
|
38579
|
+
var package_namespaceObject = JSON.parse('{"UU":"@dan-uni/dan-any","rE":"1.4.0","TB":"https://github.com/ani-uni/danuni/tree/master/packages/dan-any#readme"}');
|
|
38580
38580
|
const color_pad = (s)=>s.length < 2 ? `0${s}` : s;
|
|
38581
38581
|
const decimalToHex = (n)=>color_pad(n.toString(16));
|
|
38582
38582
|
const isDarkColor = ({ r, g, b })=>0.299 * r + 0.587 * g + 0.114 * b < 0x30;
|
|
@@ -39132,6 +39132,9 @@ and limitations under the License.
|
|
|
39132
39132
|
platform: platform_PlatformVideoSource.Bilibili,
|
|
39133
39133
|
extra: {
|
|
39134
39134
|
bili: {
|
|
39135
|
+
dmid: args.id,
|
|
39136
|
+
attr: args.attr,
|
|
39137
|
+
mid: args.mid,
|
|
39135
39138
|
command: args
|
|
39136
39139
|
}
|
|
39137
39140
|
}
|
|
@@ -39865,7 +39868,7 @@ and limitations under the License.
|
|
|
39865
39868
|
function main(that) {
|
|
39866
39869
|
that.dans.forEach((d)=>{
|
|
39867
39870
|
if (d.platform !== platform_PlatformVideoSource.Bilibili) throw new Error('bili-dedupe: 仅支持B站(主站)的弹幕');
|
|
39868
|
-
if (!d.extra.bili?.dmid) throw new Error('bili-dedupe: 弹幕缺少bili extra dmid字段');
|
|
39871
|
+
if (!d.extra.bili?.dmid && !d.extra.bili?.command?.id) throw new Error('bili-dedupe: 弹幕缺少bili extra dmid字段');
|
|
39869
39872
|
});
|
|
39870
39873
|
const map = new Map();
|
|
39871
39874
|
that.dans.forEach((d)=>map.set(d.extra.bili.dmid, d));
|
|
@@ -40416,6 +40419,8 @@ and limitations under the License.
|
|
|
40416
40419
|
const ok = this.dans.every((d)=>d.senderID.endsWith(`@${platform_PlatformVideoSource.Bilibili}`));
|
|
40417
40420
|
if (!ok) throw new Error('存在其他来源的senderID,请关闭该功能再试!');
|
|
40418
40421
|
}
|
|
40422
|
+
let ds = this.dans.map((dan)=>dan.toBiliXML(options));
|
|
40423
|
+
if (options?.skipBiliCommand) ds = ds.filter((d)=>null !== d);
|
|
40419
40424
|
const builder = new json2xml({
|
|
40420
40425
|
ignoreAttributes: false
|
|
40421
40426
|
});
|
|
@@ -40436,7 +40441,7 @@ and limitations under the License.
|
|
|
40436
40441
|
...DanUniConvertTipTemplate,
|
|
40437
40442
|
data: this.getShared('SOID')
|
|
40438
40443
|
},
|
|
40439
|
-
d:
|
|
40444
|
+
d: ds
|
|
40440
40445
|
}
|
|
40441
40446
|
});
|
|
40442
40447
|
}
|
|
@@ -41089,55 +41094,57 @@ and limitations under the License.
|
|
|
41089
41094
|
N: 'N',
|
|
41090
41095
|
M: 'M'
|
|
41091
41096
|
}));
|
|
41092
|
-
|
|
41097
|
+
function detaolu_meta(config) {
|
|
41093
41098
|
const TRIM_ENDING = config.TRIM_ENDING;
|
|
41094
41099
|
const TRIM_SPACE = config.TRIM_SPACE;
|
|
41095
41100
|
const TRIM_WIDTH = config.TRIM_WIDTH;
|
|
41096
|
-
const FORCELIST =
|
|
41101
|
+
const FORCELIST = config.FORCELIST.map(([pattern, repl])=>[
|
|
41097
41102
|
new RegExp(pattern, 'giu'),
|
|
41098
41103
|
repl
|
|
41099
41104
|
]);
|
|
41100
41105
|
const FORCELIST_BREAK_ON_MATCH = !config.FORCELIST_CONTINUE_ON_MATCH;
|
|
41101
|
-
|
|
41102
|
-
|
|
41103
|
-
|
|
41104
|
-
|
|
41105
|
-
|
|
41106
|
-
|
|
41107
|
-
|
|
41108
|
-
|
|
41109
|
-
|
|
41110
|
-
|
|
41111
|
-
|
|
41112
|
-
|
|
41113
|
-
|
|
41114
|
-
|
|
41115
|
-
|
|
41116
|
-
|
|
41117
|
-
|
|
41118
|
-
|
|
41119
|
-
|
|
41120
|
-
|
|
41121
|
-
|
|
41122
|
-
|
|
41123
|
-
|
|
41124
|
-
|
|
41125
|
-
|
|
41126
|
-
|
|
41127
|
-
|
|
41128
|
-
|
|
41129
|
-
|
|
41130
|
-
|
|
41131
|
-
|
|
41132
|
-
|
|
41106
|
+
return (inp)=>{
|
|
41107
|
+
let len = inp.length;
|
|
41108
|
+
let text = '';
|
|
41109
|
+
if (TRIM_ENDING) {
|
|
41110
|
+
while(ENDING_CHARS.has(inp.charAt(len - 1)))len--;
|
|
41111
|
+
if (0 === len) len = inp.length;
|
|
41112
|
+
}
|
|
41113
|
+
if (TRIM_WIDTH) for(let i = 0; i < len; i++){
|
|
41114
|
+
const c = inp.charAt(i);
|
|
41115
|
+
text += WIDTH_TABLE.get(c) || c;
|
|
41116
|
+
}
|
|
41117
|
+
else text = inp.slice(0, len);
|
|
41118
|
+
if (TRIM_SPACE) text = text.replaceAll(/[ \u3000]+/g, ' ').replaceAll(/([\u3000-\u9FFF\uFF00-\uFFEF]) (?=[\u3000-\u9FFF\uFF00-\uFFEF])/g, '$1');
|
|
41119
|
+
let taolu_matched = false;
|
|
41120
|
+
for (const taolu of FORCELIST)if (taolu[0].test(text)) {
|
|
41121
|
+
text = text.replace(taolu[0], taolu[1]);
|
|
41122
|
+
taolu_matched = true;
|
|
41123
|
+
if (FORCELIST_BREAK_ON_MATCH) break;
|
|
41124
|
+
}
|
|
41125
|
+
return [
|
|
41126
|
+
taolu_matched,
|
|
41127
|
+
text
|
|
41128
|
+
];
|
|
41129
|
+
};
|
|
41130
|
+
}
|
|
41131
|
+
function whitelisted_meta(config) {
|
|
41132
|
+
const WHITELIST = config.WHITELIST.map((x)=>new RegExp(x[0], 'iu'));
|
|
41133
|
+
if (0 === WHITELIST.length) return ()=>false;
|
|
41134
|
+
return (text)=>WHITELIST.some((re)=>re.test(text));
|
|
41135
|
+
}
|
|
41136
|
+
function blacklisted_meta(config) {
|
|
41137
|
+
const BLACKLIST = config.BLACKLIST.map((x)=>x[0] ? new RegExp(x[1]) : x[1].toLowerCase());
|
|
41138
|
+
if (0 === BLACKLIST.length) return ()=>null;
|
|
41139
|
+
return (text)=>{
|
|
41133
41140
|
const lower = text.toLowerCase();
|
|
41134
41141
|
for (const pattern of BLACKLIST){
|
|
41135
41142
|
const matched = 'string' == typeof pattern ? lower.includes(pattern) : pattern.test(text);
|
|
41136
41143
|
if (matched) return 'string' == typeof pattern ? ` ${pattern}` : ` /${pattern.source}/`;
|
|
41137
41144
|
}
|
|
41138
41145
|
return null;
|
|
41139
|
-
}
|
|
41140
|
-
}
|
|
41146
|
+
};
|
|
41147
|
+
}
|
|
41141
41148
|
function extract_special_danmu(text) {
|
|
41142
41149
|
try {
|
|
41143
41150
|
text = JSON.parse(text)[4];
|
|
@@ -41153,15 +41160,25 @@ and limitations under the License.
|
|
|
41153
41160
|
const mid = Math.floor(sorted.length / 2);
|
|
41154
41161
|
return sorted[mid];
|
|
41155
41162
|
}
|
|
41163
|
+
function u8array_to_arraybuffer(array) {
|
|
41164
|
+
return array.buffer.slice(array.byteOffset, array.byteOffset + array.byteLength);
|
|
41165
|
+
}
|
|
41156
41166
|
async function load_wasm(wasm_mod) {
|
|
41157
|
-
|
|
41167
|
+
if (wasm_mod) return void await similarity_stub_init(wasm_mod);
|
|
41168
|
+
const wasm_path = new URL(__webpack_require__("./src/pakku.js/similarity-gen.wasm"), __webpack_require__.b).pathname;
|
|
41169
|
+
const wasm_u8 = await lib_default().readFile(wasm_path);
|
|
41170
|
+
await similarity_stub_init(u8array_to_arraybuffer(wasm_u8));
|
|
41158
41171
|
}
|
|
41159
41172
|
function make_ptr_idx(idx, is_next_chunk) {
|
|
41160
41173
|
return is_next_chunk ? -1 - idx : idx;
|
|
41161
41174
|
}
|
|
41162
41175
|
async function pakku_js_merge(chunk, config = DEFAULT_CONFIG) {
|
|
41176
|
+
const local_config = {
|
|
41177
|
+
...DEFAULT_CONFIG,
|
|
41178
|
+
...config
|
|
41179
|
+
};
|
|
41163
41180
|
await load_wasm();
|
|
41164
|
-
begin_chunk(
|
|
41181
|
+
begin_chunk(local_config);
|
|
41165
41182
|
const ret = {
|
|
41166
41183
|
clusters: [],
|
|
41167
41184
|
stats: new Stats(),
|
|
@@ -41226,23 +41243,26 @@ and limitations under the License.
|
|
|
41226
41243
|
});
|
|
41227
41244
|
}
|
|
41228
41245
|
}
|
|
41246
|
+
const detaolu = detaolu_meta(local_config);
|
|
41247
|
+
const whitelisted = whitelisted_meta(local_config);
|
|
41248
|
+
const blacklisted = blacklisted_meta(local_config);
|
|
41229
41249
|
function obj_to_ir(objs, s, is_next_chunk) {
|
|
41230
41250
|
return objs.map((obj, idx)=>{
|
|
41231
|
-
if (!
|
|
41251
|
+
if (!local_config.PROC_POOL1 && 1 === obj.pool) {
|
|
41232
41252
|
if (s) {
|
|
41233
41253
|
s.ignored_type++;
|
|
41234
41254
|
apply_single_cluster(idx, obj, '已忽略字幕弹幕,可以在选项中修改');
|
|
41235
41255
|
}
|
|
41236
41256
|
return null;
|
|
41237
41257
|
}
|
|
41238
|
-
if (!
|
|
41258
|
+
if (!local_config.PROC_TYPE7 && 4 === obj.mode) {
|
|
41239
41259
|
if (s) {
|
|
41240
41260
|
s.ignored_type++;
|
|
41241
41261
|
apply_single_cluster(idx, obj, '已忽略特殊弹幕,可以在选项中修改');
|
|
41242
41262
|
}
|
|
41243
41263
|
return null;
|
|
41244
41264
|
}
|
|
41245
|
-
if (!
|
|
41265
|
+
if (!local_config.PROC_TYPE4 && 1 === obj.mode) {
|
|
41246
41266
|
if (s) {
|
|
41247
41267
|
s.ignored_type++;
|
|
41248
41268
|
apply_single_cluster(idx, obj, '已忽略底部弹幕,可以在选项中修改');
|
|
@@ -41251,7 +41271,7 @@ and limitations under the License.
|
|
|
41251
41271
|
}
|
|
41252
41272
|
const disp_str = trim_dispstr(4 === obj.mode && '[' === obj.content[0] ? extract_special_danmu(obj.content) : obj.content);
|
|
41253
41273
|
if (4 !== obj.mode) {
|
|
41254
|
-
const matched = blacklisted(disp_str
|
|
41274
|
+
const matched = blacklisted(disp_str);
|
|
41255
41275
|
if (matched) {
|
|
41256
41276
|
if (s) {
|
|
41257
41277
|
s.deleted_blacklist++;
|
|
@@ -41266,17 +41286,17 @@ and limitations under the License.
|
|
|
41266
41286
|
return null;
|
|
41267
41287
|
}
|
|
41268
41288
|
}
|
|
41269
|
-
if (whitelisted(disp_str
|
|
41289
|
+
if (whitelisted(disp_str)) {
|
|
41270
41290
|
if (s) {
|
|
41271
41291
|
s.ignored_whitelist++;
|
|
41272
41292
|
apply_single_cluster(idx, obj, '命中白名单');
|
|
41273
41293
|
}
|
|
41274
41294
|
return null;
|
|
41275
41295
|
}
|
|
41276
|
-
const [matched_taolu, detaolued] = detaolu(disp_str
|
|
41296
|
+
const [matched_taolu, detaolued] = detaolu(disp_str);
|
|
41277
41297
|
if (matched_taolu) {
|
|
41278
41298
|
if (s) s.num_taolu_matched++;
|
|
41279
|
-
if (
|
|
41299
|
+
if (local_config.FORCELIST_APPLY_SINGULAR) obj = {
|
|
41280
41300
|
...obj,
|
|
41281
41301
|
content: detaolued
|
|
41282
41302
|
};
|
|
@@ -41291,7 +41311,7 @@ and limitations under the License.
|
|
|
41291
41311
|
}
|
|
41292
41312
|
const danmus = obj_to_ir(chunk.objs, ret.stats, false);
|
|
41293
41313
|
const nearby_danmus = new Queue();
|
|
41294
|
-
const THRESHOLD_MS =
|
|
41314
|
+
const THRESHOLD_MS = 1000 * local_config.THRESHOLD;
|
|
41295
41315
|
for (const dm of danmus){
|
|
41296
41316
|
while(true){
|
|
41297
41317
|
const peeked = nearby_danmus.peek();
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { Config } from '.';
|
|
2
2
|
import type { int, Stats } from './types';
|
|
3
|
-
export declare function init(wasm_module: ArrayBuffer
|
|
3
|
+
export declare function init(wasm_module: ArrayBuffer): Promise<void>;
|
|
4
4
|
export declare function begin_chunk(config: Config): void;
|
|
5
5
|
export declare function begin_index_lock(): void;
|
|
6
6
|
export declare function detect_similarity(str: string, mode: number, index_l: int, S: Stats): null | {
|
|
Binary file
|
package/package.json
CHANGED
package/src/pakku.js/index.ts
CHANGED
|
@@ -109,6 +109,7 @@ export const DEFAULT_CONFIG = {
|
|
|
109
109
|
}
|
|
110
110
|
|
|
111
111
|
export type Config = Partial<typeof DEFAULT_CONFIG>
|
|
112
|
+
type ResolvedConfig = typeof DEFAULT_CONFIG
|
|
112
113
|
|
|
113
114
|
interface DanmuIr {
|
|
114
115
|
obj: DanmuObject
|
|
@@ -224,80 +225,88 @@ const WIDTH_TABLE = new Map(
|
|
|
224
225
|
/**
|
|
225
226
|
* 反套路
|
|
226
227
|
*/
|
|
227
|
-
|
|
228
|
+
function detaolu_meta(
|
|
229
|
+
config: ResolvedConfig,
|
|
230
|
+
): (text: string) => [boolean, string] {
|
|
228
231
|
const TRIM_ENDING = config.TRIM_ENDING
|
|
229
232
|
const TRIM_SPACE = config.TRIM_SPACE
|
|
230
233
|
const TRIM_WIDTH = config.TRIM_WIDTH
|
|
231
|
-
const FORCELIST =
|
|
234
|
+
const FORCELIST = config.FORCELIST.map(
|
|
232
235
|
([pattern, repl]) => [new RegExp(pattern, 'giu'), repl] as [RegExp, string],
|
|
233
236
|
)
|
|
234
237
|
const FORCELIST_BREAK_ON_MATCH = !config.FORCELIST_CONTINUE_ON_MATCH
|
|
235
238
|
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
239
|
+
return (inp: string) => {
|
|
240
|
+
let len = inp.length
|
|
241
|
+
let text = ''
|
|
242
|
+
|
|
243
|
+
if (TRIM_ENDING) {
|
|
244
|
+
while (ENDING_CHARS.has(inp.charAt(len - 1)))
|
|
245
|
+
// assert str.charAt(-1)===''
|
|
246
|
+
len--
|
|
247
|
+
if (len === 0)
|
|
248
|
+
// all chars are ending chars, do nothing
|
|
249
|
+
len = inp.length
|
|
250
|
+
}
|
|
247
251
|
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
+
if (TRIM_WIDTH) {
|
|
253
|
+
for (let i = 0; i < len; i++) {
|
|
254
|
+
const c = inp.charAt(i)
|
|
255
|
+
text += WIDTH_TABLE.get(c) || c
|
|
256
|
+
}
|
|
257
|
+
} else {
|
|
258
|
+
text = inp.slice(0, len)
|
|
252
259
|
}
|
|
253
|
-
} else {
|
|
254
|
-
text = inp.slice(0, len)
|
|
255
|
-
}
|
|
256
260
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
261
|
+
if (TRIM_SPACE) {
|
|
262
|
+
// text = text
|
|
263
|
+
// .replace(TRIM_EXTRA_SPACE_RE, ' ')
|
|
264
|
+
// .replace(TRIM_CJK_SPACE_RE, '$1')
|
|
265
|
+
text = text
|
|
266
|
+
.replaceAll(/[ \u3000]+/g, ' ')
|
|
267
|
+
.replaceAll(
|
|
268
|
+
/([\u3000-\u9FFF\uFF00-\uFFEF]) (?=[\u3000-\u9FFF\uFF00-\uFFEF])/g,
|
|
269
|
+
'$1',
|
|
270
|
+
)
|
|
271
|
+
}
|
|
268
272
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
273
|
+
let taolu_matched = false
|
|
274
|
+
for (const taolu of FORCELIST) {
|
|
275
|
+
if (taolu[0].test(text)) {
|
|
276
|
+
text = text.replace(taolu[0], taolu[1])
|
|
277
|
+
taolu_matched = true
|
|
278
|
+
if (FORCELIST_BREAK_ON_MATCH) break
|
|
279
|
+
}
|
|
275
280
|
}
|
|
276
|
-
}
|
|
277
281
|
|
|
278
|
-
|
|
282
|
+
return [taolu_matched, text]
|
|
283
|
+
}
|
|
279
284
|
}
|
|
280
285
|
|
|
281
286
|
/**
|
|
282
287
|
* 白名单处理
|
|
283
288
|
*/
|
|
284
|
-
|
|
285
|
-
const WHITELIST =
|
|
286
|
-
|
|
287
|
-
)
|
|
288
|
-
|
|
289
|
-
|
|
289
|
+
function whitelisted_meta(config: ResolvedConfig): (text: string) => boolean {
|
|
290
|
+
const WHITELIST = config.WHITELIST.map((x) => new RegExp(x[0], 'iu'))
|
|
291
|
+
|
|
292
|
+
if (WHITELIST.length === 0) return () => false
|
|
293
|
+
|
|
294
|
+
return (text: string) => WHITELIST.some((re) => re.test(text))
|
|
290
295
|
}
|
|
291
296
|
|
|
292
297
|
/**
|
|
293
298
|
* 黑名单处理
|
|
294
299
|
*/
|
|
295
|
-
|
|
296
|
-
|
|
300
|
+
function blacklisted_meta(
|
|
301
|
+
config: ResolvedConfig,
|
|
302
|
+
): (text: string) => string | null {
|
|
303
|
+
const BLACKLIST = config.BLACKLIST.map((x) =>
|
|
297
304
|
x[0] ? new RegExp(x[1]) : x[1].toLowerCase(),
|
|
298
305
|
)
|
|
299
|
-
|
|
300
|
-
|
|
306
|
+
|
|
307
|
+
if (BLACKLIST.length === 0) return () => null
|
|
308
|
+
|
|
309
|
+
return (text: string) => {
|
|
301
310
|
const lower = text.toLowerCase()
|
|
302
311
|
for (const pattern of BLACKLIST) {
|
|
303
312
|
const matched =
|
|
@@ -339,11 +348,22 @@ function select_median_length(strs: string[]): string {
|
|
|
339
348
|
return sorted[mid]
|
|
340
349
|
}
|
|
341
350
|
|
|
351
|
+
function u8array_to_arraybuffer(array: Uint8Array): ArrayBuffer {
|
|
352
|
+
return array.buffer.slice(
|
|
353
|
+
array.byteOffset,
|
|
354
|
+
array.byteOffset + array.byteLength,
|
|
355
|
+
) as ArrayBuffer
|
|
356
|
+
}
|
|
357
|
+
|
|
342
358
|
async function load_wasm(wasm_mod?: ArrayBuffer) {
|
|
343
|
-
|
|
344
|
-
wasm_mod
|
|
345
|
-
|
|
346
|
-
|
|
359
|
+
if (wasm_mod) {
|
|
360
|
+
await sim_init(wasm_mod)
|
|
361
|
+
return
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
const wasm_path = new URL('similarity-gen.wasm', import.meta.url).pathname
|
|
365
|
+
const wasm_u8 = await fs.readFile(wasm_path)
|
|
366
|
+
await sim_init(u8array_to_arraybuffer(wasm_u8))
|
|
347
367
|
}
|
|
348
368
|
|
|
349
369
|
function make_ptr_idx(idx: int, is_next_chunk: boolean): int {
|
|
@@ -355,9 +375,11 @@ async function merge(
|
|
|
355
375
|
// next_chunk: DanmuChunk<DanmuObject>,
|
|
356
376
|
config: Config = DEFAULT_CONFIG,
|
|
357
377
|
): Promise<DanmuClusterOutput> {
|
|
378
|
+
const local_config: ResolvedConfig = { ...DEFAULT_CONFIG, ...config }
|
|
379
|
+
|
|
358
380
|
await load_wasm()
|
|
359
381
|
|
|
360
|
-
begin_chunk(
|
|
382
|
+
begin_chunk(local_config)
|
|
361
383
|
|
|
362
384
|
const ret: DanmuClusterOutput = {
|
|
363
385
|
clusters: [],
|
|
@@ -419,6 +441,10 @@ async function merge(
|
|
|
419
441
|
}
|
|
420
442
|
}
|
|
421
443
|
|
|
444
|
+
const detaolu = detaolu_meta(local_config)
|
|
445
|
+
const whitelisted = whitelisted_meta(local_config)
|
|
446
|
+
const blacklisted = blacklisted_meta(local_config)
|
|
447
|
+
|
|
422
448
|
function obj_to_ir(
|
|
423
449
|
objs: DanmuObject[],
|
|
424
450
|
s: Stats | null,
|
|
@@ -426,7 +452,7 @@ async function merge(
|
|
|
426
452
|
): DanmuIr[] {
|
|
427
453
|
return objs
|
|
428
454
|
.map((obj, idx) => {
|
|
429
|
-
if (!
|
|
455
|
+
if (!local_config.PROC_POOL1 && obj.pool === 1) {
|
|
430
456
|
if (s) {
|
|
431
457
|
s.ignored_type++
|
|
432
458
|
apply_single_cluster(idx, obj, '已忽略字幕弹幕,可以在选项中修改')
|
|
@@ -434,7 +460,7 @@ async function merge(
|
|
|
434
460
|
return null
|
|
435
461
|
}
|
|
436
462
|
// if (!config.PROC_TYPE7 && obj.mode === 7) {
|
|
437
|
-
if (!
|
|
463
|
+
if (!local_config.PROC_TYPE7 && obj.mode === 4) {
|
|
438
464
|
if (s) {
|
|
439
465
|
s.ignored_type++
|
|
440
466
|
apply_single_cluster(idx, obj, '已忽略特殊弹幕,可以在选项中修改')
|
|
@@ -442,7 +468,7 @@ async function merge(
|
|
|
442
468
|
return null
|
|
443
469
|
}
|
|
444
470
|
// if (!config.PROC_TYPE4 && obj.mode === 4) {
|
|
445
|
-
if (!
|
|
471
|
+
if (!local_config.PROC_TYPE4 && obj.mode === 1) {
|
|
446
472
|
if (s) {
|
|
447
473
|
s.ignored_type++
|
|
448
474
|
apply_single_cluster(idx, obj, '已忽略底部弹幕,可以在选项中修改')
|
|
@@ -473,7 +499,7 @@ async function merge(
|
|
|
473
499
|
|
|
474
500
|
// if (obj.mode !== 8 && obj.mode !== 9) {
|
|
475
501
|
if (obj.mode !== 4) {
|
|
476
|
-
const matched = blacklisted(disp_str
|
|
502
|
+
const matched = blacklisted(disp_str)
|
|
477
503
|
if (matched) {
|
|
478
504
|
if (s) {
|
|
479
505
|
s.deleted_blacklist++
|
|
@@ -489,7 +515,7 @@ async function merge(
|
|
|
489
515
|
return null
|
|
490
516
|
}
|
|
491
517
|
}
|
|
492
|
-
if (whitelisted(disp_str
|
|
518
|
+
if (whitelisted(disp_str)) {
|
|
493
519
|
if (s) {
|
|
494
520
|
s.ignored_whitelist++
|
|
495
521
|
apply_single_cluster(idx, obj, '命中白名单')
|
|
@@ -497,11 +523,11 @@ async function merge(
|
|
|
497
523
|
return null
|
|
498
524
|
}
|
|
499
525
|
|
|
500
|
-
const [matched_taolu, detaolued] = detaolu(disp_str
|
|
526
|
+
const [matched_taolu, detaolued] = detaolu(disp_str)
|
|
501
527
|
|
|
502
528
|
if (matched_taolu) {
|
|
503
529
|
if (s) s.num_taolu_matched++
|
|
504
|
-
if (
|
|
530
|
+
if (local_config.FORCELIST_APPLY_SINGULAR)
|
|
505
531
|
obj = {
|
|
506
532
|
...obj,
|
|
507
533
|
content: detaolued,
|
|
@@ -523,7 +549,7 @@ async function merge(
|
|
|
523
549
|
|
|
524
550
|
const nearby_danmus: Queue<DanmuIr[]> = new Queue()
|
|
525
551
|
|
|
526
|
-
const THRESHOLD_MS =
|
|
552
|
+
const THRESHOLD_MS = local_config.THRESHOLD * 1000
|
|
527
553
|
|
|
528
554
|
for (const dm of danmus) {
|
|
529
555
|
while (true) {
|
|
@@ -1,5 +1,24 @@
|
|
|
1
|
-
/* eslint-disable
|
|
2
|
-
/* eslint-disable */
|
|
1
|
+
/* eslint-disable import/no-default-export */
|
|
2
|
+
/* eslint-disable unicorn/prefer-number-properties */
|
|
3
|
+
/* eslint-disable object-shorthand */
|
|
4
|
+
/* eslint-disable block-scoped-var */
|
|
5
|
+
/* eslint-disable unicorn/catch-error-name */
|
|
6
|
+
/* eslint-disable getter-return */
|
|
7
|
+
/* eslint-disable no-setter-return */
|
|
8
|
+
/* eslint-disable prefer-template */
|
|
9
|
+
/* eslint-disable unicorn/throw-new-error */
|
|
10
|
+
/* eslint-disable unicorn/new-for-builtins */
|
|
11
|
+
/* eslint-disable unicorn/prefer-code-point */
|
|
12
|
+
/* eslint-disable unicorn/no-negated-condition */
|
|
13
|
+
/* eslint-disable no-console */
|
|
14
|
+
/* eslint-disable unicorn/consistent-function-scoping */
|
|
15
|
+
/* eslint-disable no-unused-expressions */
|
|
16
|
+
/* eslint-disable vars-on-top */
|
|
17
|
+
/* eslint-disable one-var */
|
|
18
|
+
/* eslint-disable import/no-mutable-exports */
|
|
19
|
+
/* eslint-disable no-var */
|
|
20
|
+
/* eslint-disable prettier/prettier */
|
|
21
|
+
|
|
3
22
|
/** @nocollapse */ var Module = function (moduleArg = {}) {
|
|
4
23
|
var moduleRtn
|
|
5
24
|
|
|
Binary file
|
|
@@ -9,7 +9,7 @@ let ptr_buf: number
|
|
|
9
9
|
|
|
10
10
|
const MAX_STRING_LEN = 16005
|
|
11
11
|
|
|
12
|
-
export async function init(wasm_module: ArrayBuffer
|
|
12
|
+
export async function init(wasm_module: ArrayBuffer) {
|
|
13
13
|
module = await generated_promise({ wasm: wasm_module })
|
|
14
14
|
ptr_buf = module._malloc(MAX_STRING_LEN * 2 + 7)
|
|
15
15
|
if (ptr_buf % 2)
|