@dan-uni/dan-any-plugin-detaolu 0.9.2 → 0.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +90 -77
- package/dist/index.js.LICENSE.txt +1 -1
- package/dist/index.umd.min.js +1007 -1013
- package/dist/index.umd.min.js.LICENSE.txt +1 -1
- package/dist/pakku.js/index.d.ts +1 -1
- package/package.json +2 -2
- package/src/index.ts +2 -2
- package/src/pakku.js/index.ts +23 -15
- package/src/pakku.js/similarity-gen.js +2 -15
- package/src/pakku.js/similarity_stub.ts +28 -14
- package/tsconfig.json +2 -2
- package/types/tsconfig.tsbuildinfo +1 -0
- /package/dist/static/wasm/{54a7637a81e5f86e.module.wasm → 54a7637a.module.wasm} +0 -0
|
@@ -37,7 +37,7 @@ and limitations under the License.
|
|
|
37
37
|
|
|
38
38
|
/**
|
|
39
39
|
* @author: xmcp(代码主要逻辑来源)
|
|
40
|
-
* @see: https://github.com/xmcp/pakku.js
|
|
40
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/core/combine_worker.ts
|
|
41
41
|
* @license: GPL-3.0
|
|
42
42
|
* 本文件内代码来源见上,经部分修改,并整合config注释
|
|
43
43
|
*/
|
package/dist/pakku.js/index.d.ts
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@dan-uni/dan-any-plugin-detaolu",
|
|
3
|
-
"version": "0.9.2",
|
|
3
|
+
"version": "0.9.5",
|
|
4
4
|
"description": "A filter, dedupe and anti-spam plugin of dan-any, a danmaku transformer lib, based on pakku.js.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"bangumi",
|
|
@@ -31,7 +31,7 @@
|
|
|
31
31
|
},
|
|
32
32
|
"dependencies": {
|
|
33
33
|
"@dan-uni/dan-any": "workspace:^",
|
|
34
|
-
"fs-extra": "^11.3.
|
|
34
|
+
"fs-extra": "^11.3.2"
|
|
35
35
|
},
|
|
36
36
|
"devDependencies": {
|
|
37
37
|
"@types/fs-extra": "^11.0.4"
|
package/src/index.ts
CHANGED
|
@@ -22,8 +22,8 @@ async function detaolu(that: UniPool, config?: DeTaoLuConfig) {
|
|
|
22
22
|
if (p.danuni_dans.length === 1) {
|
|
23
23
|
return p.danuni_dans[0].danuni_dan
|
|
24
24
|
} else {
|
|
25
|
-
const dans = p.danuni_dans
|
|
26
|
-
|
|
25
|
+
const dans = p.danuni_dans
|
|
26
|
+
const pool = new UniPool(dans.map((d) => d.danuni_dan))
|
|
27
27
|
function isAllBottomMode(p: UniPool) {
|
|
28
28
|
return p.dans.every((d) => d.mode === UniDMTools.Modes.Bottom)
|
|
29
29
|
}
|
package/src/pakku.js/index.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @author: xmcp(代码主要逻辑来源)
|
|
3
|
-
* @see: https://github.com/xmcp/pakku.js
|
|
3
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/core/combine_worker.ts
|
|
4
4
|
* @license: GPL-3.0
|
|
5
5
|
* 本文件内代码来源见上,经部分修改,并整合config注释
|
|
6
6
|
*/
|
|
@@ -90,7 +90,7 @@ export type Config = Partial<typeof DEFAULT_CONFIG>
|
|
|
90
90
|
interface DanmuIr {
|
|
91
91
|
obj: DanmuObject
|
|
92
92
|
str: string // for similarity algorithm
|
|
93
|
-
|
|
93
|
+
ptr_idx: int
|
|
94
94
|
sim_reason: string
|
|
95
95
|
}
|
|
96
96
|
|
|
@@ -308,7 +308,7 @@ function trim_dispstr(text: string): string {
|
|
|
308
308
|
function select_median_length(strs: string[]): string {
|
|
309
309
|
if (strs.length === 1) return strs[0]
|
|
310
310
|
|
|
311
|
-
const sorted = strs.
|
|
311
|
+
const sorted = strs.toSorted((a, b) => a.length - b.length)
|
|
312
312
|
const mid = Math.floor(sorted.length / 2)
|
|
313
313
|
return sorted[mid]
|
|
314
314
|
}
|
|
@@ -316,10 +316,14 @@ function select_median_length(strs: string[]): string {
|
|
|
316
316
|
async function load_wasm(wasm_mod?: ArrayBuffer) {
|
|
317
317
|
await sim_init(
|
|
318
318
|
wasm_mod ??
|
|
319
|
-
(await fs.readFile(new URL('
|
|
319
|
+
(await fs.readFile(new URL('similarity-gen.wasm', import.meta.url))),
|
|
320
320
|
)
|
|
321
321
|
}
|
|
322
322
|
|
|
323
|
+
function make_ptr_idx(idx: int, is_next_chunk: boolean): int {
|
|
324
|
+
return is_next_chunk ? -1 - idx : idx
|
|
325
|
+
}
|
|
326
|
+
|
|
323
327
|
async function merge(
|
|
324
328
|
chunk: DanmuChunk<DanmuObject>,
|
|
325
329
|
// next_chunk: DanmuChunk<DanmuObject>,
|
|
@@ -349,7 +353,7 @@ async function merge(
|
|
|
349
353
|
function apply_cluster(irs: DanmuIr[]) {
|
|
350
354
|
if (irs.length === 1) {
|
|
351
355
|
ret.clusters.push({
|
|
352
|
-
peers_ptr: irs.map((ir) => [ir.
|
|
356
|
+
peers_ptr: irs.map((ir) => [ir.ptr_idx, ir.sim_reason]),
|
|
353
357
|
desc: [],
|
|
354
358
|
chosen_str: irs[0].obj.content, // do not use detaolued str for single danmu
|
|
355
359
|
// danuni
|
|
@@ -359,8 +363,8 @@ async function merge(
|
|
|
359
363
|
})
|
|
360
364
|
} else {
|
|
361
365
|
const text_cnts = new Map()
|
|
362
|
-
let most_texts: string[] = []
|
|
363
|
-
|
|
366
|
+
let most_texts: string[] = []
|
|
367
|
+
let most_cnt = 0
|
|
364
368
|
|
|
365
369
|
for (const ir of irs) {
|
|
366
370
|
const text = ir.str
|
|
@@ -378,7 +382,7 @@ async function merge(
|
|
|
378
382
|
const most_text = select_median_length(most_texts)
|
|
379
383
|
|
|
380
384
|
ret.clusters.push({
|
|
381
|
-
peers_ptr: irs.map((ir) => [ir.
|
|
385
|
+
peers_ptr: irs.map((ir) => [ir.ptr_idx, ir.sim_reason]),
|
|
382
386
|
desc: most_cnt > 1 ? [`采用了出现 ${most_cnt} 次的文本`] : [],
|
|
383
387
|
chosen_str: most_text,
|
|
384
388
|
// danuni
|
|
@@ -389,7 +393,11 @@ async function merge(
|
|
|
389
393
|
}
|
|
390
394
|
}
|
|
391
395
|
|
|
392
|
-
function obj_to_ir(
|
|
396
|
+
function obj_to_ir(
|
|
397
|
+
objs: DanmuObject[],
|
|
398
|
+
s: Stats | null,
|
|
399
|
+
is_next_chunk: boolean,
|
|
400
|
+
): DanmuIr[] {
|
|
393
401
|
return objs
|
|
394
402
|
.map((obj, idx) => {
|
|
395
403
|
if (!config.PROC_POOL1 && obj.pool === 1) {
|
|
@@ -472,15 +480,15 @@ async function merge(
|
|
|
472
480
|
return {
|
|
473
481
|
obj,
|
|
474
482
|
str: detaolued,
|
|
475
|
-
idx,
|
|
483
|
+
ptr_idx: make_ptr_idx(idx, is_next_chunk),
|
|
476
484
|
sim_reason: 'ORIG',
|
|
477
485
|
}
|
|
478
486
|
})
|
|
479
487
|
.filter((obj) => obj !== null) as DanmuIr[]
|
|
480
488
|
}
|
|
481
489
|
|
|
482
|
-
const danmus = obj_to_ir(chunk.objs, ret.stats)
|
|
483
|
-
// const next_chunk_danmus = obj_to_ir(next_chunk.objs, null)
|
|
490
|
+
const danmus = obj_to_ir(chunk.objs, ret.stats, false)
|
|
491
|
+
// const next_chunk_danmus = obj_to_ir(next_chunk.objs, null, true)
|
|
484
492
|
|
|
485
493
|
const nearby_danmus: Queue<DanmuIr[]> = new Queue()
|
|
486
494
|
|
|
@@ -504,13 +512,13 @@ async function merge(
|
|
|
504
512
|
nearby_danmus.index_l,
|
|
505
513
|
ret.stats,
|
|
506
514
|
)
|
|
507
|
-
if (sim
|
|
515
|
+
if (sim === null) {
|
|
516
|
+
nearby_danmus.push([dm])
|
|
517
|
+
} else {
|
|
508
518
|
const candidate =
|
|
509
519
|
nearby_danmus.storage[nearby_danmus.index_r - sim.idx_diff]
|
|
510
520
|
dm.sim_reason = sim.reason
|
|
511
521
|
candidate.push(dm)
|
|
512
|
-
} else {
|
|
513
|
-
nearby_danmus.push([dm])
|
|
514
522
|
}
|
|
515
523
|
}
|
|
516
524
|
|
|
@@ -1,18 +1,5 @@
|
|
|
1
|
-
/* eslint-disable
|
|
2
|
-
/* eslint-disable
|
|
3
|
-
/* eslint-disable unicorn/prefer-number-properties */
|
|
4
|
-
/* eslint-disable block-scoped-var */
|
|
5
|
-
/* eslint-disable object-shorthand */
|
|
6
|
-
/* eslint-disable no-setter-return */
|
|
7
|
-
/* eslint-disable prefer-template */
|
|
8
|
-
/* eslint-disable unicorn/new-for-builtins */
|
|
9
|
-
/* eslint-disable unicorn/throw-new-error */
|
|
10
|
-
/* eslint-disable no-console */
|
|
11
|
-
/* eslint-disable unicorn/consistent-function-scoping */
|
|
12
|
-
/* eslint-disable no-unused-expressions */
|
|
13
|
-
/* eslint-disable vars-on-top */
|
|
14
|
-
/* eslint-disable import/no-mutable-exports */
|
|
15
|
-
/* eslint-disable import/no-default-export */
|
|
1
|
+
/* eslint-disable @eslint-community/eslint-comments/no-unlimited-disable */
|
|
2
|
+
/* eslint-disable */
|
|
16
3
|
/** @nocollapse */ var Module = function (moduleArg = {}) {
|
|
17
4
|
var moduleRtn
|
|
18
5
|
|
|
@@ -75,20 +75,34 @@ export function detect_similarity(
|
|
|
75
75
|
const idx_diff = ret & ((1 << 19) - 1)
|
|
76
76
|
|
|
77
77
|
let reason_str
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
78
|
+
switch (reason) {
|
|
79
|
+
case CombinedReason.combined_identical: {
|
|
80
|
+
S.combined_identical++
|
|
81
|
+
reason_str = '=='
|
|
82
|
+
|
|
83
|
+
break
|
|
84
|
+
}
|
|
85
|
+
case CombinedReason.combined_edit_distance: {
|
|
86
|
+
S.combined_edit_distance++
|
|
87
|
+
reason_str = `≤${dist}`
|
|
88
|
+
|
|
89
|
+
break
|
|
90
|
+
}
|
|
91
|
+
case CombinedReason.combined_cosine_distance: {
|
|
92
|
+
S.combined_cosine_distance++
|
|
93
|
+
reason_str = `${dist}%`
|
|
94
|
+
|
|
95
|
+
break
|
|
96
|
+
}
|
|
97
|
+
case CombinedReason.combined_pinyin_distance: {
|
|
98
|
+
S.combined_pinyin_distance++
|
|
99
|
+
reason_str = `P≤${dist}`
|
|
100
|
+
|
|
101
|
+
break
|
|
102
|
+
}
|
|
103
|
+
default: {
|
|
104
|
+
throw new Error(`similarity wasm returned unknown reason: ${ret}`)
|
|
105
|
+
}
|
|
92
106
|
}
|
|
93
107
|
|
|
94
108
|
return { reason: reason_str, idx_diff }
|
package/tsconfig.json
CHANGED
|
@@ -2,14 +2,14 @@
|
|
|
2
2
|
"compilerOptions": {
|
|
3
3
|
/* Visit https://aka.ms/tsconfig to read more about this file */
|
|
4
4
|
/* Projects */
|
|
5
|
-
|
|
5
|
+
"incremental": true /* Save .tsbuildinfo files to allow for incremental compilation of projects. */,
|
|
6
6
|
// "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */
|
|
7
7
|
// "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */
|
|
8
8
|
// "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */
|
|
9
9
|
// "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */
|
|
10
10
|
// "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
|
|
11
11
|
/* Language and Environment */
|
|
12
|
-
"target": "
|
|
12
|
+
"target": "ES2023" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */,
|
|
13
13
|
// "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
|
|
14
14
|
// "jsx": "preserve", /* Specify what JSX code is generated. */
|
|
15
15
|
// "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":"5.9.3"}
|
|
File without changes
|