@dan-uni/dan-any-plugin-detaolu 0.7.3 → 0.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +97 -77
- package/dist/index.js.LICENSE.txt +1 -1
- package/dist/index.umd.min.js +25452 -25255
- package/dist/index.umd.min.js.LICENSE.txt +2 -2
- package/dist/pakku.js/index.d.ts +1 -1
- package/dist/pakku.js/types.d.ts +6 -0
- package/package.json +2 -2
- package/src/index.ts +2 -2
- package/src/pakku.js/index.ts +30 -15
- package/src/pakku.js/similarity-gen.js +2 -15
- package/src/pakku.js/similarity_stub.ts +28 -14
- package/src/pakku.js/types.ts +7 -0
- package/tsconfig.json +2 -2
- package/types/tsconfig.tsbuildinfo +1 -0
- /package/dist/static/wasm/{54a7637a81e5f86e.module.wasm → 54a7637a.module.wasm} +0 -0
|
@@ -27,7 +27,7 @@ See the Apache Version 2.0 License for specific language governing permissions
|
|
|
27
27
|
and limitations under the License.
|
|
28
28
|
***************************************************************************** */
|
|
29
29
|
|
|
30
|
-
/*! For license information please see index.
|
|
30
|
+
/*! For license information please see index.min.js.LICENSE.txt */
|
|
31
31
|
|
|
32
32
|
/*! ieee754. BSD-3-Clause License. Feross Aboukhadijeh <https://feross.org/opensource> */
|
|
33
33
|
|
|
@@ -37,7 +37,7 @@ and limitations under the License.
|
|
|
37
37
|
|
|
38
38
|
/**
|
|
39
39
|
* @author: xmcp(代码主要逻辑来源)
|
|
40
|
-
* @see: https://github.com/xmcp/pakku.js
|
|
40
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/core/combine_worker.ts
|
|
41
41
|
* @license: GPL-3.0
|
|
42
42
|
* 本文件内代码来源见上,经部分修改,并整合config注释
|
|
43
43
|
*/
|
package/dist/pakku.js/index.d.ts
CHANGED
package/dist/pakku.js/types.d.ts
CHANGED
|
@@ -23,6 +23,11 @@ export interface DanmuObjectRepresentative extends DanmuObject {
|
|
|
23
23
|
disp_str: string;
|
|
24
24
|
};
|
|
25
25
|
}
|
|
26
|
+
export interface DanmuObjectDeleted extends DanmuObject {
|
|
27
|
+
pakku: {
|
|
28
|
+
deleted_reason: string;
|
|
29
|
+
};
|
|
30
|
+
}
|
|
26
31
|
export interface DanmuChunk<ObjectType extends DanmuObject> {
|
|
27
32
|
objs: ObjectType[];
|
|
28
33
|
}
|
|
@@ -41,6 +46,7 @@ export interface DanmuClusterPtr {
|
|
|
41
46
|
export interface DanmuClusterOutput {
|
|
42
47
|
clusters: DanmuClusterPtr[];
|
|
43
48
|
stats: Stats;
|
|
49
|
+
deleted_chunk: DanmuObjectDeleted[];
|
|
44
50
|
}
|
|
45
51
|
export declare class Stats {
|
|
46
52
|
combined_identical: number;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@dan-uni/dan-any-plugin-detaolu",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.9.5",
|
|
4
4
|
"description": "A filter, dedupe and anti-spam plugin of dan-any, a danmaku transformer lib, based on pakku.js.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"bangumi",
|
|
@@ -31,7 +31,7 @@
|
|
|
31
31
|
},
|
|
32
32
|
"dependencies": {
|
|
33
33
|
"@dan-uni/dan-any": "workspace:^",
|
|
34
|
-
"fs-extra": "^11.3.
|
|
34
|
+
"fs-extra": "^11.3.2"
|
|
35
35
|
},
|
|
36
36
|
"devDependencies": {
|
|
37
37
|
"@types/fs-extra": "^11.0.4"
|
package/src/index.ts
CHANGED
|
@@ -22,8 +22,8 @@ async function detaolu(that: UniPool, config?: DeTaoLuConfig) {
|
|
|
22
22
|
if (p.danuni_dans.length === 1) {
|
|
23
23
|
return p.danuni_dans[0].danuni_dan
|
|
24
24
|
} else {
|
|
25
|
-
const dans = p.danuni_dans
|
|
26
|
-
|
|
25
|
+
const dans = p.danuni_dans
|
|
26
|
+
const pool = new UniPool(dans.map((d) => d.danuni_dan))
|
|
27
27
|
function isAllBottomMode(p: UniPool) {
|
|
28
28
|
return p.dans.every((d) => d.mode === UniDMTools.Modes.Bottom)
|
|
29
29
|
}
|
package/src/pakku.js/index.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @author: xmcp(代码主要逻辑来源)
|
|
3
|
-
* @see: https://github.com/xmcp/pakku.js
|
|
3
|
+
* @see: https://github.com/xmcp/pakku.js/blob/master/pakkujs/core/combine_worker.ts
|
|
4
4
|
* @license: GPL-3.0
|
|
5
5
|
* 本文件内代码来源见上,经部分修改,并整合config注释
|
|
6
6
|
*/
|
|
@@ -90,7 +90,7 @@ export type Config = Partial<typeof DEFAULT_CONFIG>
|
|
|
90
90
|
interface DanmuIr {
|
|
91
91
|
obj: DanmuObject
|
|
92
92
|
str: string // for similarity algorithm
|
|
93
|
-
|
|
93
|
+
ptr_idx: int
|
|
94
94
|
sim_reason: string
|
|
95
95
|
}
|
|
96
96
|
|
|
@@ -308,7 +308,7 @@ function trim_dispstr(text: string): string {
|
|
|
308
308
|
function select_median_length(strs: string[]): string {
|
|
309
309
|
if (strs.length === 1) return strs[0]
|
|
310
310
|
|
|
311
|
-
const sorted = strs.
|
|
311
|
+
const sorted = strs.toSorted((a, b) => a.length - b.length)
|
|
312
312
|
const mid = Math.floor(sorted.length / 2)
|
|
313
313
|
return sorted[mid]
|
|
314
314
|
}
|
|
@@ -316,10 +316,14 @@ function select_median_length(strs: string[]): string {
|
|
|
316
316
|
async function load_wasm(wasm_mod?: ArrayBuffer) {
|
|
317
317
|
await sim_init(
|
|
318
318
|
wasm_mod ??
|
|
319
|
-
(await fs.readFile(new URL('
|
|
319
|
+
(await fs.readFile(new URL('similarity-gen.wasm', import.meta.url))),
|
|
320
320
|
)
|
|
321
321
|
}
|
|
322
322
|
|
|
323
|
+
function make_ptr_idx(idx: int, is_next_chunk: boolean): int {
|
|
324
|
+
return is_next_chunk ? -1 - idx : idx
|
|
325
|
+
}
|
|
326
|
+
|
|
323
327
|
async function merge(
|
|
324
328
|
chunk: DanmuChunk<DanmuObject>,
|
|
325
329
|
// next_chunk: DanmuChunk<DanmuObject>,
|
|
@@ -332,6 +336,7 @@ async function merge(
|
|
|
332
336
|
const ret: DanmuClusterOutput = {
|
|
333
337
|
clusters: [],
|
|
334
338
|
stats: new Stats(),
|
|
339
|
+
deleted_chunk: [],
|
|
335
340
|
}
|
|
336
341
|
|
|
337
342
|
function apply_single_cluster(idx: int, obj: DanmuObject, desc: string) {
|
|
@@ -348,7 +353,7 @@ async function merge(
|
|
|
348
353
|
function apply_cluster(irs: DanmuIr[]) {
|
|
349
354
|
if (irs.length === 1) {
|
|
350
355
|
ret.clusters.push({
|
|
351
|
-
peers_ptr: irs.map((ir) => [ir.
|
|
356
|
+
peers_ptr: irs.map((ir) => [ir.ptr_idx, ir.sim_reason]),
|
|
352
357
|
desc: [],
|
|
353
358
|
chosen_str: irs[0].obj.content, // do not use detaolued str for single danmu
|
|
354
359
|
// danuni
|
|
@@ -358,8 +363,8 @@ async function merge(
|
|
|
358
363
|
})
|
|
359
364
|
} else {
|
|
360
365
|
const text_cnts = new Map()
|
|
361
|
-
let most_texts: string[] = []
|
|
362
|
-
|
|
366
|
+
let most_texts: string[] = []
|
|
367
|
+
let most_cnt = 0
|
|
363
368
|
|
|
364
369
|
for (const ir of irs) {
|
|
365
370
|
const text = ir.str
|
|
@@ -377,7 +382,7 @@ async function merge(
|
|
|
377
382
|
const most_text = select_median_length(most_texts)
|
|
378
383
|
|
|
379
384
|
ret.clusters.push({
|
|
380
|
-
peers_ptr: irs.map((ir) => [ir.
|
|
385
|
+
peers_ptr: irs.map((ir) => [ir.ptr_idx, ir.sim_reason]),
|
|
381
386
|
desc: most_cnt > 1 ? [`采用了出现 ${most_cnt} 次的文本`] : [],
|
|
382
387
|
chosen_str: most_text,
|
|
383
388
|
// danuni
|
|
@@ -388,7 +393,11 @@ async function merge(
|
|
|
388
393
|
}
|
|
389
394
|
}
|
|
390
395
|
|
|
391
|
-
function obj_to_ir(
|
|
396
|
+
function obj_to_ir(
|
|
397
|
+
objs: DanmuObject[],
|
|
398
|
+
s: Stats | null,
|
|
399
|
+
is_next_chunk: boolean,
|
|
400
|
+
): DanmuIr[] {
|
|
392
401
|
return objs
|
|
393
402
|
.map((obj, idx) => {
|
|
394
403
|
if (!config.PROC_POOL1 && obj.pool === 1) {
|
|
@@ -444,6 +453,12 @@ async function merge(
|
|
|
444
453
|
s.deleted_blacklist++
|
|
445
454
|
s.deleted_blacklist_each[matched] =
|
|
446
455
|
(s.deleted_blacklist_each[matched] || 0) + 1
|
|
456
|
+
ret.deleted_chunk.push({
|
|
457
|
+
...obj,
|
|
458
|
+
pakku: {
|
|
459
|
+
deleted_reason: `命中黑名单:${matched}`,
|
|
460
|
+
},
|
|
461
|
+
})
|
|
447
462
|
}
|
|
448
463
|
return null
|
|
449
464
|
}
|
|
@@ -465,15 +480,15 @@ async function merge(
|
|
|
465
480
|
return {
|
|
466
481
|
obj,
|
|
467
482
|
str: detaolued,
|
|
468
|
-
idx,
|
|
483
|
+
ptr_idx: make_ptr_idx(idx, is_next_chunk),
|
|
469
484
|
sim_reason: 'ORIG',
|
|
470
485
|
}
|
|
471
486
|
})
|
|
472
487
|
.filter((obj) => obj !== null) as DanmuIr[]
|
|
473
488
|
}
|
|
474
489
|
|
|
475
|
-
const danmus = obj_to_ir(chunk.objs, ret.stats)
|
|
476
|
-
// const next_chunk_danmus = obj_to_ir(next_chunk.objs, null)
|
|
490
|
+
const danmus = obj_to_ir(chunk.objs, ret.stats, false)
|
|
491
|
+
// const next_chunk_danmus = obj_to_ir(next_chunk.objs, null, true)
|
|
477
492
|
|
|
478
493
|
const nearby_danmus: Queue<DanmuIr[]> = new Queue()
|
|
479
494
|
|
|
@@ -497,13 +512,13 @@ async function merge(
|
|
|
497
512
|
nearby_danmus.index_l,
|
|
498
513
|
ret.stats,
|
|
499
514
|
)
|
|
500
|
-
if (sim
|
|
515
|
+
if (sim === null) {
|
|
516
|
+
nearby_danmus.push([dm])
|
|
517
|
+
} else {
|
|
501
518
|
const candidate =
|
|
502
519
|
nearby_danmus.storage[nearby_danmus.index_r - sim.idx_diff]
|
|
503
520
|
dm.sim_reason = sim.reason
|
|
504
521
|
candidate.push(dm)
|
|
505
|
-
} else {
|
|
506
|
-
nearby_danmus.push([dm])
|
|
507
522
|
}
|
|
508
523
|
}
|
|
509
524
|
|
|
@@ -1,18 +1,5 @@
|
|
|
1
|
-
/* eslint-disable
|
|
2
|
-
/* eslint-disable
|
|
3
|
-
/* eslint-disable unicorn/prefer-number-properties */
|
|
4
|
-
/* eslint-disable block-scoped-var */
|
|
5
|
-
/* eslint-disable object-shorthand */
|
|
6
|
-
/* eslint-disable no-setter-return */
|
|
7
|
-
/* eslint-disable prefer-template */
|
|
8
|
-
/* eslint-disable unicorn/new-for-builtins */
|
|
9
|
-
/* eslint-disable unicorn/throw-new-error */
|
|
10
|
-
/* eslint-disable no-console */
|
|
11
|
-
/* eslint-disable unicorn/consistent-function-scoping */
|
|
12
|
-
/* eslint-disable no-unused-expressions */
|
|
13
|
-
/* eslint-disable vars-on-top */
|
|
14
|
-
/* eslint-disable import/no-mutable-exports */
|
|
15
|
-
/* eslint-disable import/no-default-export */
|
|
1
|
+
/* eslint-disable @eslint-community/eslint-comments/no-unlimited-disable */
|
|
2
|
+
/* eslint-disable */
|
|
16
3
|
/** @nocollapse */ var Module = function (moduleArg = {}) {
|
|
17
4
|
var moduleRtn
|
|
18
5
|
|
|
@@ -75,20 +75,34 @@ export function detect_similarity(
|
|
|
75
75
|
const idx_diff = ret & ((1 << 19) - 1)
|
|
76
76
|
|
|
77
77
|
let reason_str
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
78
|
+
switch (reason) {
|
|
79
|
+
case CombinedReason.combined_identical: {
|
|
80
|
+
S.combined_identical++
|
|
81
|
+
reason_str = '=='
|
|
82
|
+
|
|
83
|
+
break
|
|
84
|
+
}
|
|
85
|
+
case CombinedReason.combined_edit_distance: {
|
|
86
|
+
S.combined_edit_distance++
|
|
87
|
+
reason_str = `≤${dist}`
|
|
88
|
+
|
|
89
|
+
break
|
|
90
|
+
}
|
|
91
|
+
case CombinedReason.combined_cosine_distance: {
|
|
92
|
+
S.combined_cosine_distance++
|
|
93
|
+
reason_str = `${dist}%`
|
|
94
|
+
|
|
95
|
+
break
|
|
96
|
+
}
|
|
97
|
+
case CombinedReason.combined_pinyin_distance: {
|
|
98
|
+
S.combined_pinyin_distance++
|
|
99
|
+
reason_str = `P≤${dist}`
|
|
100
|
+
|
|
101
|
+
break
|
|
102
|
+
}
|
|
103
|
+
default: {
|
|
104
|
+
throw new Error(`similarity wasm returned unknown reason: ${ret}`)
|
|
105
|
+
}
|
|
92
106
|
}
|
|
93
107
|
|
|
94
108
|
return { reason: reason_str, idx_diff }
|
package/src/pakku.js/types.ts
CHANGED
|
@@ -46,6 +46,12 @@ export interface DanmuObjectRepresentative extends DanmuObject {
|
|
|
46
46
|
}
|
|
47
47
|
}
|
|
48
48
|
|
|
49
|
+
export interface DanmuObjectDeleted extends DanmuObject {
|
|
50
|
+
pakku: {
|
|
51
|
+
deleted_reason: string
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
49
55
|
export interface DanmuChunk<ObjectType extends DanmuObject> {
|
|
50
56
|
objs: ObjectType[]
|
|
51
57
|
|
|
@@ -79,6 +85,7 @@ export interface DanmuClusterPtr {
|
|
|
79
85
|
export interface DanmuClusterOutput {
|
|
80
86
|
clusters: DanmuClusterPtr[]
|
|
81
87
|
stats: Stats
|
|
88
|
+
deleted_chunk: DanmuObjectDeleted[]
|
|
82
89
|
}
|
|
83
90
|
|
|
84
91
|
export class Stats {
|
package/tsconfig.json
CHANGED
|
@@ -2,14 +2,14 @@
|
|
|
2
2
|
"compilerOptions": {
|
|
3
3
|
/* Visit https://aka.ms/tsconfig to read more about this file */
|
|
4
4
|
/* Projects */
|
|
5
|
-
|
|
5
|
+
"incremental": true /* Save .tsbuildinfo files to allow for incremental compilation of projects. */,
|
|
6
6
|
// "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */
|
|
7
7
|
// "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */
|
|
8
8
|
// "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */
|
|
9
9
|
// "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */
|
|
10
10
|
// "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
|
|
11
11
|
/* Language and Environment */
|
|
12
|
-
"target": "
|
|
12
|
+
"target": "ES2023" /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */,
|
|
13
13
|
// "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
|
|
14
14
|
// "jsx": "preserve", /* Specify what JSX code is generated. */
|
|
15
15
|
// "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":"5.9.3"}
|
|
File without changes
|