@joliegg/moderation 0.6.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/actions.d.ts +28 -0
- package/dist/actions.js +48 -0
- package/dist/client.d.ts +19 -0
- package/dist/client.js +97 -0
- package/dist/index.d.ts +3 -41
- package/dist/index.js +20 -213
- package/dist/providers/aws.d.ts +11 -0
- package/dist/providers/aws.js +58 -0
- package/dist/providers/google.d.ts +21 -0
- package/dist/providers/google.js +61 -0
- package/dist/providers/webrisk.d.ts +9 -0
- package/dist/providers/webrisk.js +33 -0
- package/dist/raid/age.d.ts +6 -0
- package/dist/raid/age.js +19 -0
- package/dist/raid/detector.d.ts +56 -0
- package/dist/raid/detector.js +88 -0
- package/dist/raid/index.d.ts +2 -0
- package/dist/raid/index.js +18 -0
- package/dist/spam/cache.d.ts +99 -0
- package/dist/spam/cache.js +210 -0
- package/dist/spam/index.d.ts +1 -0
- package/dist/spam/index.js +17 -0
- package/dist/text/index.d.ts +2 -0
- package/dist/text/index.js +18 -0
- package/dist/text/mentions.d.ts +31 -0
- package/dist/text/mentions.js +55 -0
- package/dist/text/normalize.d.ts +15 -0
- package/dist/text/normalize.js +45 -0
- package/dist/types/config.d.ts +13 -0
- package/dist/types/config.js +2 -0
- package/dist/types/index.d.ts +3 -10
- package/dist/types/index.js +15 -0
- package/package.json +54 -13
- package/src/actions.ts +50 -0
- package/src/client.ts +121 -0
- package/src/index.ts +3 -277
- package/src/providers/aws.ts +58 -0
- package/src/providers/google.ts +63 -0
- package/src/providers/webrisk.ts +30 -0
- package/src/raid/age.ts +19 -0
- package/src/raid/detector.ts +122 -0
- package/src/raid/index.ts +2 -0
- package/src/spam/cache.ts +342 -0
- package/src/spam/index.ts +1 -0
- package/src/text/index.ts +2 -0
- package/src/text/mentions.ts +91 -0
- package/src/text/normalize.ts +43 -0
- package/src/types/config.ts +14 -0
- package/src/types/index.ts +5 -11
- /package/dist/{url-blacklist.json → data/url-blacklist.json} +0 -0
- /package/dist/{url-shorteners.json → data/url-shorteners.json} +0 -0
- /package/src/{url-blacklist.json → data/url-blacklist.json} +0 -0
- /package/src/{url-shorteners.json → data/url-shorteners.json} +0 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.normalizeText = normalizeText;
|
|
4
|
+
/**
 * Invisible / zero-width Unicode code points that users sometimes insert
 * between letters to bypass substring-based filters. NFKC normalization
 * does NOT collapse these on its own, so we strip them explicitly before
 * normalizing.
 *
 *   - U+200B Zero-Width Space
 *   - U+200C Zero-Width Non-Joiner
 *   - U+200D Zero-Width Joiner
 *   - U+200E Left-to-Right Mark
 *   - U+200F Right-to-Left Mark
 *   - U+2060 Word Joiner
 *   - U+FEFF Zero-Width No-Break Space (BOM)
 */
// Matching individual zero-width code points by design (we are stripping
// them, not joining anything). ESLint's no-misleading-character-class
// flags this since \u200d is the Zero-Width Joiner; the warning does not
// apply here because every code point in the class is a literal target.
// eslint-disable-next-line no-misleading-character-class
const ZERO_WIDTH = /[\u200b\u200c\u200d\u200e\u200f\u2060\ufeff]/gu;
/**
 * Canonicalizes user-submitted text for content matching:
 *
 *   1. strip zero-width / invisible characters
 *   2. NFKC normalize (collapses bold, italic, fullwidth, circled, and
 *      other compatibility variants to their plain forms)
 *   3. lowercase
 *   4. collapse whitespace runs to single spaces
 *   5. trim surrounding whitespace
 *
 * The order matters:
 *   - Lowercasing must follow NFKC, because some styled capitals (e.g.
 *     mathematical bold "𝐀", black-letter "ℌ") have no lowercase mapping
 *     of their own and only become ASCII capitals after normalization.
 *   - Trimming must come last, because removing a leading/trailing
 *     zero-width character can expose whitespace that was not at the
 *     edge of the original string.
 *
 * Characters without a compatibility mapping are left untouched by the
 * NFKC step.
 *
 * Useful for spam hashing, ban-list matching, and any other comparison
 * where users should not be able to defeat a match by visually similar
 * but technically distinct input.
 *
 * @param {string} input Raw user-submitted text.
 * @returns {string} The canonicalized text.
 */
function normalizeText(input) {
    return input
        .replace(ZERO_WIDTH, '')
        .normalize('NFKC')
        .toLowerCase()
        .replace(/\s+/g, ' ')
        .trim();
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { RekognitionClientConfig } from '@aws-sdk/client-rekognition';
|
|
2
|
+
/**
 * Options accepted by the moderation client. Every section is optional.
 */
export interface ModerationConfiguration {
    /** AWS Rekognition client configuration (typed as the SDK's `RekognitionClientConfig`). */
    aws?: RekognitionClientConfig;
    /** Google credentials. */
    google?: {
        /** Google API key. */
        apiKey?: string;
        /** Google key file, given as a string (presumably a path — TODO confirm). */
        keyFile?: string;
    };
    /** OpenAI credentials. NOTE(review): consumer of this section is not visible here — confirm where it is read. */
    openai?: {
        /** OpenAI API key. */
        apiKey?: string;
    };
    /** Custom list of banned terms. */
    banList?: string[];
    /** Custom list of blacklisted URL strings. */
    urlBlackList?: string[];
}
|
package/dist/types/index.d.ts
CHANGED
|
@@ -1,16 +1,9 @@
|
|
|
1
|
-
|
|
2
|
-
export
|
|
3
|
-
aws?: RekognitionClientConfig;
|
|
4
|
-
google?: {
|
|
5
|
-
apiKey?: string;
|
|
6
|
-
keyFile?: string;
|
|
7
|
-
};
|
|
8
|
-
banList?: string[];
|
|
9
|
-
urlBlackList?: string[];
|
|
10
|
-
}
|
|
1
|
+
export * from './config';
|
|
2
|
+
export type Severity = 'low' | 'medium' | 'high' | 'critical';
|
|
11
3
|
export interface ModerationCategory {
|
|
12
4
|
category: string;
|
|
13
5
|
confidence: number;
|
|
6
|
+
severity?: Severity;
|
|
14
7
|
}
|
|
15
8
|
export interface ModerationResult {
|
|
16
9
|
source: string;
|
package/dist/types/index.js
CHANGED
|
@@ -1,2 +1,17 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
+
// Compiler-emitted re-export helper: binds property `k` of module `m` onto
// object `o` under the name `k2` (defaults to `k`). An existing
// `this.__createBinding` is reused when present.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    // Fall back to a forwarding getter when there is no own descriptor, when
    // it is an accessor on a non-__esModule object, or when it is a data
    // property that is writable or configurable.
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
      desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    // Branch taken when `Object.create` is falsy: copy the current value by
    // plain assignment.
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
|
|
13
|
+
// Compiler-emitted helper for `export * from`: binds every enumerable
// property of `m` onto `exports`, skipping "default" and any name that
// `exports` already owns. An existing `this.__exportStar` is reused when present.
var __exportStar = (this && this.__exportStar) || function(m, exports) {
    for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
};
|
|
2
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
+
__exportStar(require("./config"), exports);
|
package/package.json
CHANGED
|
@@ -1,15 +1,56 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@joliegg/moderation",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.8.0",
|
|
4
4
|
"description": "A set of tools for chat moderation",
|
|
5
5
|
"author": "Diana Islas Ocampo",
|
|
6
6
|
"main": "dist/index.js",
|
|
7
|
+
"types": "dist/index.d.ts",
|
|
7
8
|
"license": "MIT",
|
|
9
|
+
"exports": {
|
|
10
|
+
".": {
|
|
11
|
+
"types": "./dist/index.d.ts",
|
|
12
|
+
"default": "./dist/index.js"
|
|
13
|
+
},
|
|
14
|
+
"./types": {
|
|
15
|
+
"types": "./dist/types/index.d.ts",
|
|
16
|
+
"default": "./dist/types/index.js"
|
|
17
|
+
},
|
|
18
|
+
"./actions": {
|
|
19
|
+
"types": "./dist/actions.d.ts",
|
|
20
|
+
"default": "./dist/actions.js"
|
|
21
|
+
},
|
|
22
|
+
"./text": {
|
|
23
|
+
"types": "./dist/text/index.d.ts",
|
|
24
|
+
"default": "./dist/text/index.js"
|
|
25
|
+
},
|
|
26
|
+
"./spam": {
|
|
27
|
+
"types": "./dist/spam/index.d.ts",
|
|
28
|
+
"default": "./dist/spam/index.js"
|
|
29
|
+
},
|
|
30
|
+
"./raid": {
|
|
31
|
+
"types": "./dist/raid/index.d.ts",
|
|
32
|
+
"default": "./dist/raid/index.js"
|
|
33
|
+
},
|
|
34
|
+
"./providers/google": {
|
|
35
|
+
"types": "./dist/providers/google.d.ts",
|
|
36
|
+
"default": "./dist/providers/google.js"
|
|
37
|
+
},
|
|
38
|
+
"./providers/aws": {
|
|
39
|
+
"types": "./dist/providers/aws.d.ts",
|
|
40
|
+
"default": "./dist/providers/aws.js"
|
|
41
|
+
},
|
|
42
|
+
"./providers/webrisk": {
|
|
43
|
+
"types": "./dist/providers/webrisk.d.ts",
|
|
44
|
+
"default": "./dist/providers/webrisk.js"
|
|
45
|
+
},
|
|
46
|
+
"./package.json": "./package.json"
|
|
47
|
+
},
|
|
8
48
|
"scripts": {
|
|
9
|
-
"
|
|
49
|
+
"prepublishOnly": "bun run build && bun run docs",
|
|
10
50
|
"build": "bun eslint . && rm -rf ./dist && bun tsc --declaration",
|
|
11
51
|
"docs": "typedoc",
|
|
12
|
-
"test": "
|
|
52
|
+
"test": "bun test",
|
|
53
|
+
"test:integration": "bun run test/integration.ts"
|
|
13
54
|
},
|
|
14
55
|
"engines": {
|
|
15
56
|
"node": ">=20.x"
|
|
@@ -17,19 +58,19 @@
|
|
|
17
58
|
"devDependencies": {
|
|
18
59
|
"@babel/eslint-parser": "^7.28.6",
|
|
19
60
|
"@eslint/js": "^10.0.1",
|
|
20
|
-
"@typescript-eslint/eslint-plugin": "^8.
|
|
21
|
-
"@typescript-eslint/parser": "^8.
|
|
22
|
-
"dotenv": "^17.
|
|
23
|
-
"eslint": "^10.0
|
|
24
|
-
"typedoc": "^0.28.
|
|
61
|
+
"@typescript-eslint/eslint-plugin": "^8.58.2",
|
|
62
|
+
"@typescript-eslint/parser": "^8.58.2",
|
|
63
|
+
"dotenv": "^17.4.2",
|
|
64
|
+
"eslint": "^10.2.0",
|
|
65
|
+
"typedoc": "^0.28.19",
|
|
25
66
|
"typescript": "^5.9.3",
|
|
26
|
-
"typescript-eslint": "^8.
|
|
67
|
+
"typescript-eslint": "^8.58.2"
|
|
27
68
|
},
|
|
28
69
|
"dependencies": {
|
|
29
|
-
"@aws-sdk/client-rekognition": "^3.
|
|
70
|
+
"@aws-sdk/client-rekognition": "^3.1031.0",
|
|
30
71
|
"@google-cloud/language": "^7.2.1",
|
|
31
|
-
"@google-cloud/speech": "^7.
|
|
32
|
-
"axios": "^1.
|
|
72
|
+
"@google-cloud/speech": "^7.3.0",
|
|
73
|
+
"axios": "^1.15.0",
|
|
33
74
|
"sharp": "^0.34.5"
|
|
34
75
|
},
|
|
35
76
|
"files": [
|
|
@@ -40,4 +81,4 @@
|
|
|
40
81
|
"dist/*"
|
|
41
82
|
],
|
|
42
83
|
"packageManager": "bun@1.3.4"
|
|
43
|
-
}
|
|
84
|
+
}
|
package/src/actions.ts
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import type { Severity } from './types';
|
|
2
|
+
|
|
3
|
+
/**
 * Canonical identifiers for moderation actions.
 *
 * The string values are meant to be persisted, so treat them as a stable
 * contract: add new entries rather than renaming or repurposing existing ones.
 */
export const ACTION_TYPES = {
  TIMEOUT: 'timeout',
  BAN: 'ban',
  KICK: 'kick',
  WARN: 'warn',
  DELETE: 'delete',
  RESTRICT: 'restrict',
  APPEAL_APPROVE: 'appeal_approve',
  APPEAL_DENY: 'appeal_deny',
  MENTION_SPAM: 'mention_spam',
  NEW_USER_RESTRICT: 'new_user_restrict',
  SPAM_DETECTED: 'spam_detected',
  ESCALATION: 'escalation',
  RAID_DETECTED: 'raid_detected',
  RAID_TIMEOUT: 'raid_timeout',
  RAID_JOIN: 'raid_join',
  PERMISSION_BLOCK: 'permission_block',
  UNTIMEOUT: 'untimeout',
  UNBAN: 'unban',
} as const;

/** Union of every string value declared in {@link ACTION_TYPES}. */
export type ActionType = (typeof ACTION_TYPES)[keyof typeof ACTION_TYPES];
|
|
29
|
+
|
|
30
|
+
/**
 * Default severity assigned to each action type.
 *
 * Keys are taken from `ACTION_TYPES` rather than repeated as string literals,
 * and the `Record<ActionType, Severity>` annotation makes the compiler reject
 * the map if any action type is missing.
 */
export const SEVERITY_BY_ACTION: Record<ActionType, Severity> = {
  [ACTION_TYPES.TIMEOUT]: 'medium',
  [ACTION_TYPES.BAN]: 'critical',
  [ACTION_TYPES.KICK]: 'high',
  [ACTION_TYPES.WARN]: 'low',
  [ACTION_TYPES.DELETE]: 'low',
  [ACTION_TYPES.RESTRICT]: 'medium',
  [ACTION_TYPES.APPEAL_APPROVE]: 'low',
  [ACTION_TYPES.APPEAL_DENY]: 'low',
  [ACTION_TYPES.MENTION_SPAM]: 'medium',
  [ACTION_TYPES.NEW_USER_RESTRICT]: 'low',
  [ACTION_TYPES.SPAM_DETECTED]: 'medium',
  [ACTION_TYPES.ESCALATION]: 'high',
  [ACTION_TYPES.RAID_DETECTED]: 'critical',
  [ACTION_TYPES.RAID_TIMEOUT]: 'medium',
  [ACTION_TYPES.RAID_JOIN]: 'low',
  [ACTION_TYPES.PERMISSION_BLOCK]: 'low',
  [ACTION_TYPES.UNTIMEOUT]: 'low',
  [ACTION_TYPES.UNBAN]: 'low',
};
|
package/src/client.ts
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import { Rekognition } from '@aws-sdk/client-rekognition';
|
|
2
|
+
import { LanguageServiceClient } from '@google-cloud/language';
|
|
3
|
+
import { SpeechClient } from '@google-cloud/speech';
|
|
4
|
+
|
|
5
|
+
import { GoogleLanguageProvider, GoogleSpeechProvider, fetchAudio } from './providers/google';
|
|
6
|
+
import { RekognitionProvider } from './providers/aws';
|
|
7
|
+
import { WebRiskProvider } from './providers/webrisk';
|
|
8
|
+
import URLBlackList from './data/url-blacklist.json';
|
|
9
|
+
import URLShortenerList from './data/url-shorteners.json';
|
|
10
|
+
|
|
11
|
+
import type { ModerationCategory, ModerationConfiguration, ModerationResult } from './types';
|
|
12
|
+
|
|
13
|
+
/**
 * Composes text / image / link / audio moderation across the configured
 * providers.
 *
 * Each provider is optional. A method whose provider was not configured
 * returns whatever the local checks produced (possibly an empty list) rather
 * than throwing.
 */
export class ModerationClient {
  private googleLanguage?: GoogleLanguageProvider;
  private googleSpeech?: GoogleSpeechProvider;
  private aws?: RekognitionProvider;
  private webRisk?: WebRiskProvider;
  private banList: string[] = [];
  private urlBlackList: string[] = [];

  /**
   * @param configuration Provider credentials and custom lists. Lists are
   *   copied, so later mutation of the caller's arrays has no effect on the
   *   client. Empty entries are dropped because an empty string is a substring
   *   of every input and would flag everything.
   */
  constructor(configuration: ModerationConfiguration) {
    if (configuration.aws) {
      this.aws = new RekognitionProvider(new Rekognition(configuration.aws));
    }

    if (typeof configuration.google?.keyFile === 'string') {
      this.googleLanguage = new GoogleLanguageProvider(
        new LanguageServiceClient({ keyFile: configuration.google.keyFile })
      );

      this.googleSpeech = new GoogleSpeechProvider(
        new SpeechClient({ keyFile: configuration.google.keyFile })
      );
    }

    if (typeof configuration.google?.apiKey === 'string') {
      this.webRisk = new WebRiskProvider(configuration.google.apiKey);
    }

    if (Array.isArray(configuration.banList)) {
      // Text is lower-cased before matching, so entries must be lower-cased
      // too or a mixed-case entry could never match.
      this.banList = configuration.banList
        .filter(w => typeof w === 'string' && w.length > 0)
        .map(w => w.toLowerCase());
    }

    if (Array.isArray(configuration.urlBlackList)) {
      this.urlBlackList = configuration.urlBlackList
        .filter(u => typeof u === 'string' && u.length > 0);
    }
  }

  /**
   * Checks text against the ban list and, when configured, Google's text
   * moderation.
   *
   * @param text The text to moderate.
   * @param minimumConfidence Threshold forwarded to the Google provider. It is
   *   not applied to the `BAN_LIST` category.
   * @returns The original text as `source` plus every detected category. The
   *   `BAN_LIST` confidence is the ratio of matched ban-list entries to words
   *   in the text, as a percentage capped at 100.
   */
  async moderateText(text: string, minimumConfidence: number = 50): Promise<ModerationResult> {
    const categories: ModerationCategory[] = [];
    const lowered = text.toLowerCase();
    const matches = this.banList.filter(w => lowered.includes(w));

    if (matches.length > 0) {
      // Count real words only: split on any whitespace run and ignore empty
      // tokens. Guard against a zero count so the ratio stays finite.
      const wordCount = Math.max(lowered.split(/\s+/).filter(Boolean).length, 1);
      // Several entries can match inside a single word, so cap at 100.
      const confidence = Math.min((matches.length / wordCount) * 100, 100);

      categories.push({ category: 'BAN_LIST', confidence });
    }

    if (!this.googleLanguage) {
      return { source: text, moderation: categories };
    }

    const googleCategories = await this.googleLanguage.moderateText(text, minimumConfidence);

    return { source: text, moderation: [...categories, ...googleCategories] };
  }

  /**
   * Moderates an image through the AWS provider.
   *
   * @param url Location of the image.
   * @param minimumConfidence Threshold forwarded to the provider.
   * @returns The provider's categories, or an empty list when AWS is not configured.
   */
  async moderateImage(url: string, minimumConfidence: number = 50): Promise<ModerationResult> {
    if (!this.aws) {
      return { source: url, moderation: [] };
    }

    const moderation = await this.aws.moderateImage(url, minimumConfidence);

    return { source: url, moderation };
  }

  /**
   * Checks a link against, in order: the custom black list (substring of the
   * full URL), the bundled black list (exact hostname), the bundled shortener
   * list (exact hostname, unless `allowShorteners`), and finally the Web Risk
   * provider when configured.
   *
   * @param url The link to check.
   * @param allowShorteners When true, known URL shorteners are not flagged.
   * @returns The first local match, otherwise the Web Risk result. A string
   *   that cannot be parsed as a URL yields an empty list.
   */
  async moderateLink(url: string, allowShorteners = false): Promise<ModerationResult> {
    try {
      const domain = new URL(url).hostname;

      if (this.urlBlackList.some(u => url.includes(u))) {
        return { source: url, moderation: [{ category: 'CUSTOM_BLACK_LIST', confidence: 100 }] };
      }

      if (URLBlackList.some(u => u === domain)) {
        return { source: url, moderation: [{ category: 'BLACK_LIST', confidence: 100 }] };
      }

      if (!allowShorteners && URLShortenerList.some(u => u === domain)) {
        return { source: url, moderation: [{ category: 'URL_SHORTENER', confidence: 100 }] };
      }
    } catch {
      // `new URL` throws on an unparsable string; treat it as "nothing to report".
      return { source: url, moderation: [] };
    }

    if (!this.webRisk) {
      return { source: url, moderation: [] };
    }

    const moderation = await this.webRisk.checkLink(url);
    return { source: url, moderation };
  }

  /**
   * Transcribes audio with the Google speech provider and moderates the
   * transcript as text.
   *
   * @param url Location of the audio.
   * @param language Language code passed to the transcriber.
   * @param minimumConfidence Threshold forwarded to `moderateText`.
   * @returns The `moderateText` result, so `source` is the transcript rather
   *   than the URL. When speech is not configured, returns an empty list with
   *   the URL as `source`.
   */
  async moderateAudio(url: string, language: string = 'en-US', minimumConfidence: number = 50): Promise<ModerationResult> {
    if (!this.googleSpeech) {
      return { source: url, moderation: [] };
    }

    const buffer = await fetchAudio(url);
    const transcription = await this.googleSpeech.transcribe(buffer, language);

    return this.moderateText(transcription, minimumConfidence);
  }
}
|
|
120
|
+
|
|
121
|
+
export default ModerationClient;
|
package/src/index.ts
CHANGED
|
@@ -1,277 +1,3 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
import { LanguageServiceClient } from '@google-cloud/language';
|
|
5
|
-
import { SpeechClient, protos } from '@google-cloud/speech';
|
|
6
|
-
|
|
7
|
-
import sharp from 'sharp';
|
|
8
|
-
|
|
9
|
-
import URLBlackList from './url-blacklist.json';
|
|
10
|
-
import URLShortenerList from './url-shorteners.json';
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
import { ModerationCategory, ModerationConfiguration, ModerationResult, ThreatsResponse } from './types';
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
type IRecognitionConfig = protos.google.cloud.speech.v1.IRecognitionConfig;
|
|
17
|
-
|
|
18
|
-
type ISpeechRecognitionResult = protos.google.cloud.speech.v1.ISpeechRecognitionResult;
|
|
19
|
-
|
|
20
|
-
const MAX_IMAGE_SIZE = 5 * 1024 * 1024; // 5MB in bytes for Rekognition limit
|
|
21
|
-
|
|
22
|
-
/**
|
|
23
|
-
* Moderation Client
|
|
24
|
-
*
|
|
25
|
-
* @class ModerationClient
|
|
26
|
-
*/
|
|
27
|
-
class ModerationClient {
|
|
28
|
-
|
|
29
|
-
private rekognitionClient?: Rekognition;
|
|
30
|
-
private googleLanguageClient?: LanguageServiceClient;
|
|
31
|
-
private googleSpeechClient?: SpeechClient;
|
|
32
|
-
private googleAPIKey?: string;
|
|
33
|
-
private banList?: string[] = [];
|
|
34
|
-
private urlBlackList?: string[] = [];
|
|
35
|
-
|
|
36
|
-
/**
|
|
37
|
-
*
|
|
38
|
-
* @param {ModerationConfiguration} configuration
|
|
39
|
-
*/
|
|
40
|
-
constructor (configuration: ModerationConfiguration) {
|
|
41
|
-
if (configuration.aws) {
|
|
42
|
-
this.rekognitionClient = new Rekognition(configuration.aws);
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
if (typeof configuration.google?.keyFile === 'string') {
|
|
46
|
-
this.googleLanguageClient = new LanguageServiceClient({ keyFile: configuration.google.keyFile });
|
|
47
|
-
this.googleSpeechClient = new SpeechClient({ keyFile: configuration.google.keyFile });
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
if (typeof configuration.google?.apiKey === 'string') {
|
|
51
|
-
this.googleAPIKey = configuration.google.apiKey;
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
if (Array.isArray(configuration.banList)) {
|
|
55
|
-
this.banList = configuration.banList;
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
if (Array.isArray(configuration.urlBlackList)) {
|
|
59
|
-
this.urlBlackList = configuration.urlBlackList;
|
|
60
|
-
}
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
/**
|
|
64
|
-
* Returns a list of moderation categories detected on a text
|
|
65
|
-
*
|
|
66
|
-
* @param {string} text The text to moderate
|
|
67
|
-
* @param {number} [minimumConfidence = 50] The minimum confidence required for a category to be considered
|
|
68
|
-
*
|
|
69
|
-
* @returns {Promise<ModerationResult>} The list of results that were detected with the minimum confidence specified
|
|
70
|
-
*/
|
|
71
|
-
async moderateText (text: string, minimumConfidence: number = 50): Promise<ModerationResult> {
|
|
72
|
-
const categories: ModerationCategory[] = [];
|
|
73
|
-
|
|
74
|
-
if (Array.isArray(this.banList)) {
|
|
75
|
-
const normalizedText = text.toLowerCase();
|
|
76
|
-
const matches = this.banList.filter(w => normalizedText.indexOf(w) > -1);
|
|
77
|
-
|
|
78
|
-
if (matches.length > 0) {
|
|
79
|
-
const words = normalizedText.split(' ');
|
|
80
|
-
|
|
81
|
-
categories.push({
|
|
82
|
-
category: 'BAN_LIST',
|
|
83
|
-
confidence: (matches.length / words.length) * 100,
|
|
84
|
-
});
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
if (typeof this.googleLanguageClient === 'undefined') {
|
|
90
|
-
return { source: text, moderation: categories };
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
const [ result ] = await this.googleLanguageClient.moderateText({
|
|
94
|
-
document: {
|
|
95
|
-
content: text,
|
|
96
|
-
type: 'PLAIN_TEXT',
|
|
97
|
-
},
|
|
98
|
-
});
|
|
99
|
-
|
|
100
|
-
if (result && 'moderationCategories' in result) {
|
|
101
|
-
if (Array.isArray(result.moderationCategories)) {
|
|
102
|
-
const results = result.moderationCategories.map(c => ({
|
|
103
|
-
category: c.name ?? 'Unknown',
|
|
104
|
-
confidence: (c.confidence ?? 0) * 100,
|
|
105
|
-
})).filter(c => c.confidence >= minimumConfidence);
|
|
106
|
-
return { source: text, moderation: [...categories, ...results] };
|
|
107
|
-
}
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
return { source: text, moderation: [] };
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
/**
|
|
114
|
-
* Returns a list of moderation categories detected on an image
|
|
115
|
-
*
|
|
116
|
-
* @param {string} url
|
|
117
|
-
* @param {number} [minimumConfidence = 50] The minimum confidence required for a category to be considered
|
|
118
|
-
*
|
|
119
|
-
*
|
|
120
|
-
* @returns {Promise<ModerationResult[]>} The list of results that were detected with the minimum confidence specified
|
|
121
|
-
*/
|
|
122
|
-
async moderateImage (url: string, minimumConfidence: number = 50): Promise<ModerationResult> {
|
|
123
|
-
if (typeof this.rekognitionClient === 'undefined') {
|
|
124
|
-
return { source: url, moderation: [] };
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
const { data } = await axios.get<string>(url, { responseType: 'arraybuffer' });
|
|
128
|
-
|
|
129
|
-
let buffer: Buffer;
|
|
130
|
-
|
|
131
|
-
// GIFs will be split into frames
|
|
132
|
-
if (url.toLowerCase().indexOf('.gif') > -1) {
|
|
133
|
-
buffer = await sharp(data, { pages: -1 }).toFormat('png').toBuffer();
|
|
134
|
-
} else if (url.toLowerCase().indexOf('.webp') > -1) {
|
|
135
|
-
buffer = await sharp(data).toFormat('png').toBuffer();
|
|
136
|
-
} else {
|
|
137
|
-
// Download image as binary data
|
|
138
|
-
buffer = Buffer.from(data, 'binary');
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
// Ensure image is not larger than 5MB (Rekognition limit)
|
|
142
|
-
if (buffer.length > MAX_IMAGE_SIZE) {
|
|
143
|
-
try {
|
|
144
|
-
// Calculate new dimensions to reduce size
|
|
145
|
-
const metadata = await sharp(buffer).metadata();
|
|
146
|
-
|
|
147
|
-
const { width, height } = metadata;
|
|
148
|
-
|
|
149
|
-
if (typeof width !== 'number' || typeof height !== 'number') {
|
|
150
|
-
throw new Error('Invalid image metadata');
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
// Calculate the scaling factor
|
|
154
|
-
const scalingFactor = Math.sqrt(MAX_IMAGE_SIZE / buffer.length);
|
|
155
|
-
|
|
156
|
-
// Calculate new dimensions
|
|
157
|
-
const newWidth = Math.floor(width * scalingFactor);
|
|
158
|
-
const newHeight = Math.floor(height * scalingFactor);
|
|
159
|
-
|
|
160
|
-
const resizedBuffer = await sharp(buffer)
|
|
161
|
-
.resize(Math.round(newWidth), Math.round(newHeight))
|
|
162
|
-
.toBuffer();
|
|
163
|
-
|
|
164
|
-
buffer = resizedBuffer;
|
|
165
|
-
} catch {
|
|
166
|
-
// We can't resize the image. We'll skip the resize and try to process it as is
|
|
167
|
-
}
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
const { ModerationLabels } = await this.rekognitionClient.detectModerationLabels({
|
|
171
|
-
Image: {
|
|
172
|
-
Bytes: buffer
|
|
173
|
-
},
|
|
174
|
-
MinConfidence: minimumConfidence
|
|
175
|
-
});
|
|
176
|
-
|
|
177
|
-
if (Array.isArray(ModerationLabels)) {
|
|
178
|
-
const moderation = ModerationLabels.map(l => ({
|
|
179
|
-
category: l.Name ?? 'Unknown',
|
|
180
|
-
confidence: l.Confidence ?? 0,
|
|
181
|
-
}));
|
|
182
|
-
|
|
183
|
-
return { source: url, moderation };
|
|
184
|
-
}
|
|
185
|
-
|
|
186
|
-
return { source: url, moderation: [] };
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
async moderateLink (url: string, allowShorteners = false): Promise<ModerationResult> {
|
|
190
|
-
try {
|
|
191
|
-
const domain = new URL(url).hostname;
|
|
192
|
-
|
|
193
|
-
const blacklisted = this.urlBlackList?.some(u => u.indexOf(url) > -1);
|
|
194
|
-
|
|
195
|
-
if (blacklisted) {
|
|
196
|
-
return { source: url, moderation: [{ category: 'CUSTOM_BLACK_LIST', confidence: 100 }] };
|
|
197
|
-
}
|
|
198
|
-
|
|
199
|
-
const globallyBlacklisted = URLBlackList.some(u => u === domain);
|
|
200
|
-
|
|
201
|
-
if (globallyBlacklisted) {
|
|
202
|
-
return { source: url, moderation: [{ category: 'BLACK_LIST', confidence: 100 }] };
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
if (!allowShorteners) {
|
|
206
|
-
const isShortened = URLShortenerList.some(u => u === domain);
|
|
207
|
-
|
|
208
|
-
if (isShortened) {
|
|
209
|
-
return { source: url, moderation: [{ category: 'URL_SHORTENER', confidence: 100 }] };
|
|
210
|
-
}
|
|
211
|
-
}
|
|
212
|
-
} catch {
|
|
213
|
-
// Invalid URL
|
|
214
|
-
return { source: url, moderation: [] };
|
|
215
|
-
}
|
|
216
|
-
|
|
217
|
-
if (typeof this.googleAPIKey !== 'string') {
|
|
218
|
-
return { source: url, moderation: [] };
|
|
219
|
-
}
|
|
220
|
-
|
|
221
|
-
const types = [
|
|
222
|
-
'MALWARE',
|
|
223
|
-
'SOCIAL_ENGINEERING',
|
|
224
|
-
'UNWANTED_SOFTWARE',
|
|
225
|
-
'SOCIAL_ENGINEERING_EXTENDED_COVERAGE'
|
|
226
|
-
];
|
|
227
|
-
|
|
228
|
-
const threatTypes = types.join('&threatTypes=');
|
|
229
|
-
const requestUrl = `https://webrisk.googleapis.com/v1/uris:search?threatTypes=${threatTypes}&key=${this.googleAPIKey}`;
|
|
230
|
-
|
|
231
|
-
const { data } = await axios.get<ThreatsResponse>(`${requestUrl}&uri=${encodeURIComponent(url)}`);
|
|
232
|
-
|
|
233
|
-
const threats = data?.threat?.threatTypes;
|
|
234
|
-
|
|
235
|
-
if (Array.isArray(threats)) {
|
|
236
|
-
const moderation = threats.map(t => ({
|
|
237
|
-
category: t,
|
|
238
|
-
confidence: 100,
|
|
239
|
-
}));
|
|
240
|
-
|
|
241
|
-
return { source: url, moderation };
|
|
242
|
-
}
|
|
243
|
-
|
|
244
|
-
return { source: url, moderation: [] };
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
async moderateAudio (url: string, language: string = 'en-US', minimumConfidence: number = 50): Promise<ModerationResult> {
|
|
248
|
-
if (typeof this.googleSpeechClient === 'undefined') {
|
|
249
|
-
return { source: url, moderation: [] };
|
|
250
|
-
}
|
|
251
|
-
|
|
252
|
-
const { data } = await axios.get<string>(url, { responseType: 'arraybuffer' });
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
const options: IRecognitionConfig = {
|
|
256
|
-
encoding: 'OGG_OPUS',
|
|
257
|
-
sampleRateHertz: 48000,
|
|
258
|
-
languageCode: language,
|
|
259
|
-
};
|
|
260
|
-
|
|
261
|
-
const [ response ] = await this.googleSpeechClient.recognize ({
|
|
262
|
-
audio: { content: Buffer.from(data, 'binary').toString('base64') },
|
|
263
|
-
config: options,
|
|
264
|
-
});
|
|
265
|
-
|
|
266
|
-
if (!Array.isArray(response?.results)) {
|
|
267
|
-
return { source: url, moderation: [] };
|
|
268
|
-
}
|
|
269
|
-
|
|
270
|
-
const transcription = response?.results?.map((result: ISpeechRecognitionResult) => result.alternatives?.at(0)?.transcript ?? '').join(' ');
|
|
271
|
-
|
|
272
|
-
return this.moderateText(transcription, minimumConfidence);
|
|
273
|
-
}
|
|
274
|
-
|
|
275
|
-
}
|
|
276
|
-
|
|
277
|
-
export default ModerationClient;
|
|
1
|
+
export { ModerationClient, default } from './client';
|
|
2
|
+
export * from './types';
|
|
3
|
+
export * from './actions';
|