@joliegg/moderation 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +118 -0
- package/README.md +67 -0
- package/dist/index.d.ts +40 -0
- package/dist/index.js +171 -0
- package/dist/types/index.d.ts +27 -0
- package/dist/types/index.js +2 -0
- package/dist/url-blacklist.json +31735 -0
- package/package.json +43 -0
- package/src/index.ts +218 -0
- package/src/types/index.ts +31 -0
- package/src/url-blacklist.json +31735 -0
package/package.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@joliegg/moderation",
|
|
3
|
+
"version": "0.3.0",
|
|
4
|
+
"description": "A set of tools for chat moderation",
|
|
5
|
+
"author": "Diana Islas Ocampo",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"license": "HL3-CL",
|
|
8
|
+
"scripts": {
|
|
9
|
+
"publish": "yarn build && yarn docs",
|
|
10
|
+
"build": "yarn eslint . && rm -rf ./dist && yarn tsc --declaration",
|
|
11
|
+
"docs": "typedoc",
|
|
12
|
+
"test": "node test/index.js"
|
|
13
|
+
},
|
|
14
|
+
"engines": {
|
|
15
|
+
"node": "20.x"
|
|
16
|
+
},
|
|
17
|
+
"devDependencies": {
|
|
18
|
+
"@babel/eslint-parser": "^7.24.8",
|
|
19
|
+
"@eslint/js": "^9.7.0",
|
|
20
|
+
"@typescript-eslint/eslint-plugin": "^7.16.0",
|
|
21
|
+
"@typescript-eslint/parser": "^7.16.0",
|
|
22
|
+
"dotenv": "^16.4.5",
|
|
23
|
+
"eslint": "^9.7.0",
|
|
24
|
+
"typedoc": "^0.26.4",
|
|
25
|
+
"typescript": "^5.5.3",
|
|
26
|
+
"typescript-eslint": "^7.16.0"
|
|
27
|
+
},
|
|
28
|
+
"dependencies": {
|
|
29
|
+
"@aws-sdk/client-rekognition": "^3.614.0",
|
|
30
|
+
"@google-cloud/language": "^6.4.0",
|
|
31
|
+
"@google-cloud/speech": "^6.6.1",
|
|
32
|
+
"axios": "^1.7.2",
|
|
33
|
+
"sharp": "^0.32.6"
|
|
34
|
+
},
|
|
35
|
+
"files": [
|
|
36
|
+
"README.md",
|
|
37
|
+
"LICENSE.md",
|
|
38
|
+
"package.json",
|
|
39
|
+
"src/*",
|
|
40
|
+
"dist/*"
|
|
41
|
+
],
|
|
42
|
+
"packageManager": "yarn@4.3.1"
|
|
43
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
|
|
3
|
+
import { Rekognition } from '@aws-sdk/client-rekognition';
|
|
4
|
+
import { LanguageServiceClient } from '@google-cloud/language';
|
|
5
|
+
import { SpeechClient, protos } from '@google-cloud/speech';
|
|
6
|
+
|
|
7
|
+
import sharp from 'sharp';
|
|
8
|
+
|
|
9
|
+
import URLBlackList from './url-blacklist.json';
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
import { ModerationCategory, ModerationConfiguration, ModerationResult, ThreatsResponse } from './types';
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
type IRecognitionConfig = protos.google.cloud.speech.v1.IRecognitionConfig;
|
|
16
|
+
|
|
17
|
+
type ISpeechRecognitionResult = protos.google.cloud.speech.v1.ISpeechRecognitionResult;
|
|
18
|
+
|
|
19
|
+
/**
 * Moderation Client
 *
 * Screens text, images, links and audio using a local ban list, a bundled URL
 * blacklist, AWS Rekognition and Google Cloud (Natural Language, Speech and
 * Web Risk). Each backend is optional: when the matching configuration section
 * is missing, the corresponding check is skipped and an empty (or local-only)
 * result is returned instead of throwing.
 *
 * @name ModerationClient
 */
class ModerationClient {

  /** Lower-cased, non-empty entries of the bundled URL blacklist; built on first use. */
  private static normalizedBlackList?: string[];

  private rekognitionClient?: Rekognition;
  private googleLanguageClient?: LanguageServiceClient;
  private googleSpeechClient?: SpeechClient;
  private googleAPIKey?: string;
  /** Ban-list entries, lower-cased and with empty strings removed. */
  private banList?: string[];

  /**
   * Creates the backend clients for which configuration was supplied.
   *
   * @param {ModerationConfiguration} configuration
   */
  constructor (configuration: ModerationConfiguration) {
    if (configuration.aws) {
      this.rekognitionClient = new Rekognition(configuration.aws);
    }

    const keyFile = configuration.google?.keyFile;

    if (typeof keyFile === 'string') {
      this.googleLanguageClient = new LanguageServiceClient({ keyFile });
      this.googleSpeechClient = new SpeechClient({ keyFile });
    }

    const apiKey = configuration.google?.apiKey;

    if (typeof apiKey === 'string') {
      this.googleAPIKey = apiKey;
    }

    if (Array.isArray(configuration.banList)) {
      // Normalise once here so matching is case-insensitive on both sides.
      // Empty entries are dropped because '' is a substring of every text.
      this.banList = configuration.banList
        .filter(word => typeof word === 'string' && word.length > 0)
        .map(word => word.toLowerCase());
    }
  }

  /**
   * Returns a list of moderation categories detected on a text
   *
   * Ban-list hits are reported as a single 'Ban List' category whose
   * confidence is the number of ban-list entries found in the text; it is not
   * filtered by `minimumConfidence`. Categories reported by Google are scaled
   * by 100 before being compared with `minimumConfidence`.
   *
   * @param {string} text The text to moderate
   * @param {number} [minimumConfidence = 50] The minimum confidence required for a category to be considered
   *
   * @returns {Promise<ModerationResult>} The list of results that were detected with the minimum confidence specified
   */
  async moderateText (text: string, minimumConfidence: number = 50): Promise<ModerationResult> {
    const categories: ModerationCategory[] = [];

    if (Array.isArray(this.banList)) {
      const normalizedText = text.toLowerCase();
      const matches = this.banList.filter(word => normalizedText.includes(word));

      if (matches.length > 0) {
        categories.push({
          category: 'Ban List',
          confidence: matches.length,
        });
      }
    }

    if (typeof this.googleLanguageClient === 'undefined') {
      return { source: text, moderation: categories };
    }

    const [ result ] = await this.googleLanguageClient.moderateText({
      document: {
        content: text,
        type: 'PLAIN_TEXT',
      },
    });

    if (Array.isArray(result?.moderationCategories)) {
      const results = result.moderationCategories
        .map(c => ({
          category: c.name ?? 'Unknown',
          confidence: (c.confidence ?? 0) * 100,
        }))
        .filter(c => c.confidence >= minimumConfidence);

      return { source: text, moderation: [...categories, ...results] };
    }

    // No usable remote answer: still report whatever the ban list found.
    return { source: text, moderation: categories };
  }

  /**
   * Returns a list of moderation categories detected on an image
   *
   * The image is downloaded and sent to Rekognition. GIF and WebP images are
   * converted to PNG first (Rekognition accepts JPEG and PNG only); all pages
   * of a GIF are loaded so every frame ends up in the converted image.
   *
   * @param {string} url The URL the image is downloaded from
   * @param {number} [minimumConfidence = 95] The minimum confidence required for a category to be considered
   *
   * @returns {Promise<ModerationResult>} The list of results that were detected with the minimum confidence specified
   */
  async moderateImage (url: string, minimumConfidence: number = 95): Promise<ModerationResult> {
    if (typeof this.rekognitionClient === 'undefined') {
      return { source: url, moderation: [] };
    }

    const data = await this.download(url);

    let buffer: Buffer;

    switch (ModerationClient.detectImageFormat(data, url)) {
      case 'gif':
        // pages: -1 makes sharp read every frame instead of only the first
        buffer = await sharp(data, { pages: -1 }).toFormat('png').toBuffer();
        break;
      case 'webp':
        buffer = await sharp(data).toFormat('png').toBuffer();
        break;
      default:
        buffer = data;
    }

    const { ModerationLabels } = await this.rekognitionClient.detectModerationLabels({
      Image: {
        Bytes: buffer,
      },
      MinConfidence: minimumConfidence,
    });

    if (Array.isArray(ModerationLabels)) {
      const moderation = ModerationLabels.map(l => ({
        category: l.Name ?? 'Unknown',
        confidence: l.Confidence ?? 0,
      }));

      return { source: url, moderation };
    }

    return { source: url, moderation: [] };
  }

  /**
   * Checks a link against the bundled URL blacklist and, when a Google API key
   * was configured, against the Google Web Risk `uris:search` endpoint.
   *
   * A blacklist hit short-circuits the remote lookup and is reported as
   * 'BLACK_LIST' with confidence 100. Each threat type returned by Web Risk is
   * reported as its own category, also with confidence 100.
   *
   * @param {string} url The link to check
   *
   * @returns {Promise<ModerationResult>} The threats found for the link
   */
  async moderateLink (url: string): Promise<ModerationResult> {
    // Compare lower-cased on both sides so mixed-case hosts cannot bypass the list.
    const normalizedUrl = url.toLowerCase();
    const blacklisted = ModerationClient.blackList().some(entry => normalizedUrl.includes(entry));

    if (blacklisted) {
      return { source: url, moderation: [{ category: 'BLACK_LIST', confidence: 100 }] };
    }

    if (typeof this.googleAPIKey !== 'string') {
      return { source: url, moderation: [] };
    }

    const types = [
      'MALWARE',
      'SOCIAL_ENGINEERING',
      'UNWANTED_SOFTWARE',
      'SOCIAL_ENGINEERING_EXTENDED_COVERAGE',
    ];

    // threatTypes is a repeated query parameter, hence the manual join.
    const query = [
      ...types.map(type => `threatTypes=${type}`),
      `key=${encodeURIComponent(this.googleAPIKey)}`,
      `uri=${encodeURIComponent(url)}`,
    ].join('&');

    const { data } = await axios.get<ThreatsResponse>(`https://webrisk.googleapis.com/v1/uris:search?${query}`);

    const threats = data?.threat?.threatTypes;

    if (Array.isArray(threats)) {
      const moderation = threats.map(t => ({
        category: t,
        confidence: 100,
      }));

      return { source: url, moderation };
    }

    return { source: url, moderation: [] };
  }

  /**
   * Transcribes an audio file with Google Speech and moderates the transcript
   * with {@link ModerationClient.moderateText}.
   *
   * Note that on success the returned `source` is the transcript, not the URL,
   * because the result comes straight from `moderateText`.
   *
   * @param {string} url The URL the audio is downloaded from
   * @param {number} [minimumConfidence = 50] The minimum confidence required for a category to be considered
   * @param {IRecognitionConfig} [recognitionConfig = {}] Overrides merged over the default recognition
   * settings (OGG_OPUS, 48000 Hz, es-US)
   *
   * @returns {Promise<ModerationResult>} The moderation result for the transcript
   */
  async moderateAudio (url: string, minimumConfidence: number = 50, recognitionConfig: IRecognitionConfig = {}): Promise<ModerationResult> {
    if (typeof this.googleSpeechClient === 'undefined') {
      return { source: url, moderation: [] };
    }

    const data = await this.download(url);

    const options: IRecognitionConfig = {
      encoding: 'OGG_OPUS',
      sampleRateHertz: 48000,
      languageCode: 'es-US',
      ...recognitionConfig,
    };

    const [ response ] = await this.googleSpeechClient.recognize({
      audio: { content: data.toString('base64') },
      config: options,
    });

    const results = response?.results;

    if (!Array.isArray(results)) {
      return { source: url, moderation: [] };
    }

    // Only the first alternative of each result is used.
    const transcription = results
      .map((result: ISpeechRecognitionResult) => result.alternatives?.[0]?.transcript ?? '')
      .join(' ');

    return this.moderateText(transcription, minimumConfidence);
  }

  /** Downloads a URL as raw bytes. */
  private async download (url: string): Promise<Buffer> {
    const { data } = await axios.get<Buffer | ArrayBuffer>(url, { responseType: 'arraybuffer' });

    return Buffer.isBuffer(data) ? data : Buffer.from(data);
  }

  /** Returns the bundled URL blacklist, lower-cased and without empty entries. */
  private static blackList (): string[] {
    if (typeof ModerationClient.normalizedBlackList === 'undefined') {
      ModerationClient.normalizedBlackList = URLBlackList
        .filter(entry => typeof entry === 'string' && entry.length > 0)
        .map(entry => entry.toLowerCase());
    }

    return ModerationClient.normalizedBlackList;
  }

  /** Classifies an image as GIF or WebP from its leading bytes, falling back to the URL. */
  private static detectImageFormat (bytes: Buffer, url: string): 'gif' | 'webp' | 'other' {
    const head = bytes.subarray(0, 12).toString('latin1');
    const normalizedUrl = url.toLowerCase();

    // GIF files start with "GIF87a" or "GIF89a"
    if (head.startsWith('GIF8') || normalizedUrl.includes('.gif')) {
      return 'gif';
    }

    // WebP files are RIFF containers: "RIFF", 4 size bytes, then "WEBP"
    if ((head.startsWith('RIFF') && head.slice(8, 12) === 'WEBP') || normalizedUrl.includes('.webp')) {
      return 'webp';
    }

    return 'other';
  }

}
|
|
217
|
+
|
|
218
|
+
export default ModerationClient;
|
|
package/src/types/index.ts
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { RekognitionClientConfig } from '@aws-sdk/client-rekognition';
|
|
2
|
+
|
|
3
|
+
/**
 * Options accepted by the moderation client constructor.
 *
 * Every section is optional; the client only creates the backends for which
 * a section is present.
 */
export interface ModerationConfiguration {
  /** Handed unchanged to the AWS Rekognition client constructor. */
  aws?: RekognitionClientConfig;

  /** Google Cloud credentials. */
  google?: {
    /** API key appended to Web Risk `uris:search` requests. */
    apiKey?: string;
    /** Key file passed as `keyFile` to the Google Language and Speech clients. */
    keyFile?: string;
  };

  /** Words or phrases; text containing any of them is reported under the 'Ban List' category. */
  banList?: string[];
}
|
|
11
|
+
|
|
12
|
+
/**
 * A single category detected by a moderation check.
 */
export interface ModerationCategory {
  /** Name of the category, e.g. a provider label, 'Ban List' or 'BLACK_LIST'. */
  category: string;

  /**
   * Score attached to the category. The client reports link hits as 100,
   * multiplies Google text scores by 100, and uses the number of matched
   * entries for ban-list hits.
   */
  confidence: number;
}
|
|
16
|
+
/**
 * Outcome of a moderation check.
 */
export interface ModerationResult {
  /** The text or URL the result refers to. */
  source: string;

  /** Categories detected for the source; empty when nothing was found. */
  moderation: ModerationCategory[];
}
|
|
20
|
+
|
|
21
|
+
/**
 * A banned word paired with the category it belongs to.
 */
export interface BannedWord {
  /** The banned word. */
  word: string;

  /** Category the word is filed under. */
  category: string;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Body of the response read from the Web Risk `uris:search` request.
 */
export interface ThreatsResponse {
  /** Present when the looked-up URI matched at least one threat list. */
  threat?: {
    /** Threat types the URI matched; each becomes one moderation category. */
    threatTypes: string[];
    /** Presumably when the verdict stops being valid; not read by the client. */
    expireTime: string;
  };
}
|